From 5774b3cfdedb3624ef0d2c82cccbfd61bcb60fd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Fri, 11 Jul 2025 12:18:31 -0300 Subject: drm/v3d: Add parameter to retrieve the global number of GPU resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GL extension KHR_robustness uses the number of global and per-context GPU resets to learn about graphics resets that affect a GL context. This commit introduces a new V3D parameter to retrieve the global number of GPU resets that have happened since the driver was probed. To retrieve this information, user-space must use DRM_V3D_PARAM_GLOBAL_RESET_COUNTER. Reviewed-by: Iago Toral Quiroga Link: https://lore.kernel.org/r/20250711-v3d-reset-counter-v1-1-1ac73e9fca2d@igalia.com Signed-off-by: Maíra Canal --- include/uapi/drm/v3d_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index dbbc404d2b3d..0a7ce2f6be19 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -294,6 +294,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, DRM_V3D_PARAM_MAX_PERF_COUNTERS, DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES, + DRM_V3D_PARAM_GLOBAL_RESET_COUNTER, }; struct drm_v3d_get_param { -- cgit v1.2.3 From 769c153cfc3c6669c7b318f66c2b21ec3951fb4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Fri, 11 Jul 2025 12:18:32 -0300 Subject: drm/v3d: Add parameter to retrieve the number of GPU resets per-fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GL extension KHR_robustness uses the number of global and per-context GPU resets to learn about graphics resets that affect a GL context. This commit introduces a new V3D parameter to retrieve the number of GPU resets triggered by jobs submitted through a file descriptor. To retrieve this information, user-space must use DRM_V3D_PARAM_CONTEXT_RESET_COUNTER. 
Reviewed-by: Iago Toral Quiroga Link: https://lore.kernel.org/r/20250711-v3d-reset-counter-v1-2-1ac73e9fca2d@igalia.com Signed-off-by: Maíra Canal --- include/uapi/drm/v3d_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 0a7ce2f6be19..d9b01f4c3a04 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -295,6 +295,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_MAX_PERF_COUNTERS, DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES, DRM_V3D_PARAM_GLOBAL_RESET_COUNTER, + DRM_V3D_PARAM_CONTEXT_RESET_COUNTER, }; struct drm_v3d_get_param { -- cgit v1.2.3 From b9a572f471993d3e8bf874fcb57f331d66650440 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 1 May 2025 11:29:51 +0000 Subject: drm: document DRM_MODE_PAGE_FLIP_EVENT interactions with atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not obvious off-hand which CRTCs will get a page-flip event when using this flag in an atomic commit, because it's all implied by the contents of the atomic commit. Document requirements for using this flag and how to request an event for a CRTC. Note, because prepare_signaling() runs right after drm_atomic_set_property() calls, page-flip events are not delivered for CRTCs pulled in later by DRM core (e.g. on modeset by drm_atomic_helper_check_modeset()) or the driver (e.g. other CRTCs sharing a DP-MST connector). 
v2: fix cut off sentence in commit message (Pekka) Signed-off-by: Simon Ser Reviewed-by: Simona Vetter Cc: Ville Syrjälä Cc: Pekka Paalanen Cc: David Turner Cc: Daniel Stone Link: https://lore.kernel.org/r/20250501112945.6448-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index c082810c08a8..a122bea25593 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -962,6 +962,14 @@ struct hdr_output_metadata { * Request that the kernel sends back a vblank event (see * struct drm_event_vblank) with the &DRM_EVENT_FLIP_COMPLETE type when the * page-flip is done. + * + * When used with atomic uAPI, one event will be delivered per CRTC included in + * the atomic commit. A CRTC is included in an atomic commit if one of its + * properties is set, or if a property is set on a connector or plane linked + * via the CRTC_ID property to the CRTC. At least one CRTC must be included, + * and all pulled in CRTCs must be either previously or newly powered on (in + * other words, a powered off CRTC which stays off cannot be included in the + * atomic commit). */ #define DRM_MODE_PAGE_FLIP_EVENT 0x01 /** -- cgit v1.2.3 From 53096728b8910c6916ecc6c46a5abc5c678b58d9 Mon Sep 17 00:00:00 2001 From: David Francis Date: Thu, 17 Jul 2025 10:35:55 -0400 Subject: drm: Add DRM prime interface to reassign GEM handle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CRIU restore of drm buffer objects requires the ability to create or import a buffer object with a specific gem handle. Add new drm ioctl DRM_IOCTL_GEM_CHANGE_HANDLE, which takes the gem handle of an object and moves that object to a specified new gem handle. This ioctl needs to call drm_prime_remove_buf_handle, but that function acquires the prime lock, which the ioctl needs to hold for other purposes. 
Make drm_prime_remove_buf_handle not acquire the prime lock, and change its other caller to reflect this. The rest of the kernel patches required to enable CRIU can be found at https://lore.kernel.org/dri-devel/20250617194536.538681-1-David.Francis@amd.com/ v2 - Move documentation to UAPI headers v3 - Always return 0 on success Signed-off-by: David Francis Acked-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250717143556.857893-2-David.Francis@amd.com --- include/uapi/drm/drm.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index e63a71d3c607..7fa123e11c3f 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -625,6 +625,21 @@ struct drm_gem_open { __u64 size; }; +/** + * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl. + * @handle: The handle of a gem object. + * @new_handle: An available gem handle. + * + * This ioctl changes the handle of a GEM object to the specified one. + * The new handle must be unused. On success the old handle is closed + * and all further IOCTL should refer to the new handle only. + * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle. + */ +struct drm_gem_change_handle { + __u32 handle; + __u32 new_handle; +}; + /** * DRM_CAP_DUMB_BUFFER * @@ -1309,6 +1324,14 @@ extern "C" { */ #define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) +/** + * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle + * + * Some applications (notably CRIU) need objects to have specific gem handles. + * This ioctl changes the object at one gem handle to use a new gem handle. + */ +#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. 
-- cgit v1.2.3 From 0864197382fa7c8c2641bff0f36355bf4bd76398 Mon Sep 17 00:00:00 2001 From: David Francis Date: Thu, 17 Jul 2025 10:35:56 -0400 Subject: drm: Move drm_gem ioctl kerneldoc to uapi file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drm_gem ioctls were documented in internal file drm_gem.c instead of uapi header drm.h. Move them there and change to appropriate kerneldoc formatting. Signed-off-by: David Francis Reviewed-by: Simona Vetter Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250717143556.857893-3-David.Francis@amd.com --- include/uapi/drm/drm.h | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 7fa123e11c3f..3cd5cf15e3c9 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -597,31 +597,47 @@ struct drm_set_version { int drm_dd_minor; }; -/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/** + * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl. + * @handle: Handle of the object to be closed. + * @pad: Padding. + * + * Releases the handle to an mm object. + */ struct drm_gem_close { - /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/* DRM_IOCTL_GEM_FLINK ioctl argument type */ +/** + * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl. + * @handle: Handle for the object being named. + * @name: Returned global name. + * + * Create a global name for an object, returning the name. + * + * Note that the name does not hold a reference; when the object + * is freed, the name goes away. + */ struct drm_gem_flink { - /** Handle for the object being named */ __u32 handle; - - /** Returned global name */ __u32 name; }; -/* DRM_IOCTL_GEM_OPEN ioctl argument type */ +/** + * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl. + * @name: Name of object being opened. 
+ * @handle: Returned handle for the object. + * @size: Returned size of the object + * + * Open an object using the global name, returning a handle and the size. + * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. + */ struct drm_gem_open { - /** Name of object being opened */ __u32 name; - - /** Returned handle for the object */ __u32 handle; - - /** Returned size of the object */ __u64 size; }; -- cgit v1.2.3 From 4d2d28776ae3ad7aa95328d28aff220b0ec6202d Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Tue, 8 Jul 2025 17:48:18 +0200 Subject: drm/bridge: add a cleanup action for scope-based drm_bridge_put() invocation Many functions get a drm_bridge pointer, only use it in the function body (or a smaller scope such as a loop body), and don't store it. In these cases they always need to drm_bridge_put() it before returning (or exiting the scope). Some of those functions have complex code paths with multiple return points or loop break/continue. This makes adding drm_bridge_put() in the right places tricky, ugly and error prone in case of future code changes. Others use the bridge pointer in the return statement and would need to split the return line to fit the drm_bridge_put, which is a bit annoying: -return some_thing(bridge); +ret = some_thing(bridge); +drm_bridge_put(bridge); +return ret; To make it easier for all of them to put the bridge reference correctly without complicating code, define a scope-based cleanup action to be used with __free(). 
Reviewed-by: Maxime Ripard Link: https://lore.kernel.org/r/20250708-drm-bridge-alloc-getput-drm_bridge_chain_get_first_bridge-v9-1-db1ba3df7f58@bootlin.com Signed-off-by: Luca Ceresoli --- include/drm/drm_bridge.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 8ed80cad77ec..8290e665554e 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -23,6 +23,7 @@ #ifndef __DRM_BRIDGE_H__ #define __DRM_BRIDGE_H__ +#include #include #include #include @@ -1228,6 +1229,9 @@ drm_priv_to_bridge(struct drm_private_obj *priv) struct drm_bridge *drm_bridge_get(struct drm_bridge *bridge); void drm_bridge_put(struct drm_bridge *bridge); +/* Cleanup action for use with __free() */ +DEFINE_FREE(drm_bridge_put, struct drm_bridge *, if (_T) drm_bridge_put(_T)) + void *__devm_drm_bridge_alloc(struct device *dev, size_t size, size_t offset, const struct drm_bridge_funcs *funcs); -- cgit v1.2.3 From 8fa5909400f377351836419223c33f1131f0f7d3 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Tue, 8 Jul 2025 17:48:19 +0200 Subject: drm/bridge: get the bridge returned by drm_bridge_chain_get_first_bridge() drm_bridge_chain_get_first_bridge() returns a bridge pointer that the caller could hold for a long time. Increment the refcount of the returned bridge and document it must be put by the caller. 
Reviewed-by: Maxime Ripard Link: https://lore.kernel.org/r/20250708-drm-bridge-alloc-getput-drm_bridge_chain_get_first_bridge-v9-2-db1ba3df7f58@bootlin.com Signed-off-by: Luca Ceresoli --- include/drm/drm_bridge.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 8290e665554e..717171d0e587 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1337,6 +1337,9 @@ drm_bridge_get_prev_bridge(struct drm_bridge *bridge) * drm_bridge_chain_get_first_bridge() - Get the first bridge in the chain * @encoder: encoder object * + * The refcount of the returned bridge is incremented. Use drm_bridge_put() + * when done with it. + * * RETURNS: * the first bridge in the chain, or NULL if @encoder has no bridge attached * to it. @@ -1344,8 +1347,8 @@ drm_bridge_get_prev_bridge(struct drm_bridge *bridge) static inline struct drm_bridge * drm_bridge_chain_get_first_bridge(struct drm_encoder *encoder) { - return list_first_entry_or_null(&encoder->bridge_chain, - struct drm_bridge, chain_node); + return drm_bridge_get(list_first_entry_or_null(&encoder->bridge_chain, + struct drm_bridge, chain_node)); } /** -- cgit v1.2.3 From bd72d4acda1069579b35123e3cc0b21ec1193a21 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 16 Jul 2025 09:44:14 -0700 Subject: accel/amdxdna: Support user space allocated buffer Enhance DRM_IOCTL_AMDXDNA_CREATE_BO to accept user space allocated buffer pointer. The buffer pages will be pinned in memory. Unless the CAP_IPC_LOCK is enabled for the application process, the total pinned memory cannot exceed rlimit_memlock. 
Reviewed-by: Jacek Lawrynowicz Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20250716164414.112091-1-lizhi.hou@amd.com --- include/uapi/drm/amdxdna_accel.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index a706ead39082..ce523e9ccc52 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -153,6 +153,31 @@ enum amdxdna_bo_type { AMDXDNA_BO_CMD, }; +/** + * struct amdxdna_drm_va_entry + * @vaddr: Virtual address. + * @len: Size of entry. + */ +struct amdxdna_drm_va_entry { + __u64 vaddr; + __u64 len; +}; + +/** + * struct amdxdna_drm_va_tbl + * @dmabuf_fd: The fd of dmabuf. + * @num_entries: Number of va entries. + * @va_entries: Array of va entries. + * + * The input can be either a dmabuf fd or a virtual address entry table. + * When dmabuf_fd is used, num_entries must be zero. + */ +struct amdxdna_drm_va_tbl { + __s32 dmabuf_fd; + __u32 num_entries; + struct amdxdna_drm_va_entry va_entries[]; +}; + /** * struct amdxdna_drm_create_bo - Create a buffer object. * @flags: Buffer flags. MBZ. -- cgit v1.2.3 From 9b75346e3c2b8ecb5b90b132c2fc185ddd30ecf3 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 9 Jul 2025 17:59:37 +0200 Subject: drm/bridge: get the bridge returned by drm_bridge_get_prev_bridge() drm_bridge_get_prev_bridge() returns a bridge pointer that the caller could hold for a long time. Increment the refcount of the returned bridge and document it must be put by the caller. 
Reviewed-by: Maxime Ripard Link: https://lore.kernel.org/r/20250709-drm-bridge-alloc-getput-drm_bridge_get_prev_bridge-v1-1-34ba6f395aaa@bootlin.com Signed-off-by: Luca Ceresoli --- include/drm/drm_bridge.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 717171d0e587..620e119cc24c 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1321,6 +1321,13 @@ drm_bridge_get_next_bridge(struct drm_bridge *bridge) * drm_bridge_get_prev_bridge() - Get the previous bridge in the chain * @bridge: bridge object * + * The caller is responsible for having a reference to @bridge via + * drm_bridge_get() or equivalent. This function leaves the refcount of + * @bridge unmodified. + * + * The refcount of the returned bridge is incremented. Use drm_bridge_put() + * when done with it. + * * RETURNS: * the previous bridge in the chain, or NULL if @bridge is the first. */ @@ -1330,7 +1337,7 @@ drm_bridge_get_prev_bridge(struct drm_bridge *bridge) if (list_is_first(&bridge->chain_node, &bridge->encoder->bridge_chain)) return NULL; - return list_prev_entry(bridge, chain_node); + return drm_bridge_get(list_prev_entry(bridge, chain_node)); } /** -- cgit v1.2.3 From 658ebeac33517bd3169d4b65ed801e9065d0211a Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 21 Jul 2025 11:17:30 +0200 Subject: accel/rocket: Add IOCTL for BO creation This uses the SHMEM DRM helpers and we map right away to the CPU and NPU sides, as all buffers are expected to be accessed from both. v2: - Sync the IOMMUs for the other cores when mapping and unmapping. 
v3: - Make use of GPL-2.0-only for the copyright notice (Jeff Hugo) v6: - Use mutexes guard (Markus Elfring) v7: - Assign its own IOMMU domain to each client, for isolation (Daniel Stone and Robin Murphy) v8: - Correctly acquire a reference to the IOMMU (Robin Murphy) - Allocate DMA address ourselves with drm_mm (Robin Murphy) - Use refcount_read (Heiko Stuebner) - Remove superfluous dma_sync_sgtable_for_device (Robin Murphy) Reviewed-by: Jeffrey Hugo Tested-by: Heiko Stuebner Signed-off-by: Tomeu Vizoso Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20250721-6-10-rocket-v9-3-77ebd484941e@tomeuvizoso.net --- include/uapi/drm/rocket_accel.h | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/uapi/drm/rocket_accel.h (limited to 'include') diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h new file mode 100644 index 000000000000..95720702b7c4 --- /dev/null +++ b/include/uapi/drm/rocket_accel.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Tomeu Vizoso + */ +#ifndef __DRM_UAPI_ROCKET_ACCEL_H__ +#define __DRM_UAPI_ROCKET_ACCEL_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_ROCKET_CREATE_BO 0x00 + +#define DRM_IOCTL_ROCKET_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_ROCKET_CREATE_BO, struct drm_rocket_create_bo) + +/** + * struct drm_rocket_create_bo - ioctl argument for creating Rocket BOs. + * + */ +struct drm_rocket_create_bo { + /** Input: Size of the requested BO. */ + __u32 size; + + /** Output: GEM handle for the BO. */ + __u32 handle; + + /** + * Output: DMA address for the BO in the NPU address space. This address + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + */ + __u64 dma_address; + + /** Output: Offset into the drm node to use for subsequent mmap call. 
*/ + __u64 offset; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* __DRM_UAPI_ROCKET_ACCEL_H__ */ -- cgit v1.2.3 From 0810d5ad88a18f1e6d549853a388ad0316f74e36 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 21 Jul 2025 11:17:31 +0200 Subject: accel/rocket: Add job submission IOCTL Using the DRM GPU scheduler infrastructure, with a scheduler for each core. Userspace can decide for a series of tasks to be executed sequentially in the same core, so SRAM locality can be taken advantage of. The job submission code was initially based on Panfrost. v2: - Remove hardcoded number of cores - Misc. style fixes (Jeffrey Hugo) - Repack IOCTL struct (Jeffrey Hugo) v3: - Adapt to a split of the register block in the DT bindings (Nicolas Frattaroli) - Make use of GPL-2.0-only for the copyright notice (Jeff Hugo) - Use drm_* logging functions (Thomas Zimmermann) - Rename reg i/o macros (Thomas Zimmermann) - Add padding to ioctls and check for zero (Jeff Hugo) - Improve error handling (Nicolas Frattaroli) v6: - Use mutexes guard (Markus Elfring) - Use u64_to_user_ptr (Jeff Hugo) - Drop rocket_fence (Rob Herring) v7: - Assign its own IOMMU domain to each client, for isolation (Daniel Stone and Robin Murphy) v8: - Use reset lines to reset the cores (Robin Murphy) - Use the macros to compute the values for the bitfields (Robin Murphy) - More descriptive name for the IRQ (Robin Murphy) - Simplify job interrupt handling (Robin Murphy) - Correctly acquire a reference to the IOMMU (Robin Murphy) - Specify the size of the embedded structs in the IOCTLs for future extensibility (Rob Herring) - Expose only 32 bits for the address of the regcmd BO (Robin Murphy) Tested-by: Heiko Stuebner Reviewed-by: Jeff Hugo Signed-off-by: Tomeu Vizoso Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20250721-6-10-rocket-v9-4-77ebd484941e@tomeuvizoso.net --- include/uapi/drm/rocket_accel.h | 64 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 
'include') diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h index 95720702b7c4..374f8370ac9d 100644 --- a/include/uapi/drm/rocket_accel.h +++ b/include/uapi/drm/rocket_accel.h @@ -12,8 +12,10 @@ extern "C" { #endif #define DRM_ROCKET_CREATE_BO 0x00 +#define DRM_ROCKET_SUBMIT 0x01 #define DRM_IOCTL_ROCKET_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_ROCKET_CREATE_BO, struct drm_rocket_create_bo) +#define DRM_IOCTL_ROCKET_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_SUBMIT, struct drm_rocket_submit) /** * struct drm_rocket_create_bo - ioctl argument for creating Rocket BOs. @@ -37,6 +39,68 @@ struct drm_rocket_create_bo { __u64 offset; }; +/** + * struct drm_rocket_task - A task to be run on the NPU + * + * A task is the smallest unit of work that can be run on the NPU. + */ +struct drm_rocket_task { + /** Input: DMA address to NPU mapping of register command buffer */ + __u32 regcmd; + + /** Input: Number of commands in the register command buffer */ + __u32 regcmd_count; +}; + +/** + * struct drm_rocket_job - A job to be run on the NPU + * + * The kernel will schedule the execution of this job taking into account its + * dependencies with other jobs. All tasks in the same job will be executed + * sequentially on the same core, to benefit from memory residency in SRAM. + */ +struct drm_rocket_job { + /** Input: Pointer to an array of struct drm_rocket_task. */ + __u64 tasks; + + /** Input: Pointer to a u32 array of the BOs that are read by the job. */ + __u64 in_bo_handles; + + /** Input: Pointer to a u32 array of the BOs that are written to by the job. */ + __u64 out_bo_handles; + + /** Input: Number of tasks passed in. */ + __u32 task_count; + + /** Input: Size in bytes of the structs in the @tasks field. */ + __u32 task_struct_size; + + /** Input: Number of input BO handles passed in (size is that times 4). */ + __u32 in_bo_handle_count; + + /** Input: Number of output BO handles passed in (size is that times 4). 
*/ + __u32 out_bo_handle_count; +}; + +/** + * struct drm_rocket_submit - ioctl argument for submitting commands to the NPU. + * + * The kernel will schedule the execution of these jobs in dependency order. + */ +struct drm_rocket_submit { + /** Input: Pointer to an array of struct drm_rocket_job. */ + __u64 jobs; + + /** Input: Number of jobs passed in. */ + __u32 job_count; + + /** Input: Size in bytes of the structs in the @jobs field. */ + __u32 job_struct_size; + + /** Reserved, must be zero. */ + __u64 reserved; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 525ad89dd90434d529b76a87b1c653a69fedc416 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 21 Jul 2025 11:17:32 +0200 Subject: accel/rocket: Add IOCTLs for synchronizing memory accesses The NPU cores have their own access to the memory bus, and this isn't cache coherent with the CPUs. Add IOCTLs so userspace can mark when the caches need to be flushed, and also when a writer job needs to be waited for before the buffer can be accessed from the CPU. Initially based on the same IOCTLs from the Etnaviv driver. 
v2: - Don't break UABI by reordering the IOCTL IDs (Jeff Hugo) v3: - Check that padding fields in IOCTLs are zero (Jeff Hugo) v6: - Fix conversion logic to make sure we use DMA_BIDIRECTIONAL when needed (Lucas Stach) v8: - Always sync BOs in both directions (Robin Murphy) Reviewed-by: Jeff Hugo Tested-by: Heiko Stuebner Signed-off-by: Tomeu Vizoso Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20250721-6-10-rocket-v9-5-77ebd484941e@tomeuvizoso.net --- include/uapi/drm/rocket_accel.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h index 374f8370ac9d..14b2e12b7c49 100644 --- a/include/uapi/drm/rocket_accel.h +++ b/include/uapi/drm/rocket_accel.h @@ -13,9 +13,13 @@ extern "C" { #define DRM_ROCKET_CREATE_BO 0x00 #define DRM_ROCKET_SUBMIT 0x01 +#define DRM_ROCKET_PREP_BO 0x02 +#define DRM_ROCKET_FINI_BO 0x03 #define DRM_IOCTL_ROCKET_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_ROCKET_CREATE_BO, struct drm_rocket_create_bo) #define DRM_IOCTL_ROCKET_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_SUBMIT, struct drm_rocket_submit) +#define DRM_IOCTL_ROCKET_PREP_BO DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_PREP_BO, struct drm_rocket_prep_bo) +#define DRM_IOCTL_ROCKET_FINI_BO DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_FINI_BO, struct drm_rocket_fini_bo) /** * struct drm_rocket_create_bo - ioctl argument for creating Rocket BOs. @@ -39,6 +43,36 @@ struct drm_rocket_create_bo { __u64 offset; }; +/** + * struct drm_rocket_prep_bo - ioctl argument for starting CPU ownership of the BO. + * + * Takes care of waiting for any NPU jobs that might still use the NPU and performs cache + * synchronization. + */ +struct drm_rocket_prep_bo { + /** Input: GEM handle of the buffer object. */ + __u32 handle; + + /** Reserved, must be zero. */ + __u32 reserved; + + /** Input: Amount of time to wait for NPU jobs. 
*/ + __s64 timeout_ns; +}; + +/** + * struct drm_rocket_fini_bo - ioctl argument for finishing CPU ownership of the BO. + * + * Synchronize caches for NPU access. + */ +struct drm_rocket_fini_bo { + /** Input: GEM handle of the buffer object. */ + __u32 handle; + + /** Reserved, must be zero. */ + __u32 reserved; +}; + /** * struct drm_rocket_task - A task to be run on the NPU * -- cgit v1.2.3 From 4df0bd5eb497c59e14924452026d6d70505706b5 Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Mon, 28 Jul 2025 10:03:36 +0530 Subject: drm/xe/uapi: Add documentation for DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING Add documentation for drm_xe_gem_create structure flag DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING. v2: Modified to be in a more generalised way. Signed-off-by: Priyanka Dandamudi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250728043336.3319521-1-priyanka.dandamudi@intel.com Signed-off-by: Tejas Upadhyay --- include/uapi/drm/xe_drm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index e2426413488f..c721e130c1d2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -760,7 +760,11 @@ struct drm_xe_device_query { * gem creation * * The @flags can be: - * - %DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING + * - %DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING - Modify the GEM object + * allocation strategy by deferring physical memory allocation + * until the object is either bound to a virtual memory region via + * VM_BIND or accessed by the CPU. As a result, no backing memory is + * reserved at the time of GEM object creation. 
* - %DRM_XE_GEM_CREATE_FLAG_SCANOUT * - %DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM - When using VRAM as a * possible placement, ensure that the corresponding VRAM allocation -- cgit v1.2.3 From d94a2a00d2b8878678607c2969fee3b4e59126cb Mon Sep 17 00:00:00 2001 From: Brigham Campbell Date: Mon, 21 Jul 2025 19:53:08 -0600 Subject: drm: Create mipi_dsi_dual* macros Create mipi_dsi_dual, mipi_dsi_dual_dcs_write_seq_multi, and mipi_dsi_dual_generic_write_seq_multi macros for panels which are driven by two parallel serial interfaces. This allows for the reduction of code duplication in drivers for these panels. Remove mipi_dsi_dual_dcs_write_seq_multi definition from panel-novatek-nt36523.c to avoid the duplicate definition. Make novatek driver pass mipi_dsi_context struct as a pointer. Reviewed-by: Dmitry Baryshkov Signed-off-by: Brigham Campbell Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250722015313.561966-2-me@brighamcampbell.com --- include/drm/drm_mipi_dsi.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 369b0d8830c3..f1dc822f69d6 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -289,6 +289,10 @@ int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, const void *payload, size_t size); void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, const void *payload, size_t size); +void mipi_dsi_dual_generic_write_multi(struct mipi_dsi_multi_context *ctx, + struct mipi_dsi_device *dsi1, + struct mipi_dsi_device *dsi2, + const void *payload, size_t size); ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, size_t num_params, void *data, size_t size); u32 drm_mipi_dsi_get_input_bus_fmt(enum mipi_dsi_pixel_format dsi_format); @@ -329,6 +333,10 @@ int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi, 
const void *data, size_t len); void mipi_dsi_dcs_write_buffer_multi(struct mipi_dsi_multi_context *ctx, const void *data, size_t len); +void mipi_dsi_dual_dcs_write_buffer_multi(struct mipi_dsi_multi_context *ctx, + struct mipi_dsi_device *dsi1, + struct mipi_dsi_device *dsi2, + const void *data, size_t len); ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *dsi, u8 cmd, const void *data, size_t len); ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data, @@ -431,6 +439,93 @@ void mipi_dsi_dcs_set_tear_off_multi(struct mipi_dsi_multi_context *ctx); mipi_dsi_dcs_write_buffer_multi(ctx, d, ARRAY_SIZE(d)); \ } while (0) +/** + * mipi_dsi_dual - send the same MIPI DSI command to two interfaces + * + * This macro will send the specified MIPI DSI command twice, once per each of + * the two interfaces supplied. This is useful for reducing duplication of code + * in panel drivers which use two parallel serial interfaces. + * + * Note that the _func parameter cannot accept a macro such as + * mipi_dsi_generic_write_multi() or mipi_dsi_dcs_write_buffer_multi(). See + * mipi_dsi_dual_generic_write_multi() and + * mipi_dsi_dual_dcs_write_buffer_multi() instead. + * + * WARNING: This macro reuses the _func argument and the optional trailing + * arguments twice each, which may cause unintended side effects. For example, + * adding the postfix increment ++ operator to one of the arguments to be + * passed to _func will cause the variable to be incremented twice instead of + * once and the variable will be its original value + 1 when sent to _dsi2. + * + * @_func: MIPI DSI function to pass context and arguments into + * @_ctx: Context for multiple DSI transactions + * @_dsi1: First DSI interface to act as recipient of the MIPI DSI command + * @_dsi2: Second DSI interface to act as recipient of the MIPI DSI command + * @...: Arguments to pass to MIPI DSI function or macro + */ + +#define mipi_dsi_dual(_func, _ctx, _dsi1, _dsi2, ...) 
\ + do { \ + struct mipi_dsi_multi_context *_ctxcpy = (_ctx); \ + _ctxcpy->dsi = (_dsi1); \ + (_func)(_ctxcpy, ##__VA_ARGS__); \ + _ctxcpy->dsi = (_dsi2); \ + (_func)(_ctxcpy, ##__VA_ARGS__); \ + } while (0) + +/** + * mipi_dsi_dual_generic_write_seq_multi - transmit data using a generic write + * packet to two dsi interfaces, one after the other + * + * This macro will send the specified generic packet twice, once per each of + * the two interfaces supplied. This is useful for reducing duplication of code + * in panel drivers which use two parallel serial interfaces. + * + * Note that if an error occurs while transmitting the packet to the first DSI + * interface, the packet will not be sent to the second DSI interface. + * + * This macro will print errors for you and error handling is optimized for + * callers that call this multiple times in a row. + * + * @_ctx: Context for multiple DSI transactions + * @_dsi1: First DSI interface to act as recipient of packet + * @_dsi2: Second DSI interface to act as recipient of packet + * @_seq: buffer containing the payload + */ +#define mipi_dsi_dual_generic_write_seq_multi(_ctx, _dsi1, _dsi2, _seq...) \ + do { \ + static const u8 d[] = { _seq }; \ + mipi_dsi_dual_generic_write_multi(_ctx, _dsi1, _dsi2, d, \ + ARRAY_SIZE(d)); \ + } while (0) + +/** + * mipi_dsi_dual_dcs_write_seq_multi - transmit a DCS command with payload to + * two dsi interfaces, one after the other + * + * This macro will send the specified DCS command with payload twice, once per + * each of the two interfaces supplied. This is useful for reducing duplication + * of code in panel drivers which use two parallel serial interfaces. + * + * Note that if an error occurs while transmitting the payload to the first DSI + * interface, the payload will not be sent to the second DSI interface. + * + * This macro will print errors for you and error handling is optimized for + * callers that call this multiple times in a row. 
+ * + * @_ctx: Context for multiple DSI transactions + * @_dsi1: First DSI interface to act as recipient of packet + * @_dsi2: Second DSI interface to act as recipient of packet + * @_cmd: Command + * @_seq: buffer containing the payload + */ +#define mipi_dsi_dual_dcs_write_seq_multi(_ctx, _dsi1, _dsi2, _cmd, _seq...) \ + do { \ + static const u8 d[] = { _cmd, _seq }; \ + mipi_dsi_dual_dcs_write_buffer_multi(_ctx, _dsi1, _dsi2, d, \ + ARRAY_SIZE(d)); \ + } while (0) + /** * struct mipi_dsi_driver - DSI driver * @driver: device driver model driver -- cgit v1.2.3 From 79b6bb18f849818140dd351f6e76a097efe99e9f Mon Sep 17 00:00:00 2001 From: Brigham Campbell Date: Mon, 21 Jul 2025 19:53:10 -0600 Subject: drm: Remove unused MIPI write seq and chatty functions Remove the deprecated mipi_dsi_generic_write_seq() and mipi_dsi_generic_write_chatty() functions now that they are no longer used. Reviewed-by: Douglas Anderson Signed-off-by: Brigham Campbell Reviewed-by: Dmitry Baryshkov Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250722015313.561966-4-me@brighamcampbell.com --- include/drm/drm_mipi_dsi.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index f1dc822f69d6..ea523eb35b08 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -285,8 +285,6 @@ void mipi_dsi_picture_parameter_set_multi(struct mipi_dsi_multi_context *ctx, ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, size_t size); -int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, - const void *payload, size_t size); void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, const void *payload, size_t size); void mipi_dsi_dual_generic_write_multi(struct mipi_dsi_multi_context *ctx, @@ -387,27 +385,6 @@ void mipi_dsi_dcs_set_tear_scanline_multi(struct mipi_dsi_multi_context *ctx, u16 scanline); void 
mipi_dsi_dcs_set_tear_off_multi(struct mipi_dsi_multi_context *ctx); -/** - * mipi_dsi_generic_write_seq - transmit data using a generic write packet - * - * This macro will print errors for you and will RETURN FROM THE CALLING - * FUNCTION (yes this is non-intuitive) upon error. - * - * Because of the non-intuitive return behavior, THIS MACRO IS DEPRECATED. - * Please replace calls of it with mipi_dsi_generic_write_seq_multi(). - * - * @dsi: DSI peripheral device - * @seq: buffer containing the payload - */ -#define mipi_dsi_generic_write_seq(dsi, seq...) \ - do { \ - static const u8 d[] = { seq }; \ - int ret; \ - ret = mipi_dsi_generic_write_chatty(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) \ - return ret; \ - } while (0) - /** * mipi_dsi_generic_write_seq_multi - transmit data using a generic write packet * -- cgit v1.2.3 From 86de56487e5f0017ffd5930b0dbd9dda43048849 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 30 Jul 2025 11:58:52 -0700 Subject: bpf: Allow syscall bpf programs to call non-recur helpers Allow syscall programs to call non-recur helpers too since syscall bpf programs run in process context through bpf syscall, BPF_PROG_TEST_RUN, and cannot run recursively. bpf_task_storage_{get,set} have "_recur" versions that call trylock instead of taking the lock directly to avoid deadlock when called by bpf programs that run recursively. Currently, only bpf_lsm, bpf_iter, struct_ops without private stack are allowed to call the non-recur helpers since they cannot be recursively called in another bpf program.
Signed-off-by: Amery Hung Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20250730185903.3574598-2-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 94defa405c85..c823f8efe3ed 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -962,6 +962,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) case BPF_PROG_TYPE_STRUCT_OPS: return prog->aux->jits_use_priv_stack; case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_SYSCALL: return false; default: return true; -- cgit v1.2.3 From 81aa3c7c62049b42959bd4054c3a3ed34b2d5bf4 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 5 Aug 2025 15:59:02 +0200 Subject: drm/pagemap: Rename drm_pagemap_device_addr to drm_pagemap_addr Rename this struct to the more generic name drm_pagemap_addr so it can be used in a broader context, such as DMA mappings of CPU memory. 
Reviewed-by: Matthew Brost Acked-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250805140028.599361-2-francois.dugast@intel.com Signed-off-by: Francois Dugast --- include/drm/drm_gpusvm.h | 4 ++-- include/drm/drm_pagemap.h | 32 ++++++++++++++++---------------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 4aedc5423aff..8d613e9b2690 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -17,7 +17,7 @@ struct drm_gpusvm_notifier; struct drm_gpusvm_ops; struct drm_gpusvm_range; struct drm_pagemap; -struct drm_pagemap_device_addr; +struct drm_pagemap_addr; /** * struct drm_gpusvm_ops - Operations structure for GPU SVM @@ -154,7 +154,7 @@ struct drm_gpusvm_range { struct interval_tree_node itree; struct list_head entry; unsigned long notifier_seq; - struct drm_pagemap_device_addr *dma_addr; + struct drm_pagemap_addr *dma_addr; struct drm_pagemap *dpagemap; struct drm_gpusvm_range_flags flags; }; diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index e5f20a1235be..69d6ee49a3de 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -23,7 +23,7 @@ enum drm_interconnect_protocol { }; /** - * struct drm_pagemap_device_addr - Device address representation. + * struct drm_pagemap_addr - Address representation. * @addr: The dma address or driver-defined address for driver private interconnects. * @proto: The interconnect protocol. * @order: The page order of the device mapping. (Size is PAGE_SIZE << order). @@ -32,7 +32,7 @@ enum drm_interconnect_protocol { * Note: There is room for improvement here. We should be able to pack into * 64 bits. 
*/ -struct drm_pagemap_device_addr { +struct drm_pagemap_addr { dma_addr_t addr; u64 proto : 54; u64 order : 8; @@ -40,21 +40,21 @@ struct drm_pagemap_device_addr { }; /** - * drm_pagemap_device_addr_encode() - Encode a dma address with metadata + * drm_pagemap_addr_encode() - Encode a dma address with metadata * @addr: The dma address or driver-defined address for driver private interconnects. * @proto: The interconnect protocol. * @order: The page order of the dma mapping. (Size is PAGE_SIZE << order). * @dir: The DMA direction. * - * Return: A struct drm_pagemap_device_addr encoding the above information. + * Return: A struct drm_pagemap_addr encoding the above information. */ -static inline struct drm_pagemap_device_addr -drm_pagemap_device_addr_encode(dma_addr_t addr, - enum drm_interconnect_protocol proto, - unsigned int order, - enum dma_data_direction dir) +static inline struct drm_pagemap_addr +drm_pagemap_addr_encode(dma_addr_t addr, + enum drm_interconnect_protocol proto, + unsigned int order, + enum dma_data_direction dir) { - return (struct drm_pagemap_device_addr) { + return (struct drm_pagemap_addr) { .addr = addr, .proto = proto, .order = order, @@ -75,11 +75,11 @@ struct drm_pagemap_ops { * @order: The page order of the device mapping. (Size is PAGE_SIZE << order). * @dir: The transfer direction. */ - struct drm_pagemap_device_addr (*device_map)(struct drm_pagemap *dpagemap, - struct device *dev, - struct page *page, - unsigned int order, - enum dma_data_direction dir); + struct drm_pagemap_addr (*device_map)(struct drm_pagemap *dpagemap, + struct device *dev, + struct page *page, + unsigned int order, + enum dma_data_direction dir); /** * @device_unmap: Unmap a device address previously obtained using @device_map. 
@@ -90,7 +90,7 @@ struct drm_pagemap_ops { */ void (*device_unmap)(struct drm_pagemap *dpagemap, struct device *dev, - struct drm_pagemap_device_addr addr); + struct drm_pagemap_addr addr); /** * @populate_mm: Populate part of the mm with @dpagemap memory, -- cgit v1.2.3 From f35a6cdf8a6d69f2fb35ece202a09f13fe7c87b2 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 5 Aug 2025 15:59:03 +0200 Subject: drm/pagemap: Use struct drm_pagemap_addr in mapping and copy functions This struct embeds more information than just the DMA address. This will help later to support folio orders greater than zero. At this point, there is no functional change as the only struct member used is addr. In Xe, adapt to the new drm_gpusvm_devmem_ops type signatures using struct drm_pagemap_addr, as well as the internal xe SVM functions implementing those operations. The use of this struct is propagated to xe_migrate as it makes indexed accesses to the next DMA address but they are no longer contiguous. v2: - Rename drm_pagemap_device_addr to drm_pagemap_addr (Matthew Brost) - Squash with patch for Xe (Matthew Brost) - Set proto and dir for completeness (Matthew Brost) - Assess DMA map protocol (Matthew Brost) Cc: Matthew Brost Reviewed-by: Matthew Brost Acked-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250805140028.599361-3-francois.dugast@intel.com Signed-off-by: Francois Dugast --- include/drm/drm_pagemap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index 69d6ee49a3de..1d5919a99139 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -170,7 +170,7 @@ struct drm_pagemap_devmem_ops { /** * @copy_to_devmem: Copy to device memory (required for migration) * @pages: Pointer to array of device memory pages (destination) - * @dma_addr: Pointer to array of DMA addresses (source) + * @pagemap_addr: Pointer to array of DMA information (source) * @npages: 
Number of pages to copy * * Copy pages to device memory. @@ -178,13 +178,13 @@ struct drm_pagemap_devmem_ops { * Return: 0 on success, a negative error code on failure. */ int (*copy_to_devmem)(struct page **pages, - dma_addr_t *dma_addr, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages); /** * @copy_to_ram: Copy to system RAM (required for migration) * @pages: Pointer to array of device memory pages (source) - * @dma_addr: Pointer to array of DMA addresses (destination) + * @pagemap_addr: Pointer to array of DMA information (destination) * @npages: Number of pages to copy * * Copy pages to system RAM. @@ -192,7 +192,7 @@ struct drm_pagemap_devmem_ops { * Return: 0 on success, a negative error code on failure. */ int (*copy_to_ram)(struct page **pages, - dma_addr_t *dma_addr, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages); }; -- cgit v1.2.3 From d755ff6063852cbd43c666726b69333d33d0d379 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 5 Aug 2025 15:59:04 +0200 Subject: drm/pagemap: DMA map folios when possible If the page is part of a folio, DMA map the whole folio at once instead of mapping individual pages one after the other. For example if 2MB folios are used instead of 4KB pages, this reduces the number of DMA mappings by a factor of 512. The folio order (and consequently, the size) is persisted in the struct drm_pagemap_addr to be available at the time of unmapping.
v2: - Initialize order variable (Matthew Brost) - Set proto and dir for completeness (Matthew Brost) - Do not populate drm_pagemap_addr, document it (Matthew Brost) - Add and use macro NR_PAGES(order) (Matthew Brost) Cc: Matthew Brost Reviewed-by: Matthew Brost Acked-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250805140028.599361-4-francois.dugast@intel.com Signed-off-by: Francois Dugast --- include/drm/drm_pagemap.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index 1d5919a99139..f6e7e234c089 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -6,6 +6,8 @@ #include #include +#define NR_PAGES(order) (1U << (order)) + struct drm_pagemap; struct drm_pagemap_zdd; struct device; @@ -173,7 +175,9 @@ struct drm_pagemap_devmem_ops { * @pagemap_addr: Pointer to array of DMA information (source) * @npages: Number of pages to copy * - * Copy pages to device memory. + * Copy pages to device memory. If the order of a @pagemap_addr entry + * is greater than 0, the entry is populated but subsequent entries + * within the range of that order are not populated. * * Return: 0 on success, a negative error code on failure. */ @@ -187,7 +191,9 @@ struct drm_pagemap_devmem_ops { * @pagemap_addr: Pointer to array of DMA information (destination) * @npages: Number of pages to copy * - * Copy pages to system RAM. + * Copy pages to system RAM. If the order of a @pagemap_addr entry + * is greater than 0, the entry is populated but subsequent entries + * within the range of that order are not populated. * * Return: 0 on success, a negative error code on failure. 
*/ -- cgit v1.2.3 From d87a513d093726d121dd5c816e26803111a259d0 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 6 Aug 2025 09:25:38 -0700 Subject: bpf: Allow struct_ops to get map id by kdata Add bpf_struct_ops_id() to enable struct_ops implementors to use struct_ops map id as the unique id of a struct_ops in their subsystem. A subsystem that wishes to create a mapping between id and struct_ops instance pointer can update the mapping accordingly during bpf_struct_ops::reg(), unreg(), and update(). Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250806162540.681679-2-ameryhung@gmail.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc700925b802..e7ee089e8a31 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1985,6 +1985,7 @@ static inline void bpf_module_put(const void *data, struct module *owner) module_put(owner); } int bpf_struct_ops_link_create(union bpf_attr *attr); +u32 bpf_struct_ops_id(const void *kdata); #ifdef CONFIG_NET /* Define it here to avoid the use of forward declaration */ -- cgit v1.2.3 From 8bcfcb3bd3e38b8f3bb7e5eb3acb4120500994a0 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Tue, 29 Jul 2025 15:06:32 +0200 Subject: ASoC: Intel: avs: Parse conditional path tuples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Conditional paths need information about their source and sink paths to be created which is then stored to keep track of who their parents are. That information allows to change their state accordingly to what is currently happening to their parent paths. 
Signed-off-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250729130633.310388-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/uapi/sound/intel/avs/tokens.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/sound/intel/avs/tokens.h b/include/uapi/sound/intel/avs/tokens.h index c9f845b3c523..f3ff6aae09a9 100644 --- a/include/uapi/sound/intel/avs/tokens.h +++ b/include/uapi/sound/intel/avs/tokens.h @@ -133,6 +133,21 @@ enum avs_tplg_token { AVS_TKN_PATH_FE_FMT_ID_U32 = 1902, AVS_TKN_PATH_BE_FMT_ID_U32 = 1903, + /* struct avs_tplg_path_template (conditional) */ + AVS_TKN_CONDPATH_TMPL_ID_U32 = 1801, + AVS_TKN_CONDPATH_TMPL_SOURCE_TPLG_NAME_STRING = 2002, + AVS_TKN_CONDPATH_TMPL_SOURCE_PATH_TMPL_ID_U32 = 2003, + AVS_TKN_CONDPATH_TMPL_SINK_TPLG_NAME_STRING = 2004, + AVS_TKN_CONDPATH_TMPL_SINK_PATH_TMPL_ID_U32 = 2005, + AVS_TKN_CONDPATH_TMPL_COND_TYPE_U32 = 2006, + AVS_TKN_CONDPATH_TMPL_OVERRIDABLE_BOOL = 2007, + AVS_TKN_CONDPATH_TMPL_PRIORITY_U8 = 2008, + + /* struct avs_tplg_path (conditional) */ + AVS_TKN_CONDPATH_ID_U32 = 1901, + AVS_TKN_CONDPATH_SOURCE_PATH_ID_U32 = 2102, + AVS_TKN_CONDPATH_SINK_PATH_ID_U32 = 2103, + /* struct avs_tplg_pin_format */ AVS_TKN_PIN_FMT_INDEX_U32 = 2201, AVS_TKN_PIN_FMT_IOBS_U32 = 2202, -- cgit v1.2.3 From 40229ea9fa437d6a1feb86be9dd419e843ec754c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 6 Aug 2025 04:41:41 +0000 Subject: ASoC: soc-dapm: move snd_soc_dapm_get_bias_level() to soc-dpcm Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. 
Because dapm will not be used on user driver in the future, Let's move snd_soc_dapm_get_bias_level() to soc-dpcm.c Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/874iul83ju.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 0b5c7e6a90c8..f1318cdcf7e4 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -722,6 +722,7 @@ struct snd_soc_dapm_context *snd_soc_dapm_kcontrol_dapm(struct snd_kcontrol *kco struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_widget(struct snd_kcontrol *kcontrol); int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); +enum snd_soc_bias_level snd_soc_dapm_get_bias_level(struct snd_soc_dapm_context *dapm); #define for_each_dapm_widgets(list, i, widget) \ for ((i) = 0; \ @@ -747,18 +748,6 @@ static inline void snd_soc_dapm_init_bias_level( dapm->bias_level = level; } -/** - * snd_soc_dapm_get_bias_level() - Get current DAPM bias level - * @dapm: The context for which to get the bias level - * - * Returns: The current bias level of the passed DAPM context. - */ -static inline enum snd_soc_bias_level snd_soc_dapm_get_bias_level( - struct snd_soc_dapm_context *dapm) -{ - return dapm->bias_level; -} - /** * snd_soc_dapm_widget_for_each_path - Iterates over all paths in the * specified direction of a widget -- cgit v1.2.3 From e93703687cd75a0c7d330e15a3d9ff20b12f3d1d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 6 Aug 2025 04:41:46 +0000 Subject: ASoC: soc-dapm: move snd_soc_dapm_init_bias_level() to soc-dpcm Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. 
Because dapm will not be used on user driver in the future, Let's move snd_soc_dapm_init_bias_level() to soc-dpcm.c Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/8734a583jp.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index f1318cdcf7e4..53bf6590bd4b 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -723,31 +723,13 @@ struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_widget(struct snd_kcontrol *kc int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); enum snd_soc_bias_level snd_soc_dapm_get_bias_level(struct snd_soc_dapm_context *dapm); +void snd_soc_dapm_init_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); #define for_each_dapm_widgets(list, i, widget) \ for ((i) = 0; \ (i) < list->num_widgets && (widget = list->widgets[i]); \ (i)++) -/** - * snd_soc_dapm_init_bias_level() - Initialize DAPM bias level - * @dapm: The DAPM context to initialize - * @level: The DAPM level to initialize to - * - * This function only sets the driver internal state of the DAPM level and will - * not modify the state of the device. Hence it should not be used during normal - * operation, but only to synchronize the internal state to the device state. - * E.g. during driver probe to set the DAPM level to the one corresponding with - * the power-on reset state of the device. - * - * To change the DAPM state of the device use snd_soc_dapm_set_bias_level(). 
- */ -static inline void snd_soc_dapm_init_bias_level( - struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level) -{ - dapm->bias_level = level; -} - /** * snd_soc_dapm_widget_for_each_path - Iterates over all paths in the * specified direction of a widget -- cgit v1.2.3 From 7b900b5878a910d60ecfa67448bbe81e4e2bb8b7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 6 Aug 2025 04:41:51 +0000 Subject: ASoC: soc-component: unpack snd_soc_component_force_bias_level() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. This patch unpack component wrapper to cleanup it. The function will be kept by using macro for a while, but will be replaced/cleanuped in the future. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/871ppp83jk.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 17 ----------------- include/sound/soc-dapm.h | 3 +++ 2 files changed, 3 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index 2caa807c6249..c616f489c237 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -314,23 +314,6 @@ snd_soc_component_get_bias_level(struct snd_soc_component *component) snd_soc_component_get_dapm(component)); } -/** - * snd_soc_component_force_bias_level() - Set the COMPONENT DAPM bias level - * @component: The COMPONENT for which to set the level - * @level: The level to set to - * - * Forces the COMPONENT bias level to a specific state. See - * snd_soc_dapm_force_bias_level(). 
- */ -static inline int -snd_soc_component_force_bias_level(struct snd_soc_component *component, - enum snd_soc_bias_level level) -{ - return snd_soc_dapm_force_bias_level( - snd_soc_component_get_dapm(component), - level); -} - /** * snd_soc_dapm_kcontrol_component() - Returns the component associated to a * kcontrol diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 53bf6590bd4b..5bd9a27b12ba 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -725,6 +725,9 @@ int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_so enum snd_soc_bias_level snd_soc_dapm_get_bias_level(struct snd_soc_dapm_context *dapm); void snd_soc_dapm_init_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); +// REMOVE ME !! +#define snd_soc_component_force_bias_level(c, l) snd_soc_dapm_force_bias_level(&(c)->dapm, l) + #define for_each_dapm_widgets(list, i, widget) \ for ((i) = 0; \ (i) < list->num_widgets && (widget = list->widgets[i]); \ -- cgit v1.2.3 From 7509e7e4288976e3028a6e8482d979ca77f584a7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 6 Aug 2025 04:41:55 +0000 Subject: ASoC: soc-component: unpack snd_soc_component_get_bias_level() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. This patch unpack component wrapper to cleanup it. The function will be kept by using macro for a while, but will be replaced/cleanuped in the future. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87zfcd6oz0.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 13 ------------- include/sound/soc-dapm.h | 1 + 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index c616f489c237..86ad2802879f 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -301,19 +301,6 @@ snd_soc_component_init_bias_level(struct snd_soc_component *component, snd_soc_component_get_dapm(component), level); } -/** - * snd_soc_component_get_bias_level() - Get current COMPONENT DAPM bias level - * @component: The COMPONENT for which to get the DAPM bias level - * - * Returns: The current DAPM bias level of the COMPONENT. - */ -static inline enum snd_soc_bias_level -snd_soc_component_get_bias_level(struct snd_soc_component *component) -{ - return snd_soc_dapm_get_bias_level( - snd_soc_component_get_dapm(component)); -} - /** * snd_soc_dapm_kcontrol_component() - Returns the component associated to a * kcontrol diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 5bd9a27b12ba..6e2db79d6a97 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -727,6 +727,7 @@ void snd_soc_dapm_init_bias_level(struct snd_soc_dapm_context *dapm, enum snd_so // REMOVE ME !! 
#define snd_soc_component_force_bias_level(c, l) snd_soc_dapm_force_bias_level(&(c)->dapm, l) +#define snd_soc_component_get_bias_level(c) snd_soc_dapm_get_bias_level(&(c)->dapm) #define for_each_dapm_widgets(list, i, widget) \ for ((i) = 0; \ -- cgit v1.2.3 From cf25eb8eae91bcae9b2065d84b0c0ba0f6d9dd34 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 6 Aug 2025 04:42:05 +0000 Subject: ASoC: soc-component: unpack snd_soc_component_init_bias_level() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. This patch unpack component wrapper to cleanup it. This patch keeps compatible by using define, but old name will be replaced on each drivers and removed from ASoC in the future. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87y0rx6oyx.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 15 --------------- include/sound/soc-dapm.h | 1 + 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index 86ad2802879f..54bfa0cb1085 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -286,21 +286,6 @@ static inline struct snd_soc_dapm_context *snd_soc_component_get_dapm( return &component->dapm; } -/** - * snd_soc_component_init_bias_level() - Initialize COMPONENT DAPM bias level - * @component: The COMPONENT for which to initialize the DAPM bias level - * @level: The DAPM level to initialize to - * - * Initializes the COMPONENT DAPM bias level. 
See snd_soc_dapm_init_bias_level() - */ -static inline void -snd_soc_component_init_bias_level(struct snd_soc_component *component, - enum snd_soc_bias_level level) -{ - snd_soc_dapm_init_bias_level( - snd_soc_component_get_dapm(component), level); -} - /** * snd_soc_dapm_kcontrol_component() - Returns the component associated to a * kcontrol diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 6e2db79d6a97..2e9196b6ffba 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -728,6 +728,7 @@ void snd_soc_dapm_init_bias_level(struct snd_soc_dapm_context *dapm, enum snd_so // REMOVE ME !! #define snd_soc_component_force_bias_level(c, l) snd_soc_dapm_force_bias_level(&(c)->dapm, l) #define snd_soc_component_get_bias_level(c) snd_soc_dapm_get_bias_level(&(c)->dapm) +#define snd_soc_component_init_bias_level(c, l) snd_soc_dapm_init_bias_level(&(c)->dapm, l) #define for_each_dapm_widgets(list, i, widget) \ for ((i) = 0; \ -- cgit v1.2.3 From 32dffd4c3e3129e3d9bb378af8d80bb57dc3038b Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Fri, 8 Aug 2025 15:17:41 +0900 Subject: ASoC: dmaengine_pcm: Add port_window_size to DAI dma data struct The port_window_size is a struct member of dma slave channel runtime config. It's the length of the register area in words the data need to be accessed on the device side. It is only used for devices which is using an area instead of a single register to send or receive the data. Typically the DMA loops in this area in order to transfer the data. It's useful for cases that reading/writing multiple registers in DMA transactions. 
Signed-off-by: Chancel Liu Link: https://patch.msgid.link/20250808061741.187414-1-chancel.liu@nxp.com Signed-off-by: Mark Brown --- include/sound/dmaengine_pcm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index 1ef13bcdc43f..9472f0a966a2 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -69,6 +69,10 @@ struct dma_chan *snd_dmaengine_pcm_get_chan(struct snd_pcm_substream *substream) * @peripheral_config: peripheral configuration for programming peripheral * for dmaengine transfer * @peripheral_size: peripheral configuration buffer size + * @port_window_size: The length of the register area in words the data need + * to be accessed on the device side. It is only used for devices which are using + * an area instead of a single register to send/receive the data. Typically the + * DMA loops in this area in order to transfer the data. */ struct snd_dmaengine_dai_dma_data { dma_addr_t addr; @@ -80,6 +84,7 @@ struct snd_dmaengine_dai_dma_data { unsigned int flags; void *peripheral_config; size_t peripheral_size; + u32 port_window_size; }; void snd_dmaengine_pcm_set_config_from_dai_data( -- cgit v1.2.3 From e83dcd139e776ebb86d5e88e13282580407278e4 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Sun, 3 Aug 2025 21:11:10 +0800 Subject: ASoC: tas2781: Add keyword "init" in profile section Since version 0x105, the keyword 'init' was introduced into the profile, which is used for chip initialization, particularly to store common settings for other non-initialization profiles.
Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250803131110.1443-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781-dsp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/sound/tas2781-dsp.h b/include/sound/tas2781-dsp.h index c3a9efa73d5d..a21f34c0266e 100644 --- a/include/sound/tas2781-dsp.h +++ b/include/sound/tas2781-dsp.h @@ -198,6 +198,14 @@ struct tasdevice_rca { int ncfgs; struct tasdevice_config_info **cfg_info; int profile_cfg_id; + /* + * Since version 0x105, the keyword 'init' was introduced into the + * profile, which is used for chip initialization, particularly to + * store common settings for other non-initialization profiles. + * if (init_profile_id < 0) + * No init profile inside the RCA firmware. + */ + int init_profile_id; }; void tasdevice_select_cfg_blk(void *context, int conf_no, -- cgit v1.2.3 From f4ee43ae6ea8c509d470f20e7b446adf5a167dba Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Sun, 10 Aug 2025 20:23:58 +0800 Subject: ALSA: hda: Add TAS5825 support Add TAS5825 support in TI's HDA driver. TAS5825 is an on-chip DSP, but no calibration is required, and no global address support smart amplifier devices. 
Signed-off-by: Baojun Xu Acked-by: Mark Brown Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250810122358.1575-1-baojun.xu@ti.com --- include/sound/tas2781-dsp.h | 3 +++ include/sound/tas2781.h | 4 ++-- include/sound/tas5825-tlv.h | 24 ++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 include/sound/tas5825-tlv.h (limited to 'include') diff --git a/include/sound/tas2781-dsp.h b/include/sound/tas2781-dsp.h index c3a9efa73d5d..49bbf24d6559 100644 --- a/include/sound/tas2781-dsp.h +++ b/include/sound/tas2781-dsp.h @@ -34,6 +34,7 @@ #define PPC3_VERSION_TAS2781_BASIC_MIN 0x14600 #define PPC3_VERSION_TAS2781_ALPHA_MIN 0x4a00 #define PPC3_VERSION_TAS2781_BETA_MIN 0x19400 +#define PPC3_VERSION_TAS5825_BASE 0x114200 #define TASDEVICE_DEVICE_SUM 8 #define TASDEVICE_CONFIG_SUM 64 @@ -53,6 +54,8 @@ enum tasdevice_dsp_dev_idx { TASDEVICE_DSP_TAS_2781_DUAL_MONO, TASDEVICE_DSP_TAS_2781_21, TASDEVICE_DSP_TAS_2781_QUAD, + TASDEVICE_DSP_TAS_5825_MONO, + TASDEVICE_DSP_TAS_5825_DUAL, TASDEVICE_DSP_TAS_MAX_DEVICE }; diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 3875e92f1ec5..f0aefc04a957 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -49,9 +49,9 @@ #define TASDEVICE_REG(book, page, reg) (((book * 256 * 128) + \ (page * 128)) + reg) -/* Software Reset */ +/* Software Reset, compatible with new device (TAS5825).
*/ #define TASDEVICE_REG_SWRESET TASDEVICE_REG(0x0, 0x0, 0x01) -#define TASDEVICE_REG_SWRESET_RESET BIT(0) +#define TASDEVICE_REG_SWRESET_RESET (BIT(0) | BIT(4)) /* Checksum */ #define TASDEVICE_CHECKSUM_REG TASDEVICE_REG(0x0, 0x0, 0x7e) diff --git a/include/sound/tas5825-tlv.h b/include/sound/tas5825-tlv.h new file mode 100644 index 000000000000..95f2d3fad120 --- /dev/null +++ b/include/sound/tas5825-tlv.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// +// ALSA SoC Texas Instruments TAS5825 Audio Smart Amplifier +// +// Copyright (C) 2025 Texas Instruments Incorporated +// https://www.ti.com +// +// The TAS5825 hda driver implements for one or two TAS5825 chips. +// +// Author: Baojun Xu +// + +#ifndef __TAS5825_TLV_H__ +#define __TAS5825_TLV_H__ + +#define TAS5825_DVC_LEVEL TASDEVICE_REG(0x0, 0x0, 0x4c) +#define TAS5825_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x54) + +static const __maybe_unused DECLARE_TLV_DB_SCALE( + tas5825_dvc_tlv, -10300, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE( + tas5825_amp_tlv, -1550, 50, 0); + +#endif -- cgit v1.2.3 From 2c223f7239f376a90d71903ec474ba887cf21d94 Mon Sep 17 00:00:00 2001 From: Oreoluwa Babatunde Date: Wed, 6 Aug 2025 10:24:21 -0700 Subject: of: reserved_mem: Restructure call site for dma_contiguous_early_fixup() Restructure the call site for dma_contiguous_early_fixup() to where the reserved_mem nodes are being parsed from the DT so that dma_mmu_remap[] is populated before dma_contiguous_remap() is called. 
Fixes: 8a6e02d0c00e ("of: reserved_mem: Restructure how the reserved memory regions are processed") Signed-off-by: Oreoluwa Babatunde Tested-by: William Zhang Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250806172421.2748302-1-oreoluwa.babatunde@oss.qualcomm.com --- include/linux/dma-map-ops.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f48e5fb88bd5..332b80c42b6f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -153,6 +153,9 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, { __free_pages(page, get_order(size)); } +static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} #endif /* CONFIG_DMA_CMA*/ #ifdef CONFIG_DMA_DECLARE_COHERENT -- cgit v1.2.3 From 17e8b7e08fa8bf7a936f70444a42a88750410251 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Jul 2025 09:48:54 +0200 Subject: fs: mark file_remove_privs_flags static file_remove_privs_flags is only used inside of inode.c, mark it static. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250724074854.3316911-1-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..796319914b0a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3393,7 +3393,6 @@ static inline struct inode *new_inode_pseudo(struct super_block *sb) extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); -extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); -- cgit v1.2.3 From 4e021920812d164bb02c30cc40e08a3681b1c755 Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Wed, 30 Jul 2025 20:18:53 +0000 Subject: fs: document 'name' parameter for name_contains_dotdot() The kernel-doc for name_contains_dotdot() was missing the @name parameter description, leading to a warning during make htmldocs. Add the missing documentation to resolve this warning. Signed-off-by: Kriish Sharma Link: https://lore.kernel.org/20250730201853.8436-1-kriish.sharma2006@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 796319914b0a..780e9c774c54 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3281,7 +3281,7 @@ static inline bool is_dot_dotdot(const char *name, size_t len) /** * name_contains_dotdot - check if a file name contains ".." path components - * + * @name: File path string to check * Search for ".." surrounded by either '/' or start/end of string. 
*/ static inline bool name_contains_dotdot(const char *name) -- cgit v1.2.3 From 56ecfd9175b999dfc303ac6a0f9ea4bd1bee49d7 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 23 Jul 2025 14:21:54 +0100 Subject: fs: Remove mount_nodev mount_nodev has had no in-tree users since cc0876f817d6 ("vfs: Convert devpts to use the new mount API"). Remove it. Signed-off-by: Pedro Falcato Link: https://lore.kernel.org/20250723132156.225410-2-pfalcato@suse.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..204328ed7ebb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2716,9 +2716,6 @@ static inline bool is_mgtime(const struct inode *inode) extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -extern struct dentry *mount_nodev(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); -- cgit v1.2.3 From f7d161c2804f3ad36bdc3222cb93c8fee67be98c Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 23 Jul 2025 14:21:55 +0100 Subject: fs: Remove mount_bdev mount_bdev has no in-tree users ever since f2fs adopted the new mount API. Remove it. 
Signed-off-by: Pedro Falcato Link: https://lore.kernel.org/20250723132156.225410-3-pfalcato@suse.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 204328ed7ebb..98afcf455b28 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2713,9 +2713,6 @@ static inline bool is_mgtime(const struct inode *inode) return inode->i_opflags & IOP_MGTIME; } -extern struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); -- cgit v1.2.3 From ad7fe23b4b0dc0c26187df92a5649948ef7049fa Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 6 Aug 2025 16:07:05 +1000 Subject: fscontext: add custom-prefix log helpers Sometimes, errors associated with an fscontext come from the VFS or otherwise outside of the filesystem driver itself. However, the default logging of errorfc will always prefix the message with the filesystem name. So, add some *fcp() wrappers that allow for custom prefixes to be used when emitting information to the fscontext log. 
Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250806-errorfc-mount-too-revealing-v2-1-534b9b4d45bb@cyphar.com Signed-off-by: Christian Brauner --- include/linux/fs_context.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 7773eb870039..671f031be173 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -186,10 +186,12 @@ struct fc_log { extern __attribute__((format(printf, 4, 5))) void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...); -#define __logfc(fc, l, fmt, ...) logfc((fc)->log.log, NULL, \ - l, fmt, ## __VA_ARGS__) -#define __plog(p, l, fmt, ...) logfc((p)->log, (p)->prefix, \ - l, fmt, ## __VA_ARGS__) +#define __logfc(fc, l, fmt, ...) \ + logfc((fc)->log.log, NULL, (l), (fmt), ## __VA_ARGS__) +#define __plogp(p, prefix, l, fmt, ...) \ + logfc((p)->log, (prefix), (l), (fmt), ## __VA_ARGS__) +#define __plog(p, l, fmt, ...) __plogp(p, (p)->prefix, l, fmt, ## __VA_ARGS__) + /** * infof - Store supplementary informational message * @fc: The context in which to log the informational message @@ -201,6 +203,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__) #define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__) #define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) +#define infofcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'i', fmt, ## __VA_ARGS__) /** * warnf - Store supplementary warning message @@ -213,6 +217,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define warnf(fc, fmt, ...) __logfc(fc, 'w', fmt, ## __VA_ARGS__) #define warn_plog(p, fmt, ...) __plog(p, 'w', fmt, ## __VA_ARGS__) #define warnfc(fc, fmt, ...) 
__plog((&(fc)->log), 'w', fmt, ## __VA_ARGS__) +#define warnfcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'w', fmt, ## __VA_ARGS__) /** * errorf - Store supplementary error message @@ -225,6 +231,8 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define errorf(fc, fmt, ...) __logfc(fc, 'e', fmt, ## __VA_ARGS__) #define error_plog(p, fmt, ...) __plog(p, 'e', fmt, ## __VA_ARGS__) #define errorfc(fc, fmt, ...) __plog((&(fc)->log), 'e', fmt, ## __VA_ARGS__) +#define errorfcp(fc, prefix, fmt, ...) \ + __plogp((&(fc)->log), prefix, 'e', fmt, ## __VA_ARGS__) /** * invalf - Store supplementary invalid argument error message @@ -237,5 +245,7 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, #define invalf(fc, fmt, ...) (errorf(fc, fmt, ## __VA_ARGS__), -EINVAL) #define inval_plog(p, fmt, ...) (error_plog(p, fmt, ## __VA_ARGS__), -EINVAL) #define invalfc(fc, fmt, ...) (errorfc(fc, fmt, ## __VA_ARGS__), -EINVAL) +#define invalfcp(fc, prefix, fmt, ...) \ + (errorfcp(fc, prefix, fmt, ## __VA_ARGS__), -EINVAL) #endif /* _LINUX_FS_CONTEXT_H */ -- cgit v1.2.3 From bb2441402392ef1f49563be68e8f0dcb127ac965 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Tue, 5 Aug 2025 22:40:56 +0300 Subject: regulator: add s2dos05 regulator support S2DOS05 has 1 buck and 4 LDO regulators, used for powering panel/touchscreen. 
Signed-off-by: Dzmitry Sankouski Link: https://patch.msgid.link/20250805-starqltechn_integration_upstream-v8-1-09d8a321fafe@gmail.com Signed-off-by: Mark Brown --- include/linux/regulator/s2dos05.h | 73 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 include/linux/regulator/s2dos05.h (limited to 'include') diff --git a/include/linux/regulator/s2dos05.h b/include/linux/regulator/s2dos05.h new file mode 100644 index 000000000000..2e89fcbce769 --- /dev/null +++ b/include/linux/regulator/s2dos05.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +// s2dos05.h +// +// Copyright (c) 2016 Samsung Electronics Co., Ltd +// http://www.samsung.com +// Copyright (C) 2024 Dzmitry Sankouski + +#ifndef __LINUX_S2DOS05_H +#define __LINUX_S2DOS05_H + +// S2DOS05 registers +// Slave Addr : 0xC0 +enum S2DOS05_reg { + S2DOS05_REG_DEV_ID, + S2DOS05_REG_TOPSYS_STAT, + S2DOS05_REG_STAT, + S2DOS05_REG_EN, + S2DOS05_REG_LDO1_CFG, + S2DOS05_REG_LDO2_CFG, + S2DOS05_REG_LDO3_CFG, + S2DOS05_REG_LDO4_CFG, + S2DOS05_REG_BUCK_CFG, + S2DOS05_REG_BUCK_VOUT, + S2DOS05_REG_IRQ_MASK = 0x0D, + S2DOS05_REG_SSD_TSD = 0x0E, + S2DOS05_REG_OCL = 0x10, + S2DOS05_REG_IRQ = 0x11 +}; + +// S2DOS05 regulator ids +enum S2DOS05_regulators { + S2DOS05_LDO1, + S2DOS05_LDO2, + S2DOS05_LDO3, + S2DOS05_LDO4, + S2DOS05_BUCK1, + S2DOS05_REG_MAX, +}; + +#define S2DOS05_IRQ_PWRMT_MASK BIT(5) +#define S2DOS05_IRQ_TSD_MASK BIT(4) +#define S2DOS05_IRQ_SSD_MASK BIT(3) +#define S2DOS05_IRQ_SCP_MASK BIT(2) +#define S2DOS05_IRQ_UVLO_MASK BIT(1) +#define S2DOS05_IRQ_OCD_MASK BIT(0) + +#define S2DOS05_BUCK_MIN1 506250 +#define S2DOS05_LDO_MIN1 1500000 +#define S2DOS05_LDO_MIN2 2700000 +#define S2DOS05_BUCK_STEP1 6250 +#define S2DOS05_LDO_STEP1 25000 +#define S2DOS05_LDO_VSEL_MASK 0x7F +#define S2DOS05_LDO_FD_MASK 0x80 +#define S2DOS05_BUCK_VSEL_MASK 0xFF +#define S2DOS05_BUCK_FD_MASK 0x08 + +#define S2DOS05_ENABLE_MASK_L1 BIT(0) +#define S2DOS05_ENABLE_MASK_L2 BIT(1) +#define 
S2DOS05_ENABLE_MASK_L3 BIT(2) +#define S2DOS05_ENABLE_MASK_L4 BIT(3) +#define S2DOS05_ENABLE_MASK_B1 BIT(4) + +#define S2DOS05_RAMP_DELAY 12000 + +#define S2DOS05_ENABLE_TIME_LDO 50 +#define S2DOS05_ENABLE_TIME_BUCK 350 + +#define S2DOS05_LDO_N_VOLTAGES (S2DOS05_LDO_VSEL_MASK + 1) +#define S2DOS05_BUCK_N_VOLTAGES (S2DOS05_BUCK_VSEL_MASK + 1) + +#define S2DOS05_REGULATOR_MAX (S2DOS05_REG_MAX) + +#endif // __LINUX_S2DOS05_H -- cgit v1.2.3 From 181fe022ecf8a8e85def0e94852c631c59a8b3f6 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:44 +0200 Subject: gpiolib: add support to register sparse pin range Add support to register for GPIO<->pin mapping using a list of non consecutive pins. The core already supports sparse pin range (pins member of struct pinctrl_gpio_range), but it was not possible to register one. If pins is not NULL the core uses it, otherwise it assumes that a consecutive pin range was registered and it uses pin_base. The function gpiochip_add_pin_range() which allocates and fills the struct pinctrl_gpio_range was renamed to gpiochip_add_pin_range_with_pins() and the pins parameter was added. Two new functions were added, gpiochip_add_pin_range() and gpiochip_add_sparse_pin_range() to register a consecutive or sparse pins range. Both use gpiochip_add_pin_range_with_pins(). 
Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-1-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 51 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 667f8fd58a79..9fcd4a988081 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -772,16 +772,50 @@ struct gpio_pin_range { #ifdef CONFIG_PINCTRL -int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, - unsigned int gpio_offset, unsigned int pin_offset, - unsigned int npins); +int gpiochip_add_pin_range_with_pins(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int const *pins, + unsigned int npins); int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group); void gpiochip_remove_pin_ranges(struct gpio_chip *gc); +static inline int +gpiochip_add_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int npins) +{ + return gpiochip_add_pin_range_with_pins(gc, pinctl_name, gpio_offset, + pin_offset, NULL, npins); +} + +static inline int +gpiochip_add_sparse_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int const *pins, + unsigned int npins) +{ + return gpiochip_add_pin_range_with_pins(gc, pinctl_name, gpio_offset, 0, + pins, npins); +} #else /* ! 
CONFIG_PINCTRL */ +static inline int +gpiochip_add_pin_range_with_pins(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int pin_offset, + unsigned int npins) +{ + return 0; +} + static inline int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, @@ -789,6 +823,17 @@ gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, { return 0; } + +static inline int +gpiochip_add_sparse_pin_range(struct gpio_chip *gc, + const char *pinctl_name, + unsigned int gpio_offset, + unsigned int const *pins, + unsigned int npins) +{ + return 0; +} + static inline int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, -- cgit v1.2.3 From 6e986f8852f56cf9214ea2ec02b4b432e201d02c Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:49 +0200 Subject: gpio: aggregator: export symbols of the GPIO forwarder library Export all symbols and create header file for the GPIO forwarder library. It will be used in the next changes. 
Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-6-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/forwarder.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/gpio/forwarder.h (limited to 'include') diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h new file mode 100644 index 000000000000..e21a1b7b1905 --- /dev/null +++ b/include/linux/gpio/forwarder.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GPIO_FORWARDER_H +#define __LINUX_GPIO_FORWARDER_H + +struct gpio_desc; +struct gpio_chip; +struct gpiochip_fwd; + +struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, + unsigned int ngpios); +int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, + struct gpio_desc *desc, unsigned int offset); +int gpiochip_fwd_register(struct gpiochip_fwd *fwd); + +struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); + +int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, + unsigned int offset); +int gpiochip_fwd_gpio_direction_input(struct gpiochip_fwd *fwd, + unsigned int offset); +int gpiochip_fwd_gpio_direction_output(struct gpiochip_fwd *fwd, + unsigned int offset, + int value); +int gpiochip_fwd_gpio_get(struct gpiochip_fwd *fwd, unsigned int offset); +int gpiochip_fwd_gpio_get_multiple(struct gpiochip_fwd *fwd, + unsigned long *mask, + unsigned long *bits); +int gpiochip_fwd_gpio_set(struct gpiochip_fwd *fwd, unsigned int offset, + int value); +int gpiochip_fwd_gpio_set_multiple(struct gpiochip_fwd *fwd, + unsigned long *mask, + unsigned long *bits); +int gpiochip_fwd_gpio_set_config(struct gpiochip_fwd *fwd, unsigned int offset, + unsigned long config); +int gpiochip_fwd_gpio_to_irq(struct gpiochip_fwd *fwd, unsigned int offset); + +#endif -- cgit v1.2.3 From b31c68fd851e74526ad963362ea205eb97b9a710 Mon Sep 17 
00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:50 +0200 Subject: gpio: aggregator: handle runtime registration of gpio_desc in gpiochip_fwd Add request() callback to check if the GPIO descriptor was well registered in the gpiochip_fwd before using it. This is done to handle the case where GPIO descriptor is added at runtime in the forwarder. If at least one GPIO descriptor was not added before the forwarder registration, we assume the forwarder can sleep as if a GPIO is added at runtime it may sleep. Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-7-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/forwarder.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h index e21a1b7b1905..45e0190308f0 100644 --- a/include/linux/gpio/forwarder.h +++ b/include/linux/gpio/forwarder.h @@ -10,10 +10,12 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, unsigned int ngpios); int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, unsigned int offset); +void gpiochip_fwd_desc_free(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_register(struct gpiochip_fwd *fwd); struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); +int gpiochip_fwd_gpio_request(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_direction_input(struct gpiochip_fwd *fwd, -- cgit v1.2.3 From 60e92c1009c7c6abd4a9d0caf33a8cba5d09f67c Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:51 +0200 Subject: gpio: aggregator: add possibility to attach data to the forwarder Add a data pointer to store private data in the forwarder. 
Reviewed-by: Andy Shevchenko Reviewed-by: Geert Uytterhoeven Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-8-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/forwarder.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gpio/forwarder.h b/include/linux/gpio/forwarder.h index 45e0190308f0..ee5d8355f735 100644 --- a/include/linux/gpio/forwarder.h +++ b/include/linux/gpio/forwarder.h @@ -11,10 +11,12 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, int gpiochip_fwd_desc_add(struct gpiochip_fwd *fwd, struct gpio_desc *desc, unsigned int offset); void gpiochip_fwd_desc_free(struct gpiochip_fwd *fwd, unsigned int offset); -int gpiochip_fwd_register(struct gpiochip_fwd *fwd); +int gpiochip_fwd_register(struct gpiochip_fwd *fwd, void *data); struct gpio_chip *gpiochip_fwd_get_gpiochip(struct gpiochip_fwd *fwd); +void *gpiochip_fwd_get_data(struct gpiochip_fwd *fwd); + int gpiochip_fwd_gpio_request(struct gpiochip_fwd *fwd, unsigned int offset); int gpiochip_fwd_gpio_get_direction(struct gpiochip_fwd *fwd, unsigned int offset); -- cgit v1.2.3 From 53ec9169db1345f04174febb90f88a871fc28d9e Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 11 Aug 2025 15:25:52 +0200 Subject: lib/string_choices: Add str_input_output() helper Add str_input_output() helper to return 'input' or 'output' string literal. Also add the inversed variant str_output_input(). 
Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20250811-aaeon-up-board-pinctrl-support-v9-9-29f0cbbdfb30@bootlin.com Signed-off-by: Bartosz Golaszewski --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index f3ba4f52ff26..a27c87c954ae 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -41,6 +41,12 @@ static inline const char *str_high_low(bool v) } #define str_low_high(v) str_high_low(!(v)) +static inline const char *str_input_output(bool v) +{ + return v ? "input" : "output"; +} +#define str_output_input(v) str_input_output(!(v)) + static inline const char *str_on_off(bool v) { return v ? "on" : "off"; -- cgit v1.2.3 From 8a5a0294f40a50e5be83e9b7ebbc15b546f64e41 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 4 Aug 2025 21:26:42 +0100 Subject: dt-bindings: clock: renesas,r9a09g077/87: Add USB_CLK clock ID Add the USB clock (USB_CLK) definition for the Renesas RZ/T2H (R9A09G077) and RZ/N2H (R9A09G087) SoCs. USB_CLK is used as the reference clock for USB PHY layer. 
Signed-off-by: Lad Prabhakar Acked-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250804202643.3967484-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h | 1 + include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h index 7ecc4f0b235a..0c2ce81a8744 100644 --- a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h +++ b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h @@ -25,5 +25,6 @@ #define R9A09G077_CLK_PCLKM 13 #define R9A09G077_CLK_PCLKL 14 #define R9A09G077_SDHI_CLKHS 15 +#define R9A09G077_USB_CLK 16 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G077_CPG_H__ */ diff --git a/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h index 925e57703925..70ee883f2386 100644 --- a/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h +++ b/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h @@ -25,5 +25,6 @@ #define R9A09G087_CLK_PCLKM 13 #define R9A09G087_CLK_PCLKL 14 #define R9A09G087_SDHI_CLKHS 15 +#define R9A09G087_USB_CLK 16 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G087_CPG_H__ */ -- cgit v1.2.3 From 5293e8f2a854344ef9aba2391b44c7a437889ebb Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 8 Aug 2025 14:30:15 +0100 Subject: dt-bindings: pinctrl: renesas: Document RZ/T2H and RZ/N2H SoCs Document the pin and GPIO controller IP for the Renesas RZ/T2H (R9A09G077) and RZ/N2H (R9A09G087) SoCs, and add the shared DTSI header file used by both the bindings and the driver. The RZ/T2H SoC supports 729 pins, while RZ/N2H supports 576 pins. Both share the same controller architecture; separate compatible strings are added for each SoC to distinguish them. 
Co-developed-by: Thierry Bultel Signed-off-by: Thierry Bultel Signed-off-by: Lad Prabhakar Reviewed-by: "Rob Herring (Arm)" Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250808133017.2053637-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- .../pinctrl/renesas,r9a09g077-pinctrl.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/dt-bindings/pinctrl/renesas,r9a09g077-pinctrl.h (limited to 'include') diff --git a/include/dt-bindings/pinctrl/renesas,r9a09g077-pinctrl.h b/include/dt-bindings/pinctrl/renesas,r9a09g077-pinctrl.h new file mode 100644 index 000000000000..f088793f23ee --- /dev/null +++ b/include/dt-bindings/pinctrl/renesas,r9a09g077-pinctrl.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * This header provides constants for Renesas RZ/T2H family pinctrl bindings. + * + * Copyright (C) 2025 Renesas Electronics Corp. + */ + +#ifndef __DT_BINDINGS_PINCTRL_RENESAS_R9A09G077_PINCTRL_H__ +#define __DT_BINDINGS_PINCTRL_RENESAS_R9A09G077_PINCTRL_H__ + +#define RZT2H_PINS_PER_PORT 8 + +/* + * Create the pin index from its bank and position numbers and store in + * the upper 16 bits the alternate function identifier + */ +#define RZT2H_PORT_PINMUX(b, p, f) ((b) * RZT2H_PINS_PER_PORT + (p) | ((f) << 16)) + +/* Convert a port and pin label to its global pin index */ +#define RZT2H_GPIO(port, pin) ((port) * RZT2H_PINS_PER_PORT + (pin)) + +#endif /* __DT_BINDINGS_PINCTRL_RENESAS_R9A09G077_PINCTRL_H__ */ -- cgit v1.2.3 From 6d3c3ca4c77e93660cce5819bf707f75df03e0c8 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 8 Aug 2025 15:28:47 +0200 Subject: module: Rename EXPORT_SYMBOL_GPL_FOR_MODULES to EXPORT_SYMBOL_FOR_MODULES Christoph suggested that the explicit _GPL_ can be dropped from the module namespace export macro, as it's intended for in-tree modules only. 
It would be possible to restrict it technically, but it was pointed out [2] that some cases of using an out-of-tree build of an in-tree module with the same name are legitimate. But in that case those also have to be GPL anyway so it's unnecessary to spell it out in the macro name. Link: https://lore.kernel.org/all/aFleJN_fE-RbSoFD@infradead.org/ [1] Link: https://lore.kernel.org/all/CAK7LNATRkZHwJGpojCnvdiaoDnP%2BaeUXgdey5sb_8muzdWTMkA@mail.gmail.com/ [2] Suggested-by: Christoph Hellwig Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Acked-by: Nicolas Schier Reviewed-by: Daniel Gomez Reviewed-by: Christian Brauner Signed-off-by: Vlastimil Babka Link: https://lore.kernel.org/20250808-export_modules-v4-1-426945bcc5e1@suse.cz Signed-off-by: Christian Brauner --- include/linux/export.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/export.h b/include/linux/export.h index f35d03b4113b..a686fd0ba406 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -91,6 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) -#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) +#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) #endif /* _LINUX_EXPORT_H */ -- cgit v1.2.3 From edb660ad79ffe81a982c2eca02360a6ffac83e46 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 8 Aug 2025 10:41:08 -0700 Subject: drm/intel/pciids: Add match on vendor/id only All our PCI ID macros match on the PCI class besides the vendor and devid, even for devices that may or may not have display. This may not work going forward, so add a simple INTEL_PCI_DEVICE that matches only on vendor/device IDs. 
Cc: Jani Nikula Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250808-intel-pci-device-v1-1-ce3545d86502@intel.com Signed-off-by: Lucas De Marchi --- include/drm/intel/pciids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 76f8d26f9cc9..da6301a6fcea 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -26,6 +26,11 @@ #define __PCIIDS_H__ #ifdef __KERNEL__ +#define INTEL_PCI_DEVICE(_id, _info) { \ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, (_id)), \ + .driver_data = (kernel_ulong_t)(_info), \ +} + #define INTEL_VGA_DEVICE(_id, _info) { \ PCI_DEVICE(PCI_VENDOR_ID_INTEL, (_id)), \ .class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16, \ -- cgit v1.2.3 From c17ccefb611fdb346eef9be6bfbd0bfd04afa204 Mon Sep 17 00:00:00 2001 From: Sricharan Ramabadhran Date: Mon, 11 Aug 2025 14:39:51 +0530 Subject: dt-bindings: clock: ipq5424-apss-clk: Add ipq5424 apss clock controller The CPU core in ipq5424 is clocked by a huayra PLL with RCG support. The RCG and PLL have a separate register space from the GCC. Also the L3 cache has a separate pll and needs to be scaled along with the CPU. 
Co-developed-by: Md Sadre Alam Signed-off-by: Md Sadre Alam Signed-off-by: Sricharan Ramabadhran [ Added interconnect related changes ] Reviewed-by: Krzysztof Kozlowski Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20250811090954.2854440-2-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,apss-ipq.h | 6 ++++++ include/dt-bindings/interconnect/qcom,ipq5424.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,apss-ipq.h b/include/dt-bindings/clock/qcom,apss-ipq.h index 77b6e05492e2..0bb41e5efdef 100644 --- a/include/dt-bindings/clock/qcom,apss-ipq.h +++ b/include/dt-bindings/clock/qcom,apss-ipq.h @@ -8,5 +8,11 @@ #define APCS_ALIAS0_CLK_SRC 0 #define APCS_ALIAS0_CORE_CLK 1 +#define APSS_PLL_EARLY 2 +#define APSS_SILVER_CLK_SRC 3 +#define APSS_SILVER_CORE_CLK 4 +#define L3_PLL 5 +#define L3_CLK_SRC 6 +#define L3_CORE_CLK 7 #endif diff --git a/include/dt-bindings/interconnect/qcom,ipq5424.h b/include/dt-bindings/interconnect/qcom,ipq5424.h index a770356112ee..afd7e0683a24 100644 --- a/include/dt-bindings/interconnect/qcom,ipq5424.h +++ b/include/dt-bindings/interconnect/qcom,ipq5424.h @@ -21,4 +21,7 @@ #define MASTER_CNOC_USB 16 #define SLAVE_CNOC_USB 17 +#define MASTER_CPU 0 +#define SLAVE_L3 1 + #endif /* INTERCONNECT_QCOM_IPQ5424_H */ -- cgit v1.2.3 From 0daf35da397b083ea0ea5407196bb6bd210530ec Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 7 Aug 2025 13:13:10 +0530 Subject: soc: qcom: mdt_loader: Remove pas id parameter pas id is not used in qcom_mdt_load_no_init() and it should not be used as it is non-PAS specific function and has no relation to PAS specific mechanism. 
Reviewed-by: Dikshita Agarwal Acked-by: Jeff Johnson # drivers/net/wireless/ath/ath12k/ahb.c Signed-off-by: Mukesh Ojha Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250807074311.2381713-2-mukesh.ojha@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/mdt_loader.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index 9e8e60421192..8ea8230579a2 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -24,7 +24,7 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw, phys_addr_t *reloc_base); int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, void *mem_region, + const char *fw_name, void *mem_region, phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base); void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len, @@ -54,9 +54,8 @@ static inline int qcom_mdt_load(struct device *dev, const struct firmware *fw, static inline int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw, - const char *fw_name, int pas_id, - void *mem_region, phys_addr_t mem_phys, - size_t mem_size, + const char *fw_name, void *mem_region, + phys_addr_t mem_phys, size_t mem_size, phys_addr_t *reloc_base) { return -ENODEV; -- cgit v1.2.3 From 5634c8cb298a7146b4e38873473e280b50e27a2c Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Fri, 18 Jul 2025 16:20:51 +0530 Subject: iosys-map: Fix undefined behavior in iosys_map_clear() The current iosys_map_clear() implementation reads the potentially uninitialized 'is_iomem' boolean field to decide which union member to clear. This causes undefined behavior when called on uninitialized structures, as 'is_iomem' may contain garbage values like 0xFF. 
UBSAN detects this as: UBSAN: invalid-load in include/linux/iosys-map.h:267 load of value 255 is not a valid value for type '_Bool' Fix by unconditionally clearing the entire structure with memset(), eliminating the need to read uninitialized data and ensuring all fields are set to known good values. Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14639 Fixes: 01fd30da0474 ("dma-buf: Add struct dma-buf-map for storing struct dma_buf.vaddr_ptr") Signed-off-by: Nitin Gote Reviewed-by: Andi Shyti Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250718105051.2709487-1-nitin.r.gote@intel.com --- include/linux/iosys-map.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 4696abfd311c..3e85afe794c0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -264,12 +264,7 @@ static inline bool iosys_map_is_set(const struct iosys_map *map) */ static inline void iosys_map_clear(struct iosys_map *map) { - if (map->is_iomem) { - map->vaddr_iomem = NULL; - map->is_iomem = false; - } else { - map->vaddr = NULL; - } + memset(map, 0, sizeof(*map)); } /** -- cgit v1.2.3 From ce8370e2e62a903e18be7dd0e0be2eee079501e1 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 6 Aug 2025 17:04:07 -0400 Subject: audit: record fanotify event regardless of presence of rules When no audit rules are in place, fanotify event results are unconditionally dropped due to an explicit check for the existence of any audit rules. Given this is a report from another security sub-system, allow it to be recorded regardless of the existence of any audit rules. To test, install and run the fapolicyd daemon with default config. Then as an unprivileged user, create and run a very simple binary that should be denied. 
Then check for an event with ausearch -m FANOTIFY -ts recent Link: https://issues.redhat.com/browse/RHEL-9065 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index a394614ccd0b..e3f06eba9c6e 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -527,7 +527,7 @@ static inline void audit_log_kern_module(const char *name) static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { - if (!audit_dummy_context()) + if (audit_enabled) __audit_fanotify(response, friar); } -- cgit v1.2.3 From b41dc83f0790fd3488a45b31de0b0c3af7d441fe Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 Aug 2025 11:26:29 -0700 Subject: kunit, lib/crypto: Move run_irq_test() to common header Rename run_irq_test() to kunit_run_irq_test() and move it to a public header so that it can be reused by crc_kunit. 
Link: https://lore.kernel.org/r/20250811182631.376302-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/kunit/run-in-irq-context.h | 129 +++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 include/kunit/run-in-irq-context.h (limited to 'include') diff --git a/include/kunit/run-in-irq-context.h b/include/kunit/run-in-irq-context.h new file mode 100644 index 000000000000..108e96433ea4 --- /dev/null +++ b/include/kunit/run-in-irq-context.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Helper function for testing code in interrupt contexts + * + * Copyright 2025 Google LLC + */ +#ifndef _KUNIT_RUN_IN_IRQ_CONTEXT_H +#define _KUNIT_RUN_IN_IRQ_CONTEXT_H + +#include +#include +#include +#include + +#define KUNIT_IRQ_TEST_HRTIMER_INTERVAL us_to_ktime(5) + +struct kunit_irq_test_state { + bool (*func)(void *test_specific_state); + void *test_specific_state; + bool task_func_reported_failure; + bool hardirq_func_reported_failure; + bool softirq_func_reported_failure; + unsigned long hardirq_func_calls; + unsigned long softirq_func_calls; + struct hrtimer timer; + struct work_struct bh_work; +}; + +static enum hrtimer_restart kunit_irq_test_timer_func(struct hrtimer *timer) +{ + struct kunit_irq_test_state *state = + container_of(timer, typeof(*state), timer); + + WARN_ON_ONCE(!in_hardirq()); + state->hardirq_func_calls++; + + if (!state->func(state->test_specific_state)) + state->hardirq_func_reported_failure = true; + + hrtimer_forward_now(&state->timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL); + queue_work(system_bh_wq, &state->bh_work); + return HRTIMER_RESTART; +} + +static void kunit_irq_test_bh_work_func(struct work_struct *work) +{ + struct kunit_irq_test_state *state = + container_of(work, typeof(*state), bh_work); + + WARN_ON_ONCE(!in_serving_softirq()); + state->softirq_func_calls++; + + if (!state->func(state->test_specific_state)) + state->softirq_func_reported_failure = true; +} + +/* + * 
Helper function which repeatedly runs the given @func in task, softirq, and + * hardirq context concurrently, and reports a failure to KUnit if any + * invocation of @func in any context returns false. @func is passed + * @test_specific_state as its argument. At most 3 invocations of @func will + * run concurrently: one in each of task, softirq, and hardirq context. + * + * The main purpose of this interrupt context testing is to validate fallback + * code paths that run in contexts where the normal code path cannot be used, + * typically due to the FPU or vector registers already being in-use in kernel + * mode. These code paths aren't covered when the test code is executed only by + * the KUnit test runner thread in task context. The reason for the concurrency + * is because merely using hardirq context is not sufficient to reach a fallback + * code path on some architectures; the hardirq actually has to occur while the + * FPU or vector unit was already in-use in kernel mode. + * + * Another purpose of this testing is to detect issues with the architecture's + * irq_fpu_usable() and kernel_fpu_begin/end() or equivalent functions, + * especially in softirq context when the softirq may have interrupted a task + * already using kernel-mode FPU or vector (if the arch didn't prevent that). + * Crypto functions are often executed in softirqs, so this is important. + */ +static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), + int max_iterations, + void *test_specific_state) +{ + struct kunit_irq_test_state state = { + .func = func, + .test_specific_state = test_specific_state, + }; + unsigned long end_jiffies; + + /* + * Set up a hrtimer (the way we access hardirq context) and a work + * struct for the BH workqueue (the way we access softirq context). 
+ */ + hrtimer_setup_on_stack(&state.timer, kunit_irq_test_timer_func, + CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + INIT_WORK_ONSTACK(&state.bh_work, kunit_irq_test_bh_work_func); + + /* Run for up to max_iterations or 1 second, whichever comes first. */ + end_jiffies = jiffies + HZ; + hrtimer_start(&state.timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL, + HRTIMER_MODE_REL_HARD); + for (int i = 0; i < max_iterations && !time_after(jiffies, end_jiffies); + i++) { + if (!func(test_specific_state)) + state.task_func_reported_failure = true; + } + + /* Cancel the timer and work. */ + hrtimer_cancel(&state.timer); + flush_work(&state.bh_work); + + /* Sanity check: the timer and BH functions should have been run. */ + KUNIT_EXPECT_GT_MSG(test, state.hardirq_func_calls, 0, + "Timer function was not called"); + KUNIT_EXPECT_GT_MSG(test, state.softirq_func_calls, 0, + "BH work function was not called"); + + /* Check for incorrect hash values reported from any context. */ + KUNIT_EXPECT_FALSE_MSG( + test, state.task_func_reported_failure, + "Incorrect hash values reported from task context"); + KUNIT_EXPECT_FALSE_MSG( + test, state.hardirq_func_reported_failure, + "Incorrect hash values reported from hardirq context"); + KUNIT_EXPECT_FALSE_MSG( + test, state.softirq_func_reported_failure, + "Incorrect hash values reported from softirq context"); +} + +#endif /* _KUNIT_RUN_IN_IRQ_CONTEXT_H */ -- cgit v1.2.3 From 5816bf4273edb32716a88c796e0b04f0e12962eb Mon Sep 17 00:00:00 2001 From: Blaise Boscaccy Date: Tue, 22 Jul 2025 14:21:34 -0700 Subject: lsm,selinux: Add LSM blob support for BPF objects This patch introduces LSM blob support for BPF maps, programs, and tokens to enable LSM stacking and multiplexing of LSM modules that govern BPF objects. Additionally, the existing BPF hooks used by SELinux have been updated to utilize the new blob infrastructure, removing the assumption of exclusive ownership of the security pointer. 
Signed-off-by: Blaise Boscaccy [PM: dropped local variable init, style fixes] Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 090d1d3e19fe..79ec5a2bdcca 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -116,6 +116,9 @@ struct lsm_blob_sizes { int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ int lbs_tun_dev; int lbs_bdev; + int lbs_bpf_map; + int lbs_bpf_prog; + int lbs_bpf_token; }; /* -- cgit v1.2.3 From fb357dbadbebc7a9ca3c5ef26f6c792b0e8e1278 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 14:24:42 +0200 Subject: fbcon: Add necessary include statements and forward declarations Make the header self contained for including. Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20250715122643.137027-6-tzimmermann@suse.de --- include/linux/fbcon.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 2382dec6d6ab..81f0e698acbf 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -1,6 +1,13 @@ #ifndef _LINUX_FBCON_H #define _LINUX_FBCON_H +#include + +struct fb_blit_caps; +struct fb_info; +struct fb_var_screeninfo; +struct fb_videomode; + #ifdef CONFIG_FRAMEBUFFER_CONSOLE void __init fb_console_init(void); void __exit fb_console_exit(void); -- cgit v1.2.3 From 039a504cda2cb69354387aa453391ec89a9e0e49 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 11 Aug 2025 22:11:33 -0500 Subject: dt-bindings: clock: dispcc-sc7280: Add display resets Like other platforms the sc7280 display clock controller provides a couple of resets, add the defines to allow referring to them. 
Signed-off-by: Bjorn Andersson Reviewed-by: Taniya Das Link: https://lore.kernel.org/r/20250811-sc7280-mdss-reset-v1-1-83ceff1d48de@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,dispcc-sc7280.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,dispcc-sc7280.h b/include/dt-bindings/clock/qcom,dispcc-sc7280.h index a4a692c20acf..9f113f346be8 100644 --- a/include/dt-bindings/clock/qcom,dispcc-sc7280.h +++ b/include/dt-bindings/clock/qcom,dispcc-sc7280.h @@ -52,4 +52,8 @@ /* DISP_CC power domains */ #define DISP_CC_MDSS_CORE_GDSC 0 +/* DISPCC resets */ +#define DISP_CC_MDSS_CORE_BCR 0 +#define DISP_CC_MDSS_RSCC_BCR 1 + #endif -- cgit v1.2.3 From bea90085dcb0f9a75748e73d723bde557a5ebf1a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 23 Jul 2025 11:21:53 -0400 Subject: dlm: use defines for force values in dlm_release_lockspace Clarify the use of the force parameter by renaming it to "release_option" and adding defines (with descriptions) for each of the accepted values. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index bacda9898f2b..cc7a36244893 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -87,13 +87,37 @@ int dlm_new_lockspace(const char *name, const char *cluster, const struct dlm_lockspace_ops *ops, void *ops_arg, int *ops_result, dlm_lockspace_t **lockspace); +/* + * dlm_release_lockspace() release_option values: + * + * DLM_RELEASE_NO_LOCKS returns -EBUSY if any locks (lkb's) + * exist in the local lockspace. + * + * DLM_RELEASE_UNUSED previous value that is no longer used. + * + * DLM_RELEASE_NORMAL releases the lockspace regardless of any + * locks managed in the local lockspace. 
+ * + * DLM_RELEASE_NO_EVENT release the lockspace regardless of any + * locks managed in the local lockspace, and does not submit + * a leave event to the cluster manager, so other nodes will + * not be notified that the node should be removed from the + * list of lockspace members. + */ +#define DLM_RELEASE_NO_LOCKS 0 +#define DLM_RELEASE_UNUSED 1 +#define DLM_RELEASE_NORMAL 2 +#define DLM_RELEASE_NO_EVENT 3 + /* * dlm_release_lockspace * * Stop a lockspace. + * + * release_option: see DLM_RELEASE values above. */ -int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); +int dlm_release_lockspace(dlm_lockspace_t *lockspace, int release_option); /* * dlm_lock -- cgit v1.2.3 From 6f4f4ca5caf73de5e86329547d4527b3e0c08488 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 23 Jul 2025 11:21:56 -0400 Subject: dlm: add new flag DLM_RELEASE_RECOVER for dlm_lockspace_release When dlm_release_lockspace() is passed DLM_RELEASE_RECOVER, it tells the dlm to handle the release/leave as if the node had failed, i.e. perform recovery steps for a failed node, like recover_slot(). When DLM_RELEASE_RECOVER is set: - dlm_release_lockspace() includes RELEASE_RECOVER=1 in the OFFLINE uevent sent to userspace. - userspace/dlm_controld sends a message to all lockspace members indicating that the subsequent node removal should be handled as if the node had failed. - when dlm_controld on all nodes receives the new message, it sets the release_recover configfs entry to 1 for the node. - when the dlm/kernel next performs recovery and removes the node, it will see that release_recover has been set, and will perform recovery steps for the node as if it had failed, e.g. the recover_slot() callback is called to notify the fs. 
Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index cc7a36244893..108eb953eb18 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -103,11 +103,16 @@ int dlm_new_lockspace(const char *name, const char *cluster, * a leave event to the cluster manager, so other nodes will * not be notified that the node should be removed from the * list of lockspace members. + * + * DLM_RELEASE_RECOVER like DLM_RELEASE_NORMAL, but the remaining + * nodes will handle the removal of the node as if the node + * had failed, e.g. the recover_slot() callback would be used. */ #define DLM_RELEASE_NO_LOCKS 0 #define DLM_RELEASE_UNUSED 1 #define DLM_RELEASE_NORMAL 2 #define DLM_RELEASE_NO_EVENT 3 +#define DLM_RELEASE_RECOVER 4 /* * dlm_release_lockspace -- cgit v1.2.3 From c93c59baa5ab57e94b874000cec56e26611b7a23 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 11 Aug 2025 20:58:20 +0200 Subject: bpf: Tidy verifier bug message Yonghong noticed that error messages for potential verifier bugs often have a '(1)' at the end. This is happening because verifier_bug_if(cond, env, fmt, args...) prints "(" #cond ")\n" as part of the message and verifier_bug() is defined as: #define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args) Hence, verifier_bug() always ends up displaying '(1)'. This small patch fixes it by having verifier_bug_if conditionally call verifier_bug instead of the other way around. 
Fixes: 1cb0f56d9618 ("bpf: WARN_ONCE on verifier bugs") Reported-by: Yonghong Song Signed-off-by: Paul Chaignon Signed-off-by: Andrii Nakryiko Tested-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/aJo9THBrzo8jFXsh@mail.gmail.com --- include/linux/bpf_verifier.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c823f8efe3ed..020de62bd09c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -875,13 +875,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, #define verifier_bug_if(cond, env, fmt, args...) \ ({ \ bool __cond = (cond); \ - if (unlikely(__cond)) { \ - BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \ - bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \ - } \ + if (unlikely(__cond)) \ + verifier_bug(env, fmt " (" #cond ")", ##args); \ (__cond); \ }) -#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args) +#define verifier_bug(env, fmt, args...) \ + ({ \ + BPF_WARN_ONCE(1, "verifier bug: " fmt "\n", ##args); \ + bpf_log(&env->log, "verifier bug: " fmt "\n", ##args); \ + }) static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { -- cgit v1.2.3 From 07bbbfe7addf5b032e04f3c38f0b183d067a3f0d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 11 Aug 2025 19:50:43 +0100 Subject: net: stmmac: add suspend()/resume() platform ops Add suspend/resume platform operations, which, when populated, override the init/exit platform operations when we suspend and resume. These suspend()/resume() methods are called by core code, and thus are designed to support any struct device, not just platform devices. This allows them to be used by the PCI drivers we have. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/E1ulXbX-008gqZ-Bb@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 26ddf95d23f9..22c24dacbc65 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -248,6 +248,8 @@ struct plat_stmmacenet_data { void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); + int (*suspend)(struct device *dev, void *priv); + int (*resume)(struct device *dev, void *priv); struct mac_device_info *(*setup)(void *priv); int (*clks_config)(void *priv, bool enabled); int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, -- cgit v1.2.3 From b3ef7bdec66fb1813e865fd39d179a93cefd2015 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 11 Aug 2025 17:31:42 +0200 Subject: net: airoha: Add airoha_offload.h header Move NPU definitions to airoha_offload.h in include/linux/soc/airoha/ in order to allow the MT76 driver to access the callback definitions. 
Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250811-airoha-en7581-wlan-offlaod-v7-7-58823603bb4e@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 260 ++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 include/linux/soc/airoha/airoha_offload.h (limited to 'include') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h new file mode 100644 index 000000000000..117c63c2448d --- /dev/null +++ b/include/linux/soc/airoha/airoha_offload.h @@ -0,0 +1,260 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 AIROHA Inc + * Author: Lorenzo Bianconi + */ +#ifndef AIROHA_OFFLOAD_H +#define AIROHA_OFFLOAD_H + +#include +#include + +#define NPU_NUM_CORES 8 +#define NPU_NUM_IRQ 6 +#define NPU_RX0_DESC_NUM 512 +#define NPU_RX1_DESC_NUM 512 + +/* CTRL */ +#define NPU_RX_DMA_DESC_LAST_MASK BIT(29) +#define NPU_RX_DMA_DESC_LEN_MASK GENMASK(28, 15) +#define NPU_RX_DMA_DESC_CUR_LEN_MASK GENMASK(14, 1) +#define NPU_RX_DMA_DESC_DONE_MASK BIT(0) +/* INFO */ +#define NPU_RX_DMA_PKT_COUNT_MASK GENMASK(31, 28) +#define NPU_RX_DMA_PKT_ID_MASK GENMASK(28, 26) +#define NPU_RX_DMA_SRC_PORT_MASK GENMASK(25, 21) +#define NPU_RX_DMA_CRSN_MASK GENMASK(20, 16) +#define NPU_RX_DMA_FOE_ID_MASK GENMASK(15, 0) +/* DATA */ +#define NPU_RX_DMA_SID_MASK GENMASK(31, 16) +#define NPU_RX_DMA_FRAG_TYPE_MASK GENMASK(15, 14) +#define NPU_RX_DMA_PRIORITY_MASK GENMASK(13, 10) +#define NPU_RX_DMA_RADIO_ID_MASK GENMASK(9, 6) +#define NPU_RX_DMA_VAP_ID_MASK GENMASK(5, 2) +#define NPU_RX_DMA_FRAME_TYPE_MASK GENMASK(1, 0) + +struct airoha_npu_rx_dma_desc { + u32 ctrl; + u32 info; + u32 data; + u32 addr; + u64 rsv; +} __packed; + +/* CTRL */ +#define NPU_TX_DMA_DESC_SCHED_MASK BIT(31) +#define NPU_TX_DMA_DESC_LEN_MASK GENMASK(30, 18) +#define NPU_TX_DMA_DESC_VEND_LEN_MASK GENMASK(17, 1) +#define NPU_TX_DMA_DESC_DONE_MASK BIT(0) + +#define NPU_TXWI_LEN 192 + +struct 
airoha_npu_tx_dma_desc { + u32 ctrl; + u32 addr; + u64 rsv; + u8 txwi[NPU_TXWI_LEN]; +} __packed; + +enum airoha_npu_wlan_set_cmd { + WLAN_FUNC_SET_WAIT_PCIE_ADDR, + WLAN_FUNC_SET_WAIT_DESC, + WLAN_FUNC_SET_WAIT_NPU_INIT_DONE, + WLAN_FUNC_SET_WAIT_TRAN_TO_CPU, + WLAN_FUNC_SET_WAIT_BA_WIN_SIZE, + WLAN_FUNC_SET_WAIT_DRIVER_MODEL, + WLAN_FUNC_SET_WAIT_DEL_STA, + WLAN_FUNC_SET_WAIT_DRAM_BA_NODE_ADDR, + WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR, + WLAN_FUNC_SET_WAIT_IS_TEST_NOBA, + WLAN_FUNC_SET_WAIT_FLUSHONE_TIMEOUT, + WLAN_FUNC_SET_WAIT_FLUSHALL_TIMEOUT, + WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU, + WLAN_FUNC_SET_WAIT_PCIE_STATE, + WLAN_FUNC_SET_WAIT_PCIE_PORT_TYPE, + WLAN_FUNC_SET_WAIT_ERROR_RETRY_TIMES, + WLAN_FUNC_SET_WAIT_BAR_INFO, + WLAN_FUNC_SET_WAIT_FAST_FLAG, + WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU, + WLAN_FUNC_SET_WAIT_TX_RING_PCIE_ADDR, + WLAN_FUNC_SET_WAIT_TX_DESC_HW_BASE, + WLAN_FUNC_SET_WAIT_TX_BUF_SPACE_HW_BASE, + WLAN_FUNC_SET_WAIT_RX_RING_FOR_TXDONE_HW_BASE, + WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR, + WLAN_FUNC_SET_WAIT_INODE_TXRX_REG_ADDR, + WLAN_FUNC_SET_WAIT_INODE_DEBUG_FLAG, + WLAN_FUNC_SET_WAIT_INODE_HW_CFG_INFO, + WLAN_FUNC_SET_WAIT_INODE_STOP_ACTION, + WLAN_FUNC_SET_WAIT_INODE_PCIE_SWAP, + WLAN_FUNC_SET_WAIT_RATELIMIT_CTRL, + WLAN_FUNC_SET_WAIT_HWNAT_INIT, + WLAN_FUNC_SET_WAIT_ARHT_CHIP_INFO, + WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR, + WLAN_FUNC_SET_WAIT_TOKEN_ID_SIZE, +}; + +enum airoha_npu_wlan_get_cmd { + WLAN_FUNC_GET_WAIT_NPU_INFO, + WLAN_FUNC_GET_WAIT_LAST_RATE, + WLAN_FUNC_GET_WAIT_COUNTER, + WLAN_FUNC_GET_WAIT_DBG_COUNTER, + WLAN_FUNC_GET_WAIT_RXDESC_BASE, + WLAN_FUNC_GET_WAIT_WCID_DBG_COUNTER, + WLAN_FUNC_GET_WAIT_DMA_ADDR, + WLAN_FUNC_GET_WAIT_RING_SIZE, + WLAN_FUNC_GET_WAIT_NPU_SUPPORT_MAP, + WLAN_FUNC_GET_WAIT_MDC_LOCK_ADDRESS, + WLAN_FUNC_GET_WAIT_NPU_VERSION, +}; + +struct airoha_npu { +#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) + struct device *dev; + struct regmap *regmap; + + struct airoha_npu_core { + struct airoha_npu 
*npu; + /* protect concurrent npu memory accesses */ + spinlock_t lock; + struct work_struct wdt_work; + } cores[NPU_NUM_CORES]; + + int irqs[NPU_NUM_IRQ]; + + struct airoha_foe_stats __iomem *stats; + + struct { + int (*ppe_init)(struct airoha_npu *npu); + int (*ppe_deinit)(struct airoha_npu *npu); + int (*ppe_flush_sram_entries)(struct airoha_npu *npu, + dma_addr_t foe_addr, + int sram_num_entries); + int (*ppe_foe_commit_entry)(struct airoha_npu *npu, + dma_addr_t foe_addr, + u32 entry_size, u32 hash, + bool ppe2); + int (*wlan_init_reserved_memory)(struct airoha_npu *npu); + int (*wlan_send_msg)(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_set_cmd func_id, + void *data, int data_len, gfp_t gfp); + int (*wlan_get_msg)(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd func_id, + void *data, int data_len, gfp_t gfp); + u32 (*wlan_get_queue_addr)(struct airoha_npu *npu, int qid, + bool xmit); + void (*wlan_set_irq_status)(struct airoha_npu *npu, u32 val); + u32 (*wlan_get_irq_status)(struct airoha_npu *npu, int q); + void (*wlan_enable_irq)(struct airoha_npu *npu, int q); + void (*wlan_disable_irq)(struct airoha_npu *npu, int q); + } ops; +#endif +}; + +#if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) +struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); +void airoha_npu_put(struct airoha_npu *npu); + +static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) +{ + return npu->ops.wlan_init_reserved_memory(npu); +} + +static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu, + int ifindex, + enum airoha_npu_wlan_set_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return npu->ops.wlan_send_msg(npu, ifindex, cmd, data, data_len, gfp); +} + +static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return npu->ops.wlan_get_msg(npu, ifindex, cmd, data, 
data_len, gfp); +} + +static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu, + int qid, bool xmit) +{ + return npu->ops.wlan_get_queue_addr(npu, qid, xmit); +} + +static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu, + u32 val) +{ + npu->ops.wlan_set_irq_status(npu, val); +} + +static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, int q) +{ + return npu->ops.wlan_get_irq_status(npu, q); +} + +static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q) +{ + npu->ops.wlan_enable_irq(npu, q); +} + +static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) +{ + npu->ops.wlan_disable_irq(npu, q); +} +#else +static inline struct airoha_npu *airoha_npu_get(struct device *dev, + dma_addr_t *foe_stats_addr) +{ + return NULL; +} + +static inline void airoha_npu_put(struct airoha_npu *npu) +{ +} + +static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) +{ + return -EOPNOTSUPP; +} + +static inline int airoha_npu_wlan_send_msg(struct airoha_npu *npu, + int ifindex, + enum airoha_npu_wlan_set_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return -EOPNOTSUPP; +} + +static inline int airoha_npu_wlan_get_msg(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd cmd, + void *data, int data_len, gfp_t gfp) +{ + return -EOPNOTSUPP; +} + +static inline u32 airoha_npu_wlan_get_queue_addr(struct airoha_npu *npu, + int qid, bool xmit) +{ + return 0; +} + +static inline void airoha_npu_wlan_set_irq_status(struct airoha_npu *npu, + u32 val) +{ +} + +static inline u32 airoha_npu_wlan_get_irq_status(struct airoha_npu *npu, + int q) +{ + return 0; +} + +static inline void airoha_npu_wlan_enable_irq(struct airoha_npu *npu, int q) +{ +} + +static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) +{ +} +#endif + +#endif /* AIROHA_OFFLOAD_H */ -- cgit v1.2.3 From 618882c92681de18e9bd99d2a88bb21c897283f3 Mon Sep 17 00:00:00 2001 From: Laurent 
Pinchart Date: Sun, 10 Aug 2025 04:29:50 +0300 Subject: media: Wrap file->private_data access with a helper function Accessing file->private_data manually to retrieve the v4l2_fh pointer is error-prone, as the field is a void * and will happily convert implicitly to any pointer type. To avoid direct access to file->private_data, introduce a new inline function that retrieves the v4l2_fh pointer, and use it to replace common access patterns through the kernel. Changes to drivers have been generated with the following coccinelle semantic patch: @@ struct file *filp; identifier fh; @@ - struct v4l2_fh *fh = filp->private_data; + struct v4l2_fh *fh = file_to_v4l2_fh(filp); Manual changes have been applied to Documentation/ to update the usage patterns, and to include/media/v4l2-fh.h to add the new function. While at it, fix a typo in the title of v4l2-fh.rst: the file describes the "file handles" API, not "file handlers". No functional change is intended, this only paves the way to remove direct accesses to file->private_data and make V4L2 drivers safer. Other accesses to the field will be addressed separately. Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-fh.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index b5b3e00c8e6a..823fa8ebeb8f 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -56,6 +56,20 @@ struct v4l2_fh { struct v4l2_m2m_ctx *m2m_ctx; }; +/** + * file_to_v4l2_fh - Return the v4l2_fh associated with a struct file + * + * @filp: pointer to &struct file + * + * This function should be used by drivers to retrieve the &struct v4l2_fh + * instance pointer stored in the file private_data instead of accessing the + * private_data field directly. + */ +static inline struct v4l2_fh *file_to_v4l2_fh(struct file *filp) +{ + return filp->private_data; +} + /** * v4l2_fh_init - Initialise the file handle. 
* -- cgit v1.2.3 From bbe4debfaa6a16f11064d5c40ef6d468dad4398d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Aug 2025 04:29:58 +0300 Subject: media: v4l2-fh: Move piece of documentation to correct function The paragraph in the v4l2_fh_del() documentation that indicates the function sets filp->private_data was added in the wrong place. It is meant for v4l2_fh_open(). Move it to where it belongs. Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-fh.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index 823fa8ebeb8f..14e7136e693f 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -101,6 +101,9 @@ void v4l2_fh_add(struct v4l2_fh *fh); * * It allocates a v4l2_fh and inits and adds it to the &struct video_device * associated with the file pointer. + * + * On error filp->private_data will be %NULL, otherwise it will point to + * the &struct v4l2_fh. */ int v4l2_fh_open(struct file *filp); @@ -109,9 +112,6 @@ int v4l2_fh_open(struct file *filp); * * @fh: pointer to &struct v4l2_fh * - * On error filp->private_data will be %NULL, otherwise it will point to - * the &struct v4l2_fh. - * * .. note:: * Must be called in v4l2_file_operations->release\(\) handler if the driver * uses &struct v4l2_fh. -- cgit v1.2.3 From 47f4b1acb4d505b1e7e81d8e0ebce774422b8c2e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Aug 2025 04:30:08 +0300 Subject: media: Set file->private_data in v4l2_fh_add() All the drivers that use v4l2_fh and call v4l2_fh_add() manually store a pointer to the v4l2_fh instance in file->private_data in their video device .open() file operation handler. Move the code to the v4l2_fh_add() function to avoid direct access to file->private_data in drivers. This requires adding a file pointer argument to the function. 
Changes to drivers have been generated with the following coccinelle semantic patch: @@ expression fh; identifier filp; identifier open; type ret; @@ ret open(..., struct file *filp, ...) { <... - filp->private_data = fh; ... - v4l2_fh_add(fh); + v4l2_fh_add(fh, filp); ...> } @@ expression fh; identifier filp; identifier open; type ret; @@ ret open(..., struct file *filp, ...) { <... - v4l2_fh_add(fh); + v4l2_fh_add(fh, filp); ... - filp->private_data = fh; ...> } Manual changes have been applied to Documentation/ to update the usage patterns, to drivers/media/v4l2-core/v4l2-fh.c to update the v4l2_fh_add() prototype and set file->private_data, and to include/media/v4l2-fh.h to update the v4l2_fh_add() function prototype and its documentation. Additionally, white space issues have been fixed manually in drivers/media/platform/nvidia/tegra-vde/v4l2.c, drivers/media/platform/rockchip/rkvdec/rkvdec.c, drivers/media/v4l2-core/v4l2-fh.c and drivers/staging/most/video/video.c. Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-fh.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index 14e7136e693f..d8fcf49f10e0 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -87,11 +87,14 @@ void v4l2_fh_init(struct v4l2_fh *fh, struct video_device *vdev); * v4l2_fh_add - Add the fh to the list of file handles on a video_device. * * @fh: pointer to &struct v4l2_fh + * @filp: pointer to &struct file associated with @fh + * + * The function sets filp->private_data to point to @fh. * * .. note:: * The @fh file handle must be initialised first.
*/ -void v4l2_fh_add(struct v4l2_fh *fh); +void v4l2_fh_add(struct v4l2_fh *fh, struct file *filp); /** * v4l2_fh_open - Ancillary routine that can be used as the open\(\) op -- cgit v1.2.3 From 277966749f46bc6292c4052b4e66a554f193a78a Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Aug 2025 04:30:09 +0300 Subject: media: Reset file->private_data to NULL in v4l2_fh_del() Multiple drivers that use v4l2_fh and call v4l2_fh_del() manually reset the file->private_data pointer to NULL in their video device .release() file operation handler. Move the code to the v4l2_fh_del() function to avoid direct access to file->private_data in drivers. This requires adding a file pointer argument to the function. Changes to drivers have been generated with the following coccinelle semantic patch: @@ expression fh; identifier filp; identifier release; type ret; @@ ret release(..., struct file *filp, ...) { <... - filp->private_data = NULL; ... - v4l2_fh_del(fh); + v4l2_fh_del(fh, filp); ...> } @@ expression fh; identifier filp; identifier release; type ret; @@ ret release(..., struct file *filp, ...) { <... - v4l2_fh_del(fh); + v4l2_fh_del(fh, filp); ... - filp->private_data = NULL; ...> } @@ expression fh; identifier filp; identifier release; type ret; @@ ret release(..., struct file *filp, ...) { <... - v4l2_fh_del(fh); + v4l2_fh_del(fh, filp); ...> } Manual changes have been applied to Documentation/ to update the usage patterns, to drivers/media/v4l2-core/v4l2-fh.c to update the v4l2_fh_del() prototype and reset file->private_data, and to include/media/v4l2-fh.h to update the v4l2_fh_del() function prototype and its documentation. 
Additionally, white space issues have been fixed manually in drivers/usb/gadget/function/uvc_v4l2.c Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-fh.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index d8fcf49f10e0..5e4c76163512 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -114,12 +114,15 @@ int v4l2_fh_open(struct file *filp); * v4l2_fh_del - Remove file handle from the list of file handles. * * @fh: pointer to &struct v4l2_fh + * @filp: pointer to &struct file associated with @fh + * + * The function resets filp->private_data to NULL. * * .. note:: * Must be called in v4l2_file_operations->release\(\) handler if the driver * uses &struct v4l2_fh. */ -void v4l2_fh_del(struct v4l2_fh *fh); +void v4l2_fh_del(struct v4l2_fh *fh, struct file *filp); /** * v4l2_fh_exit - Release resources related to a file handle. -- cgit v1.2.3 From bb4d6be205dae94aa2d3c3a1ad814dad90d4fd62 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Aug 2025 04:30:14 +0300 Subject: media: Drop V4L2_FL_USES_V4L2_FH checks Now that all drivers use v4l2_fh, we can drop the V4L2_FL_USES_V4L2_FH checks through the V4L2 core. To ensure that all new drivers use v4l2_fh, keep setting the V4L2_FL_USES_V4L2_FH flag in v4l2_fh_init(), and verify it is set after the .open() file operation returns. Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-dev.h | 2 +- include/media/v4l2-fh.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h index a69801274800..a213c3398dcf 100644 --- a/include/media/v4l2-dev.h +++ b/include/media/v4l2-dev.h @@ -74,7 +74,7 @@ struct dentry; * @V4L2_FL_USES_V4L2_FH: * indicates that file->private_data points to &struct v4l2_fh. * This flag is set by the core when v4l2_fh_init() is called. 
- * All new drivers should use it. + * All drivers must use it. * @V4L2_FL_QUIRK_INVERTED_CROP: * some old M2M drivers use g/s_crop/cropcap incorrectly: crop and * compose are swapped. If this flag is set, then the selection diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index 5e4c76163512..aad4b3689d7e 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -3,7 +3,7 @@ * v4l2-fh.h * * V4L2 file handle. Store per file handle data for the V4L2 - * framework. Using file handles is optional for the drivers. + * framework. Using file handles is mandatory for the drivers. * * Copyright (C) 2009--2010 Nokia Corporation. * -- cgit v1.2.3 From 0f1a7facb64abe5104c3eb235824ea8d6296474d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Aug 2025 04:30:51 +0300 Subject: media: v4l2-ioctl: Stop passing fh pointer to ioctl handlers Now that all drivers access the v4l2_fh from the file structure, there is no need to pass it as an explicit argument to ioctl handlers. Set the argument to NULL in the __video_do_ioctl() function, and rename the 'fh' argument in the ioctl handler declarations to 'priv' to indicate it does not contain a file handle. The argument could be removed altogether with a mechanical change (probably using coccinelle), but there are plans to pass a new argument to the ioctl handlers in the near future. The tree-wide change to remove the argument, only to add another one soon after, would be too much churn. While at it, fix argument alignment in vidioc_try_fmt_vid_out_overlay().
Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-ioctl.h | 238 ++++++++++++++++++++++----------------------- 1 file changed, 119 insertions(+), 119 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index 82695c3a300a..6f7a58350441 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -293,144 +293,144 @@ struct v4l2_ioctl_ops { /* ioctl callbacks */ /* VIDIOC_QUERYCAP handler */ - int (*vidioc_querycap)(struct file *file, void *fh, + int (*vidioc_querycap)(struct file *file, void *priv, struct v4l2_capability *cap); /* VIDIOC_ENUM_FMT handlers */ - int (*vidioc_enum_fmt_vid_cap)(struct file *file, void *fh, + int (*vidioc_enum_fmt_vid_cap)(struct file *file, void *priv, struct v4l2_fmtdesc *f); - int (*vidioc_enum_fmt_vid_overlay)(struct file *file, void *fh, + int (*vidioc_enum_fmt_vid_overlay)(struct file *file, void *priv, struct v4l2_fmtdesc *f); - int (*vidioc_enum_fmt_vid_out)(struct file *file, void *fh, + int (*vidioc_enum_fmt_vid_out)(struct file *file, void *priv, struct v4l2_fmtdesc *f); - int (*vidioc_enum_fmt_sdr_cap)(struct file *file, void *fh, + int (*vidioc_enum_fmt_sdr_cap)(struct file *file, void *priv, struct v4l2_fmtdesc *f); - int (*vidioc_enum_fmt_sdr_out)(struct file *file, void *fh, + int (*vidioc_enum_fmt_sdr_out)(struct file *file, void *priv, struct v4l2_fmtdesc *f); - int (*vidioc_enum_fmt_meta_cap)(struct file *file, void *fh, + int (*vidioc_enum_fmt_meta_cap)(struct file *file, void *priv, struct v4l2_fmtdesc *f); - int (*vidioc_enum_fmt_meta_out)(struct file *file, void *fh, + int (*vidioc_enum_fmt_meta_out)(struct file *file, void *priv, struct v4l2_fmtdesc *f); /* VIDIOC_G_FMT handlers */ - int (*vidioc_g_fmt_vid_cap)(struct file *file, void *fh, + int (*vidioc_g_fmt_vid_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_vid_overlay)(struct file *file, void *fh, + int 
(*vidioc_g_fmt_vid_overlay)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_vid_out)(struct file *file, void *fh, + int (*vidioc_g_fmt_vid_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_vid_out_overlay)(struct file *file, void *fh, + int (*vidioc_g_fmt_vid_out_overlay)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_vbi_cap)(struct file *file, void *fh, + int (*vidioc_g_fmt_vbi_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_vbi_out)(struct file *file, void *fh, + int (*vidioc_g_fmt_vbi_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_sliced_vbi_cap)(struct file *file, void *fh, + int (*vidioc_g_fmt_sliced_vbi_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_sliced_vbi_out)(struct file *file, void *fh, + int (*vidioc_g_fmt_sliced_vbi_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_vid_cap_mplane)(struct file *file, void *fh, + int (*vidioc_g_fmt_vid_cap_mplane)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_vid_out_mplane)(struct file *file, void *fh, + int (*vidioc_g_fmt_vid_out_mplane)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_sdr_cap)(struct file *file, void *fh, + int (*vidioc_g_fmt_sdr_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_sdr_out)(struct file *file, void *fh, + int (*vidioc_g_fmt_sdr_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_meta_cap)(struct file *file, void *fh, + int (*vidioc_g_fmt_meta_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_g_fmt_meta_out)(struct file *file, void *fh, + int (*vidioc_g_fmt_meta_out)(struct file *file, void *priv, struct v4l2_format *f); /* VIDIOC_S_FMT handlers */ - int (*vidioc_s_fmt_vid_cap)(struct file *file, void *fh, + int 
(*vidioc_s_fmt_vid_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_vid_overlay)(struct file *file, void *fh, + int (*vidioc_s_fmt_vid_overlay)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_vid_out)(struct file *file, void *fh, + int (*vidioc_s_fmt_vid_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_vid_out_overlay)(struct file *file, void *fh, + int (*vidioc_s_fmt_vid_out_overlay)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_vbi_cap)(struct file *file, void *fh, + int (*vidioc_s_fmt_vbi_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_vbi_out)(struct file *file, void *fh, + int (*vidioc_s_fmt_vbi_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_sliced_vbi_cap)(struct file *file, void *fh, + int (*vidioc_s_fmt_sliced_vbi_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_sliced_vbi_out)(struct file *file, void *fh, + int (*vidioc_s_fmt_sliced_vbi_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_vid_cap_mplane)(struct file *file, void *fh, + int (*vidioc_s_fmt_vid_cap_mplane)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_vid_out_mplane)(struct file *file, void *fh, + int (*vidioc_s_fmt_vid_out_mplane)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_sdr_cap)(struct file *file, void *fh, + int (*vidioc_s_fmt_sdr_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_sdr_out)(struct file *file, void *fh, + int (*vidioc_s_fmt_sdr_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_meta_cap)(struct file *file, void *fh, + int (*vidioc_s_fmt_meta_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_s_fmt_meta_out)(struct file *file, void *fh, + int (*vidioc_s_fmt_meta_out)(struct file *file, 
void *priv, struct v4l2_format *f); /* VIDIOC_TRY_FMT handlers */ - int (*vidioc_try_fmt_vid_cap)(struct file *file, void *fh, + int (*vidioc_try_fmt_vid_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_vid_overlay)(struct file *file, void *fh, + int (*vidioc_try_fmt_vid_overlay)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_vid_out)(struct file *file, void *fh, + int (*vidioc_try_fmt_vid_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_vid_out_overlay)(struct file *file, void *fh, - struct v4l2_format *f); - int (*vidioc_try_fmt_vbi_cap)(struct file *file, void *fh, + int (*vidioc_try_fmt_vid_out_overlay)(struct file *file, void *priv, + struct v4l2_format *f); + int (*vidioc_try_fmt_vbi_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_vbi_out)(struct file *file, void *fh, + int (*vidioc_try_fmt_vbi_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_sliced_vbi_cap)(struct file *file, void *fh, + int (*vidioc_try_fmt_sliced_vbi_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_sliced_vbi_out)(struct file *file, void *fh, + int (*vidioc_try_fmt_sliced_vbi_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_vid_cap_mplane)(struct file *file, void *fh, + int (*vidioc_try_fmt_vid_cap_mplane)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_vid_out_mplane)(struct file *file, void *fh, + int (*vidioc_try_fmt_vid_out_mplane)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_sdr_cap)(struct file *file, void *fh, + int (*vidioc_try_fmt_sdr_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_sdr_out)(struct file *file, void *fh, + int (*vidioc_try_fmt_sdr_out)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_meta_cap)(struct file 
*file, void *fh, + int (*vidioc_try_fmt_meta_cap)(struct file *file, void *priv, struct v4l2_format *f); - int (*vidioc_try_fmt_meta_out)(struct file *file, void *fh, + int (*vidioc_try_fmt_meta_out)(struct file *file, void *priv, struct v4l2_format *f); /* Buffer handlers */ - int (*vidioc_reqbufs)(struct file *file, void *fh, + int (*vidioc_reqbufs)(struct file *file, void *priv, struct v4l2_requestbuffers *b); - int (*vidioc_querybuf)(struct file *file, void *fh, + int (*vidioc_querybuf)(struct file *file, void *priv, struct v4l2_buffer *b); - int (*vidioc_qbuf)(struct file *file, void *fh, + int (*vidioc_qbuf)(struct file *file, void *priv, struct v4l2_buffer *b); - int (*vidioc_expbuf)(struct file *file, void *fh, + int (*vidioc_expbuf)(struct file *file, void *priv, struct v4l2_exportbuffer *e); - int (*vidioc_dqbuf)(struct file *file, void *fh, + int (*vidioc_dqbuf)(struct file *file, void *priv, struct v4l2_buffer *b); - int (*vidioc_create_bufs)(struct file *file, void *fh, + int (*vidioc_create_bufs)(struct file *file, void *priv, struct v4l2_create_buffers *b); - int (*vidioc_prepare_buf)(struct file *file, void *fh, + int (*vidioc_prepare_buf)(struct file *file, void *priv, struct v4l2_buffer *b); - int (*vidioc_remove_bufs)(struct file *file, void *fh, + int (*vidioc_remove_bufs)(struct file *file, void *priv, struct v4l2_remove_buffers *d); - int (*vidioc_overlay)(struct file *file, void *fh, unsigned int i); - int (*vidioc_g_fbuf)(struct file *file, void *fh, + int (*vidioc_overlay)(struct file *file, void *priv, unsigned int i); + int (*vidioc_g_fbuf)(struct file *file, void *priv, struct v4l2_framebuffer *a); - int (*vidioc_s_fbuf)(struct file *file, void *fh, + int (*vidioc_s_fbuf)(struct file *file, void *priv, const struct v4l2_framebuffer *a); /* Stream on/off */ - int (*vidioc_streamon)(struct file *file, void *fh, + int (*vidioc_streamon)(struct file *file, void *priv, enum v4l2_buf_type i); - int (*vidioc_streamoff)(struct file *file, void 
*fh, + int (*vidioc_streamoff)(struct file *file, void *priv, enum v4l2_buf_type i); /* @@ -438,135 +438,135 @@ struct v4l2_ioctl_ops { * * Note: ENUMSTD is handled by videodev.c */ - int (*vidioc_g_std)(struct file *file, void *fh, v4l2_std_id *norm); - int (*vidioc_s_std)(struct file *file, void *fh, v4l2_std_id norm); - int (*vidioc_querystd)(struct file *file, void *fh, v4l2_std_id *a); + int (*vidioc_g_std)(struct file *file, void *priv, v4l2_std_id *norm); + int (*vidioc_s_std)(struct file *file, void *priv, v4l2_std_id norm); + int (*vidioc_querystd)(struct file *file, void *priv, v4l2_std_id *a); /* Input handling */ - int (*vidioc_enum_input)(struct file *file, void *fh, + int (*vidioc_enum_input)(struct file *file, void *priv, struct v4l2_input *inp); - int (*vidioc_g_input)(struct file *file, void *fh, unsigned int *i); - int (*vidioc_s_input)(struct file *file, void *fh, unsigned int i); + int (*vidioc_g_input)(struct file *file, void *priv, unsigned int *i); + int (*vidioc_s_input)(struct file *file, void *priv, unsigned int i); /* Output handling */ - int (*vidioc_enum_output)(struct file *file, void *fh, + int (*vidioc_enum_output)(struct file *file, void *priv, struct v4l2_output *a); - int (*vidioc_g_output)(struct file *file, void *fh, unsigned int *i); - int (*vidioc_s_output)(struct file *file, void *fh, unsigned int i); + int (*vidioc_g_output)(struct file *file, void *priv, unsigned int *i); + int (*vidioc_s_output)(struct file *file, void *priv, unsigned int i); /* Control handling */ - int (*vidioc_query_ext_ctrl)(struct file *file, void *fh, + int (*vidioc_query_ext_ctrl)(struct file *file, void *priv, struct v4l2_query_ext_ctrl *a); - int (*vidioc_g_ext_ctrls)(struct file *file, void *fh, + int (*vidioc_g_ext_ctrls)(struct file *file, void *priv, struct v4l2_ext_controls *a); - int (*vidioc_s_ext_ctrls)(struct file *file, void *fh, + int (*vidioc_s_ext_ctrls)(struct file *file, void *priv, struct v4l2_ext_controls *a); - int 
(*vidioc_try_ext_ctrls)(struct file *file, void *fh, + int (*vidioc_try_ext_ctrls)(struct file *file, void *priv, struct v4l2_ext_controls *a); - int (*vidioc_querymenu)(struct file *file, void *fh, + int (*vidioc_querymenu)(struct file *file, void *priv, struct v4l2_querymenu *a); /* Audio ioctls */ - int (*vidioc_enumaudio)(struct file *file, void *fh, + int (*vidioc_enumaudio)(struct file *file, void *priv, struct v4l2_audio *a); - int (*vidioc_g_audio)(struct file *file, void *fh, + int (*vidioc_g_audio)(struct file *file, void *priv, struct v4l2_audio *a); - int (*vidioc_s_audio)(struct file *file, void *fh, + int (*vidioc_s_audio)(struct file *file, void *priv, const struct v4l2_audio *a); /* Audio out ioctls */ - int (*vidioc_enumaudout)(struct file *file, void *fh, + int (*vidioc_enumaudout)(struct file *file, void *priv, struct v4l2_audioout *a); - int (*vidioc_g_audout)(struct file *file, void *fh, + int (*vidioc_g_audout)(struct file *file, void *priv, struct v4l2_audioout *a); - int (*vidioc_s_audout)(struct file *file, void *fh, + int (*vidioc_s_audout)(struct file *file, void *priv, const struct v4l2_audioout *a); - int (*vidioc_g_modulator)(struct file *file, void *fh, + int (*vidioc_g_modulator)(struct file *file, void *priv, struct v4l2_modulator *a); - int (*vidioc_s_modulator)(struct file *file, void *fh, + int (*vidioc_s_modulator)(struct file *file, void *priv, const struct v4l2_modulator *a); /* Crop ioctls */ - int (*vidioc_g_pixelaspect)(struct file *file, void *fh, + int (*vidioc_g_pixelaspect)(struct file *file, void *priv, int buf_type, struct v4l2_fract *aspect); - int (*vidioc_g_selection)(struct file *file, void *fh, + int (*vidioc_g_selection)(struct file *file, void *priv, struct v4l2_selection *s); - int (*vidioc_s_selection)(struct file *file, void *fh, + int (*vidioc_s_selection)(struct file *file, void *priv, struct v4l2_selection *s); /* Compression ioctls */ - int (*vidioc_g_jpegcomp)(struct file *file, void *fh, + int 
(*vidioc_g_jpegcomp)(struct file *file, void *priv, struct v4l2_jpegcompression *a); - int (*vidioc_s_jpegcomp)(struct file *file, void *fh, + int (*vidioc_s_jpegcomp)(struct file *file, void *priv, const struct v4l2_jpegcompression *a); - int (*vidioc_g_enc_index)(struct file *file, void *fh, + int (*vidioc_g_enc_index)(struct file *file, void *priv, struct v4l2_enc_idx *a); - int (*vidioc_encoder_cmd)(struct file *file, void *fh, + int (*vidioc_encoder_cmd)(struct file *file, void *priv, struct v4l2_encoder_cmd *a); - int (*vidioc_try_encoder_cmd)(struct file *file, void *fh, + int (*vidioc_try_encoder_cmd)(struct file *file, void *priv, struct v4l2_encoder_cmd *a); - int (*vidioc_decoder_cmd)(struct file *file, void *fh, + int (*vidioc_decoder_cmd)(struct file *file, void *priv, struct v4l2_decoder_cmd *a); - int (*vidioc_try_decoder_cmd)(struct file *file, void *fh, + int (*vidioc_try_decoder_cmd)(struct file *file, void *priv, struct v4l2_decoder_cmd *a); /* Stream type-dependent parameter ioctls */ - int (*vidioc_g_parm)(struct file *file, void *fh, + int (*vidioc_g_parm)(struct file *file, void *priv, struct v4l2_streamparm *a); - int (*vidioc_s_parm)(struct file *file, void *fh, + int (*vidioc_s_parm)(struct file *file, void *priv, struct v4l2_streamparm *a); /* Tuner ioctls */ - int (*vidioc_g_tuner)(struct file *file, void *fh, + int (*vidioc_g_tuner)(struct file *file, void *priv, struct v4l2_tuner *a); - int (*vidioc_s_tuner)(struct file *file, void *fh, + int (*vidioc_s_tuner)(struct file *file, void *priv, const struct v4l2_tuner *a); - int (*vidioc_g_frequency)(struct file *file, void *fh, + int (*vidioc_g_frequency)(struct file *file, void *priv, struct v4l2_frequency *a); - int (*vidioc_s_frequency)(struct file *file, void *fh, + int (*vidioc_s_frequency)(struct file *file, void *priv, const struct v4l2_frequency *a); - int (*vidioc_enum_freq_bands)(struct file *file, void *fh, + int (*vidioc_enum_freq_bands)(struct file *file, void *priv, struct 
v4l2_frequency_band *band); /* Sliced VBI cap */ - int (*vidioc_g_sliced_vbi_cap)(struct file *file, void *fh, + int (*vidioc_g_sliced_vbi_cap)(struct file *file, void *priv, struct v4l2_sliced_vbi_cap *a); /* Log status ioctl */ - int (*vidioc_log_status)(struct file *file, void *fh); + int (*vidioc_log_status)(struct file *file, void *priv); - int (*vidioc_s_hw_freq_seek)(struct file *file, void *fh, + int (*vidioc_s_hw_freq_seek)(struct file *file, void *priv, const struct v4l2_hw_freq_seek *a); /* Debugging ioctls */ #ifdef CONFIG_VIDEO_ADV_DEBUG - int (*vidioc_g_register)(struct file *file, void *fh, + int (*vidioc_g_register)(struct file *file, void *priv, struct v4l2_dbg_register *reg); - int (*vidioc_s_register)(struct file *file, void *fh, + int (*vidioc_s_register)(struct file *file, void *priv, const struct v4l2_dbg_register *reg); - int (*vidioc_g_chip_info)(struct file *file, void *fh, + int (*vidioc_g_chip_info)(struct file *file, void *priv, struct v4l2_dbg_chip_info *chip); #endif - int (*vidioc_enum_framesizes)(struct file *file, void *fh, + int (*vidioc_enum_framesizes)(struct file *file, void *priv, struct v4l2_frmsizeenum *fsize); - int (*vidioc_enum_frameintervals)(struct file *file, void *fh, + int (*vidioc_enum_frameintervals)(struct file *file, void *priv, struct v4l2_frmivalenum *fival); /* DV Timings IOCTLs */ - int (*vidioc_s_dv_timings)(struct file *file, void *fh, + int (*vidioc_s_dv_timings)(struct file *file, void *priv, struct v4l2_dv_timings *timings); - int (*vidioc_g_dv_timings)(struct file *file, void *fh, + int (*vidioc_g_dv_timings)(struct file *file, void *priv, struct v4l2_dv_timings *timings); - int (*vidioc_query_dv_timings)(struct file *file, void *fh, + int (*vidioc_query_dv_timings)(struct file *file, void *priv, struct v4l2_dv_timings *timings); - int (*vidioc_enum_dv_timings)(struct file *file, void *fh, + int (*vidioc_enum_dv_timings)(struct file *file, void *priv, struct v4l2_enum_dv_timings *timings); - int 
(*vidioc_dv_timings_cap)(struct file *file, void *fh, + int (*vidioc_dv_timings_cap)(struct file *file, void *priv, struct v4l2_dv_timings_cap *cap); - int (*vidioc_g_edid)(struct file *file, void *fh, + int (*vidioc_g_edid)(struct file *file, void *priv, struct v4l2_edid *edid); - int (*vidioc_s_edid)(struct file *file, void *fh, + int (*vidioc_s_edid)(struct file *file, void *priv, struct v4l2_edid *edid); int (*vidioc_subscribe_event)(struct v4l2_fh *fh, @@ -575,7 +575,7 @@ struct v4l2_ioctl_ops { const struct v4l2_event_subscription *sub); /* For other private ioctls */ - long (*vidioc_default)(struct file *file, void *fh, + long (*vidioc_default)(struct file *file, void *priv, bool valid_prio, unsigned int cmd, void *arg); }; -- cgit v1.2.3 From 9d05191c4ed31bb817c03a1b7028ed81fefa8bfb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 10 Aug 2025 04:30:56 +0300 Subject: media: v4l2-core: Rename second ioctl handlers argument to 'void *priv' The second argument to the ioctl handlers is not a file handle any more. Rename it from 'void *fh' to 'void *priv' in the V4L2 core, to avoid misconceptions. While at it, align function arguments in include/media/v4l2-mem2mem.h. Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-ctrls.h | 4 ++-- include/media/v4l2-mem2mem.h | 42 +++++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index c32c46286441..4a294a5c7bdd 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -1313,13 +1313,13 @@ void v4l2_ctrl_merge(const struct v4l2_event *old, struct v4l2_event *new); * v4l2_ctrl_log_status - helper function to implement %VIDIOC_LOG_STATUS ioctl * * @file: pointer to struct file - * @fh: unused. Kept just to be compatible to the arguments expected by + * @priv: unused. 
Kept just to be compatible to the arguments expected by * &struct v4l2_ioctl_ops.vidioc_log_status. * * Can be used as a vidioc_log_status function that just dumps all controls * associated with the filehandle. */ -int v4l2_ctrl_log_status(struct file *file, void *fh); +int v4l2_ctrl_log_status(struct file *file, void *priv); /** * v4l2_ctrl_subscribe_event - Subscribes to an event diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h index 0af330cf91c3..09c6164577cc 100644 --- a/include/media/v4l2-mem2mem.h +++ b/include/media/v4l2-mem2mem.h @@ -864,34 +864,34 @@ void v4l2_m2m_request_queue(struct media_request *req); /* v4l2 ioctl helpers */ int v4l2_m2m_ioctl_reqbufs(struct file *file, void *priv, - struct v4l2_requestbuffers *rb); -int v4l2_m2m_ioctl_create_bufs(struct file *file, void *fh, - struct v4l2_create_buffers *create); + struct v4l2_requestbuffers *rb); +int v4l2_m2m_ioctl_create_bufs(struct file *file, void *priv, + struct v4l2_create_buffers *create); int v4l2_m2m_ioctl_remove_bufs(struct file *file, void *priv, struct v4l2_remove_buffers *d); -int v4l2_m2m_ioctl_querybuf(struct file *file, void *fh, - struct v4l2_buffer *buf); -int v4l2_m2m_ioctl_expbuf(struct file *file, void *fh, - struct v4l2_exportbuffer *eb); -int v4l2_m2m_ioctl_qbuf(struct file *file, void *fh, - struct v4l2_buffer *buf); -int v4l2_m2m_ioctl_dqbuf(struct file *file, void *fh, - struct v4l2_buffer *buf); -int v4l2_m2m_ioctl_prepare_buf(struct file *file, void *fh, +int v4l2_m2m_ioctl_querybuf(struct file *file, void *priv, + struct v4l2_buffer *buf); +int v4l2_m2m_ioctl_expbuf(struct file *file, void *priv, + struct v4l2_exportbuffer *eb); +int v4l2_m2m_ioctl_qbuf(struct file *file, void *priv, + struct v4l2_buffer *buf); +int v4l2_m2m_ioctl_dqbuf(struct file *file, void *priv, + struct v4l2_buffer *buf); +int v4l2_m2m_ioctl_prepare_buf(struct file *file, void *priv, struct v4l2_buffer *buf); -int v4l2_m2m_ioctl_streamon(struct file *file, void *fh, - enum 
v4l2_buf_type type); -int v4l2_m2m_ioctl_streamoff(struct file *file, void *fh, - enum v4l2_buf_type type); -int v4l2_m2m_ioctl_encoder_cmd(struct file *file, void *fh, +int v4l2_m2m_ioctl_streamon(struct file *file, void *priv, + enum v4l2_buf_type type); +int v4l2_m2m_ioctl_streamoff(struct file *file, void *priv, + enum v4l2_buf_type type); +int v4l2_m2m_ioctl_encoder_cmd(struct file *file, void *priv, struct v4l2_encoder_cmd *ec); -int v4l2_m2m_ioctl_decoder_cmd(struct file *file, void *fh, +int v4l2_m2m_ioctl_decoder_cmd(struct file *file, void *priv, struct v4l2_decoder_cmd *dc); -int v4l2_m2m_ioctl_try_encoder_cmd(struct file *file, void *fh, +int v4l2_m2m_ioctl_try_encoder_cmd(struct file *file, void *priv, struct v4l2_encoder_cmd *ec); -int v4l2_m2m_ioctl_try_decoder_cmd(struct file *file, void *fh, +int v4l2_m2m_ioctl_try_decoder_cmd(struct file *file, void *priv, struct v4l2_decoder_cmd *dc); -int v4l2_m2m_ioctl_stateless_try_decoder_cmd(struct file *file, void *fh, +int v4l2_m2m_ioctl_stateless_try_decoder_cmd(struct file *file, void *priv, struct v4l2_decoder_cmd *dc); int v4l2_m2m_ioctl_stateless_decoder_cmd(struct file *file, void *priv, struct v4l2_decoder_cmd *dc); -- cgit v1.2.3 From 1fd143c24fb621f063f913cb1e48cc688c7eca15 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 May 2024 23:08:31 -0400 Subject: scsi: switch scsi_bios_ptable() and scsi_partsize() to gendisk Both helpers are reading the partition table of the disk specified by block_device of some partition on it; result depends only upon the disk in question, so we might as well pass the struct gendisk instead. Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Al Viro --- include/scsi/scsicam.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/scsi/scsicam.h b/include/scsi/scsicam.h index 08edd603e521..67f4e8835bc8 100644 --- a/include/scsi/scsicam.h +++ b/include/scsi/scsicam.h @@ -13,7 +13,8 @@ #ifndef SCSICAM_H #define SCSICAM_H +struct gendisk; int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip); -bool scsi_partsize(struct block_device *bdev, sector_t capacity, int geom[3]); -unsigned char *scsi_bios_ptable(struct block_device *bdev); +bool scsi_partsize(struct gendisk *disk, sector_t capacity, int geom[3]); +unsigned char *scsi_bios_ptable(struct gendisk *disk); #endif /* def SCSICAM_H */ -- cgit v1.2.3 From 3eb50369c09efb0f668a7f568a7e6f7cf4194cde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 May 2024 23:22:01 -0400 Subject: scsi: switch ->bios_param() to passing gendisk Instances are passed struct block_device *bdev argument; the only thing it is used for (if it's used in the first place) is bdev->bd_disk. Might as well pass that in the first place... Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Al Viro --- include/linux/libata.h | 2 +- include/scsi/libsas.h | 2 +- include/scsi/scsi_host.h | 2 +- include/scsi/scsicam.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0620dd67369f..21de0935775d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1203,7 +1203,7 @@ extern void ata_qc_complete(struct ata_queued_cmd *qc); extern u64 ata_qc_get_active(struct ata_port *ap); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); extern int ata_std_bios_param(struct scsi_device *sdev, - struct block_device *bdev, + struct gendisk *unused, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); extern int ata_scsi_sdev_init(struct scsi_device *sdev); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index ba460b6c0374..9c6e90829dbd 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -685,7 +685,7 @@ extern int sas_queuecommand(struct Scsi_Host *, struct scsi_cmnd *); extern int sas_target_alloc(struct scsi_target *); int sas_sdev_configure(struct scsi_device *dev, struct queue_limits *lim); extern int sas_change_queue_depth(struct scsi_device *, int new_depth); -extern int sas_bios_param(struct scsi_device *, struct block_device *, +extern int sas_bios_param(struct scsi_device *, struct gendisk *, sector_t capacity, int *hsc); int sas_execute_internal_abort_single(struct domain_device *device, u16 tag, unsigned int qid, diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index c53812b9026f..f5a243261236 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -318,7 +318,7 @@ struct scsi_host_template { * * Status: OPTIONAL */ - int (* bios_param)(struct scsi_device *, struct block_device *, + int (* bios_param)(struct scsi_device *, struct gendisk *, sector_t, 
int []); /* diff --git a/include/scsi/scsicam.h b/include/scsi/scsicam.h index 67f4e8835bc8..1131f51ed2c8 100644 --- a/include/scsi/scsicam.h +++ b/include/scsi/scsicam.h @@ -14,7 +14,7 @@ #ifndef SCSICAM_H #define SCSICAM_H struct gendisk; -int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip); +int scsicam_bios_param(struct gendisk *disk, sector_t capacity, int *ip); bool scsi_partsize(struct gendisk *disk, sector_t capacity, int geom[3]); unsigned char *scsi_bios_ptable(struct gendisk *disk); #endif /* def SCSICAM_H */ -- cgit v1.2.3 From 4fc8728aa34f54835b72e4db0f3db76a72948b65 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 21 May 2024 22:19:55 -0400 Subject: block: switch ->getgeo() to struct gendisk Instances are happier that way and it makes more sense anyway - the only part of the result that is related to partition we are given is the start sector, and that has been filled in by the caller. Everything else is a function of the disk. Only one instance (DASD) is ever looking at anything other than bdev->bd_disk and that one is trivial to adjust. Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Acked-by: Jens Axboe Signed-off-by: Al Viro --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fbc45121cd4f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1659,7 +1659,7 @@ struct block_device_operations { unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); void (*unlock_native_capacity) (struct gendisk *); - int (*getgeo)(struct block_device *, struct hd_geometry *); + int (*getgeo)(struct gendisk *, struct hd_geometry *); int (*set_read_only)(struct block_device *bdev, bool ro); void (*free_disk)(struct gendisk *disk); /* this callback is with swap_lock and sometimes page table lock held */ -- cgit v1.2.3 From a892a3e74fb4f6ef040659297603abf11ccf29a7 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:32 +0300 Subject: RDMA/sa_query: Support IB service records resolution Add an SA query API ib_sa_service_rec_get() to support building and sending SA query MADs that ask for service records with a specific name or ID, and receiving and parsing responses from the SM. 
Signed-off-by: Or Har-Toov Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/9af6c82f3a3a9d975115a33235fb4ffc7c8edb21.1751279793.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- include/rdma/ib_mad.h | 1 + include/rdma/ib_sa.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 3f1b58d8b4bf..8bd0e1eb393b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -48,6 +48,7 @@ #define IB_MGMT_METHOD_REPORT 0x06 #define IB_MGMT_METHOD_REPORT_RESP 0x86 #define IB_MGMT_METHOD_TRAP_REPRESS 0x07 +#define IB_MGMT_METHOD_GET_TABLE 0x12 #define IB_MGMT_METHOD_RESP 0x80 #define IB_BM_ATTR_MOD_RESP cpu_to_be32(1) diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index b46353fc53bf..95e8924ad563 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -189,6 +189,20 @@ struct sa_path_rec { u32 flags; }; +struct sa_service_rec { + __be64 id; + __u8 gid[16]; + __be16 pkey; + __u8 reserved[2]; + __be32 lease; + __u8 key[16]; + __u8 name[64]; + __u8 data_8[16]; + __be16 data_16[8]; + __be32 data_32[4]; + __be64 data_64[2]; +}; + static inline enum ib_gid_type sa_conv_pathrec_to_gid_type(struct sa_path_rec *rec) { @@ -417,6 +431,17 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, unsigned int num_prs, void *context), void *context, struct ib_sa_query **query); +int ib_sa_service_rec_get(struct ib_sa_client *client, + struct ib_device *device, u32 port_num, + struct sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + unsigned long timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct sa_service_rec *resp, + unsigned int num_services, + void *context), + void *context, struct ib_sa_query **sa_query); + struct ib_sa_multicast { struct ib_sa_mcmember_rec rec; ib_sa_comp_mask comp_mask; @@ -508,6 +533,18 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u32 
port_num, */ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute); +/** + * ib_sa_pack_service - Convert a service record from struct ib_sa_service_rec + * to IB MAD wire format. + */ +void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute); + +/** + * ib_sa_unpack_service - Convert a service record from MAD format to struct + * ib_sa_service_rec. + */ +void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec); + /** * ib_sa_unpack_path - Convert a path record from MAD format to struct * ib_sa_path_rec. -- cgit v1.2.3 From a6404823fe20e06d4061bc63e0295b7165af4c14 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:33 +0300 Subject: RDMA/cma: Support IB service record resolution Add new UCMA command and the corresponding CMA implementation. Userspace can send this command to request service resolution based on service name or ID. On a successful resolution, one or multiple service records are returned, the first one will be used as destination address by default. Two new CM events are added and returned to caller accordingly: - RDMA_CM_EVENT_ADDRINFO_RESOLVED: Resolve succeeded; - RDMA_CM_EVENT_ADDRINFO_ERROR: Resolve failed. Internally two new CM states are added: - RDMA_CM_ADDRINFO_QUERY: CM is in the process of IB service resolution; - RDMA_CM_ADDRINFO_RESOLVED: CM has finished the resolve process. With these new states, beside existing state transfer processes, 2 new processes are supported: 1. The default address is used: RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDRINFO_QUERY -> RDMA_CM_ADDRINFO_RESOLVED -> RDMA_CM_ROUTE_QUERY 2. To use a different address: RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDRINFO_QUERY-> RDMA_CM_ADDRINFO_RESOLVED -> RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_RESOLVED -> RDMA_CM_ROUTE_QUERY In the 2nd case, resolve_addrinfo returns multiple records, a user could call rdma_resolve_addr() with the one that is not the first. 
Signed-off-by: Or Har-Toov Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/b6e82ad75522a13b5efe4ff86da0e465aab04cc2.1751279794.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- include/rdma/rdma_cm.h | 18 +++++++++++++++++- include/uapi/rdma/rdma_user_cm.h | 20 +++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index d1593ad47e28..72d1568e4cfb 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -33,7 +33,9 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_MULTICAST_JOIN, RDMA_CM_EVENT_MULTICAST_ERROR, RDMA_CM_EVENT_ADDR_CHANGE, - RDMA_CM_EVENT_TIMEWAIT_EXIT + RDMA_CM_EVENT_TIMEWAIT_EXIT, + RDMA_CM_EVENT_ADDRINFO_RESOLVED, + RDMA_CM_EVENT_ADDRINFO_ERROR }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); @@ -63,6 +65,9 @@ struct rdma_route { * 2 - Both primary and alternate path are available */ int num_pri_alt_paths; + + unsigned int num_service_recs; + struct sa_service_rec *service_recs; }; struct rdma_conn_param { @@ -197,6 +202,17 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, */ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms); +/** + * rdma_resolve_ib_service - Resolve the IB service record of the + * service with the given service ID or name. + * + * This function is optional in the rdma cm flow. It is called on the client + * side of a connection, before calling rdma_resolve_route. The resolution + * can be done once per rdma_cm_id. + */ +int rdma_resolve_ib_service(struct rdma_cm_id *id, + struct rdma_ucm_ib_service *ibs); + /** * rdma_create_qp - Allocate a QP and associate it with the specified RDMA * identifier. 
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 7cea03581f79..8799623bcba0 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -67,7 +67,8 @@ enum { RDMA_USER_CM_CMD_QUERY, RDMA_USER_CM_CMD_BIND, RDMA_USER_CM_CMD_RESOLVE_ADDR, - RDMA_USER_CM_CMD_JOIN_MCAST + RDMA_USER_CM_CMD_JOIN_MCAST, + RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE }; /* See IBTA Annex A11, servies ID bytes 4 & 5 */ @@ -338,4 +339,21 @@ struct rdma_ucm_migrate_resp { __u32 events_reported; }; +enum { + RDMA_USER_CM_IB_SERVICE_FLAG_ID = 1 << 0, + RDMA_USER_CM_IB_SERVICE_FLAG_NAME = 1 << 1, +}; + +#define RDMA_USER_CM_IB_SERVICE_NAME_SIZE 64 +struct rdma_ucm_ib_service { + __u64 service_id; + __u8 service_name[RDMA_USER_CM_IB_SERVICE_NAME_SIZE]; + __u32 flags; + __u32 reserved; +}; + +struct rdma_ucm_resolve_ib_service { + __u32 id; + struct rdma_ucm_ib_service ibs; +}; #endif /* RDMA_USER_CM_H */ -- cgit v1.2.3 From 810f874eda8e492701ba3623aa298caad61bb47c Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:34 +0300 Subject: RDMA/ucma: Support query resolved service records Enable user-space to query resolved service records through a ucma command when a RDMA_CM_EVENT_ADDRINFO_RESOLVED event is received. 
Signed-off-by: Or Har-Toov Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/1090ee7c00c3f8058c4f9e7557de983504a16715.1751279794.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/ib_user_sa.h | 14 ++++++++++++++ include/uapi/rdma/rdma_user_cm.h | 8 +++++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_sa.h b/include/uapi/rdma/ib_user_sa.h index 435155d6e1c6..acfa20816bc6 100644 --- a/include/uapi/rdma/ib_user_sa.h +++ b/include/uapi/rdma/ib_user_sa.h @@ -74,4 +74,18 @@ struct ib_user_path_rec { __u8 preference; }; +struct ib_user_service_rec { + __be64 id; + __u8 gid[16]; + __be16 pkey; + __u8 reserved[2]; + __be32 lease; + __u8 key[16]; + __u8 name[64]; + __u8 data_8[16]; + __be16 data_16[8]; + __be32 data_32[4]; + __be64 data_64[2]; +}; + #endif /* IB_USER_SA_H */ diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 8799623bcba0..00501da0567e 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -148,7 +148,8 @@ struct rdma_ucm_resolve_route { enum { RDMA_USER_CM_QUERY_ADDR, RDMA_USER_CM_QUERY_PATH, - RDMA_USER_CM_QUERY_GID + RDMA_USER_CM_QUERY_GID, + RDMA_USER_CM_QUERY_IB_SERVICE }; struct rdma_ucm_query { @@ -188,6 +189,11 @@ struct rdma_ucm_query_path_resp { struct ib_path_rec_data path_data[]; }; +struct rdma_ucm_query_ib_service_resp { + __u32 num_service_recs; + struct ib_user_service_rec recs[]; +}; + struct rdma_ucm_conn_param { __u32 qp_num; __u32 qkey; -- cgit v1.2.3 From a3c9d0fcd3715541bbf97da2ddde9d032e2fe6d5 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Jun 2025 13:52:35 +0300 Subject: RDMA/ucma: Support write an event into a CM Enable user-space to inject an event into a CM through its event channel. Two new events are added and supported: RDMA_CM_EVENT_USER and RDMA_CM_EVENT_INTERNAL.
With these 2 events a new event parameter "arg" is supported, which is passed from sender to receiver transparently. With this feature an application is able to write an event into a CM channel with a new user-space rdmacm API. For example thread T1 could write an event with the API: rdma_write_cm_event(cm_id, RDMA_CM_EVENT_USER, status, arg); and thread T2 could receive the event with rdma_get_cm_event(). Signed-off-by: Mark Zhang Reviewed-by: Vlad Dumitrescu Link: https://patch.msgid.link/fdf49d0b17a45933c5d8c1d90605c9447d9a3c73.1751279794.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- include/rdma/rdma_cm.h | 5 ++++- include/uapi/rdma/rdma_user_cm.h | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 72d1568e4cfb..9bd930a83e6e 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -35,7 +35,9 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_ADDR_CHANGE, RDMA_CM_EVENT_TIMEWAIT_EXIT, RDMA_CM_EVENT_ADDRINFO_RESOLVED, - RDMA_CM_EVENT_ADDRINFO_ERROR + RDMA_CM_EVENT_ADDRINFO_ERROR, + RDMA_CM_EVENT_USER, + RDMA_CM_EVENT_INTERNAL, }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); @@ -98,6 +100,7 @@ struct rdma_cm_event { union { struct rdma_conn_param conn; struct rdma_ud_param ud; + u64 arg; } param; struct rdma_ucm_ece ece; }; diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 00501da0567e..5ded174687ee 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -68,7 +68,8 @@ enum { RDMA_USER_CM_CMD_BIND, RDMA_USER_CM_CMD_RESOLVE_ADDR, RDMA_USER_CM_CMD_JOIN_MCAST, - RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE + RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE, + RDMA_USER_CM_CMD_WRITE_CM_EVENT, }; /* See IBTA Annex A11, servies ID bytes 4 & 5 */ @@ -304,6 +305,7 @@ struct rdma_ucm_event_resp { union { struct rdma_ucm_conn_param conn; struct rdma_ucm_ud_param ud; + 
__u32 arg32[2]; } param; __u32 reserved; struct rdma_ucm_ece ece; @@ -362,4 +364,16 @@ struct rdma_ucm_resolve_ib_service { __u32 id; struct rdma_ucm_ib_service ibs; }; + +struct rdma_ucm_write_cm_event { + __u32 id; + __u32 reserved; + __u32 event; + __u32 status; + union { + struct rdma_ucm_conn_param conn; + struct rdma_ucm_ud_param ud; + __u64 arg; + } param; +}; #endif /* RDMA_USER_CM_H */ -- cgit v1.2.3 From 7ecb662b717a34305beacf71e9fadd1559b4b9ee Mon Sep 17 00:00:00 2001 From: Mehdi Djait Date: Mon, 7 Jul 2025 16:32:53 +0200 Subject: media: v4l2-common: Add a helper for obtaining the clock producer Introduce a helper for v4l2 sensor drivers on both DT- and ACPI-based platforms to retrieve a reference to the clock producer from firmware. This helper behaves the same as devm_clk_get() except where there is no clock producer like in ACPI-based platforms. For ACPI-based platforms the function will read the "clock-frequency" ACPI _DSD property and register a fixed frequency clock with the frequency indicated in the property. This function also handles the special ACPI-based system case where: . The clock-frequency _DSD property is present. . A reference to the clock producer is present, where the clock is provided by a camera sensor PMIC driver (e.g. int3472/tps68470.c) In this case try to set the clock-frequency value to the provided clock. 
Reviewed-by: Laurent Pinchart Reviewed-by: Lad Prabhakar Signed-off-by: Mehdi Djait Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-common.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index 0a43f56578bc..9d6c236e8f14 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -97,6 +97,7 @@ int v4l2_ctrl_query_fill(struct v4l2_queryctrl *qctrl, /* ------------------------------------------------------------------------- */ +struct clk; struct v4l2_device; struct v4l2_subdev; struct v4l2_subdev_ops; @@ -620,6 +621,32 @@ int v4l2_link_freq_to_bitmap(struct device *dev, const u64 *fw_link_freqs, unsigned int num_of_driver_link_freqs, unsigned long *bitmap); +/** + * devm_v4l2_sensor_clk_get - lookup and obtain a reference to a clock producer + * for a camera sensor. + * + * @dev: device for v4l2 sensor clock "consumer" + * @id: clock consumer ID + * + * This function behaves the same way as devm_clk_get() except where there + * is no clock producer like in ACPI-based platforms. + * + * For ACPI-based platforms, the function will read the "clock-frequency" + * ACPI _DSD property and register a fixed-clock with the frequency indicated + * in the property. + * + * This function also handles the special ACPI-based system case where: + * + * * The clock-frequency _DSD property is present. + * * A reference to the clock producer is present, where the clock is provided + * by a camera sensor PMIC driver (e.g. int3472/tps68470.c) + * + * In this case try to set the clock-frequency value to the provided clock. + * + * Returns a pointer to a struct clk on success or an error pointer on failure. 
+ */ +struct clk *devm_v4l2_sensor_clk_get(struct device *dev, const char *id); + static inline u64 v4l2_buffer_get_timestamp(const struct v4l2_buffer *buf) { /* -- cgit v1.2.3 From 9528d32873b38281ae105f2f5799e79ae9d086c2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Aug 2025 10:27:45 +0200 Subject: kcov, usb: Don't disable interrupts in kcov_remote_start_usb_softirq() kcov_remote_start_usb_softirq() marks the beginning of the urb's completion callback. HCDs marked HCD_BH will invoke this function from the softirq and in_serving_softirq() will detect this properly. Root-HUB (RH) requests will not be delayed to softirq but complete immediately in IRQ context. This will confuse kcov because in_serving_softirq() will report true if the softirq is served after the hardirq and if the softirq got interrupted by the hardirq which is currently running. This was addressed by simply disabling interrupts in kcov_remote_start_usb_softirq() which avoided the interruption by the RH while a regular completion callback was invoked. This not only changes the behaviour while kcov is enabled but also breaks PREEMPT_RT because now sleeping locks can no longer be acquired. Revert the previous fix. Address the issue by invoking kcov_remote_start_usb() only if the context is just "serving softirqs", which is identified by checking that in_serving_softirq() is true and in_hardirq() is false.
Fixes: f85d39dd7ed89 ("kcov, usb: disable interrupts in kcov_remote_start_usb_softirq") Cc: stable Reported-by: Yunseong Kim Closes: https://lore.kernel.org/all/20250725201400.1078395-2-ysk@kzalloc.com/ Tested-by: Yunseong Kim Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250811082745.ycJqBXMs@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/kcov.h | 47 +++++++++-------------------------------------- 1 file changed, 9 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 75a2fb8b16c3..0143358874b0 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -57,47 +57,21 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary - * workaround for KCOV's lack of nested remote coverage sections support. - * - * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337. - * - * kcov_remote_start_usb_softirq(): - * - * 1. Only collects coverage when called in the softirq context. This allows - * avoiding nested remote coverage collection sections in the task context. - * For example, USB/IP calls usb_hcd_giveback_urb() in the task context - * within an existing remote coverage collection section. Thus, KCOV should - * not attempt to start collecting coverage within the coverage collection - * section in __usb_hcd_giveback_urb() in this case. - * - * 2. Disables interrupts for the duration of the coverage collection section. - * This allows avoiding nested remote coverage collection sections in the - * softirq context (a softirq might occur during the execution of a work in - * the BH workqueue, which runs with in_serving_softirq() > 0). 
- * For example, usb_giveback_urb_bh() runs in the BH workqueue with - * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in - * the middle of its remote coverage collection section, and the interrupt - * handler might invoke __usb_hcd_giveback_urb() again. + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding support for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +static inline void kcov_remote_start_usb_softirq(u64 id) { - unsigned long flags = 0; - - if (in_serving_softirq()) { - local_irq_save(flags); + if (in_serving_softirq() && !in_hardirq()) kcov_remote_start_usb(id); - } - - return flags; } -static inline void kcov_remote_stop_softirq(unsigned long flags) +static inline void kcov_remote_stop_softirq(void) { - if (in_serving_softirq()) { + if (in_serving_softirq() && !in_hardirq()) kcov_remote_stop(); - local_irq_restore(flags); - } } #ifdef CONFIG_64BIT @@ -131,11 +105,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) -{ - return 0; -} -static inline void kcov_remote_stop_softirq(unsigned long flags) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- cgit v1.2.3 From 4c70fb2624ab1588faa58dcd407d4c61d64b288d Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Wed, 13 Aug 2025 08:29:01 +0000 Subject: cpuset: remove redundant CS_ONLINE flag The CS_ONLINE flag was introduced prior to the CSS_ONLINE flag in the cpuset subsystem. Currently, the flag setting sequence is as follows: 1. cpuset_css_online() sets CS_ONLINE 2. css->flags gets CSS_ONLINE set ... 3. cgroup->kill_css sets CSS_DYING 4. 
cpuset_css_offline() clears CS_ONLINE 5. css->flags clears CSS_ONLINE The is_cpuset_online() check currently occurs between steps 1 and 3. However, it would be equally safe to perform this check between steps 2 and 3, as CSS_ONLINE provides the same synchronization guarantee as CS_ONLINE. Since CS_ONLINE is redundant with CSS_ONLINE and provides no additional synchronization benefits, we can safely remove it to simplify the code. Signed-off-by: Chen Ridong Acked-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..ae73dbb19165 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -354,6 +354,11 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return css->flags & CSS_DYING; } +static inline bool css_is_online(struct cgroup_subsys_state *css) +{ + return css->flags & CSS_ONLINE; +} + static inline bool css_is_self(struct cgroup_subsys_state *css) { if (css == &css->cgroup->self) { -- cgit v1.2.3 From 2caa6b88e0ba0231fb4ff0ba8e73cedd5fb81fc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 11 Aug 2025 14:08:04 +0200 Subject: bpf: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. 
Signed-off-by: Thomas Weißschuh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250811-restricted-pointers-bpf-v1-1-a1d7cc3cb9e7@linutronix.de --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1e7fd3ee759e..52fecb7a1fe3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1296,7 +1296,7 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { - pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen, + pr_err("flen=%u proglen=%u pass=%u image=%p from=%s pid=%d\n", flen, proglen, pass, image, current->comm, task_pid_nr(current)); if (image) -- cgit v1.2.3 From 139235103f6039c2c77cb8f51cb2e7e610fe0114 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 11 Aug 2025 15:35:05 +0800 Subject: net: stmmac: Change first parameter of fix_soc_reset() In order to use netdev_err() to print message in the callback function of fix_soc_reset(), change fix_soc_reset() to have "struct stmmac_priv *" as its first parameter. This is preparation for later patch, no functionality change. 
Suggested-by: Andrew Lunn Signed-off-by: Tiezhu Yang Link: https://patch.msgid.link/20250811073506.27513-3-yangtiezhu@loongson.cn Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 22c24dacbc65..e284f04964bf 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -238,7 +238,7 @@ struct plat_stmmacenet_data { int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); - int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); + int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); int (*mac_finish)(struct net_device *ndev, -- cgit v1.2.3 From 96326447d466ca62b713f660cfc73ef7879151a0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 12 Aug 2025 06:57:23 +0200 Subject: net: mediatek: wed: Introduce MT7992 WED support to MT7988 SoC Introduce the second WDMA RX ring in WED driver for MT7988 SoC since the Mediatek MT7992 WiFi chipset supports two separated WDMA rings. Add missing MT7988 configurations to properly support WED for MT7992 in MT76 driver. 
Co-developed-by: Rex Lu Signed-off-by: Rex Lu Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250812-mt7992-wed-support-v3-1-9ada78a819a4@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/mediatek/mtk_wed.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index d8949a4ed0dc..c4ff6bab176d 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -147,7 +147,7 @@ struct mtk_wed_device { u32 wpdma_tx; u32 wpdma_txfree; u32 wpdma_rx_glo; - u32 wpdma_rx; + u32 wpdma_rx[MTK_WED_RX_QUEUES]; u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; u32 wpdma_rx_pg; -- cgit v1.2.3 From c308bb4190a8f6f09270b541457d6ae905d2f7bc Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 13 Aug 2025 22:55:02 +0200 Subject: ALSA: hda: Use min() to simplify snd_hda_get_devices() Use min() to simplify snd_hda_get_devices() and improve its readability. Change the function parameter 'max_devices' from 'int' to 'unsigned int' to avoid a min() signedness error. Update all related local variables and the function's return type to 'unsigned int' accordingly. No functional changes intended. 
Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250813205507.215658-2-thorsten.blum@linux.dev Signed-off-by: Takashi Iwai --- include/sound/hda_codec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index ddc9c392f93f..006d4e4a8195 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -360,8 +360,8 @@ int snd_hda_override_conn_list(struct hda_codec *codec, hda_nid_t nid, int nums, int snd_hda_get_conn_index(struct hda_codec *codec, hda_nid_t mux, hda_nid_t nid, int recursive); unsigned int snd_hda_get_num_devices(struct hda_codec *codec, hda_nid_t nid); -int snd_hda_get_devices(struct hda_codec *codec, hda_nid_t nid, - u8 *dev_list, int max_devices); +unsigned int snd_hda_get_devices(struct hda_codec *codec, hda_nid_t nid, + u8 *dev_list, unsigned int max_devices); int snd_hda_get_dev_select(struct hda_codec *codec, hda_nid_t nid); int snd_hda_set_dev_select(struct hda_codec *codec, hda_nid_t nid, int dev_id); -- cgit v1.2.3 From f22cc6f766f84496b260347d4f0d92cf95f30699 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Aug 2025 16:42:09 -0700 Subject: net: ethtool: support including Flow Label in the flow hash for RSS Some modern NICs support including the IPv6 Flow Label in the flow hash for RSS queue selection. This is outside the old "Microsoft spec", but was included in the OCP NIC spec: [ ] RSS include flow label in the hash (configurable) https://www.opencompute.org/w/index.php?title=Core_Offloads#Receive_Side_Scaling RSS Flow Label hashing allows TCP Protective Load Balancing (PLB) to recover from receiver congestion / overload. Rx CPU/queue hotspots are relatively common for data ingest workloads, and so far we had to try to detect the condition at the RPC layer and reopen the connection. PLB lets us change the Flow Label and therefore Rx CPU on RTO, with minimal packet reordering. 
PLB reaction times are much faster, and can happen at any point in the connection, not just at RPC boundaries. Due to the nature of host processing (relatively long queues, other kernel subsystems masking IRQs for 100s of msecs) the risk of reordering within the host is higher than in the network. But for applications which need it - it is far preferable to potentially persistent overload of a subset of queues. It is expected that the hash communicated to the host may change if the Flow Label changes. This may be surprising to some host software, but I don't expect the devices can compute two Toeplitz hashes, one with the Flow Label for queue selection and one without for the rx hash communicated to the host. Besides, changing the hash may potentially help to change the path through host queues. Users can disable NETIF_F_RXHASH if they require a stable flow hash. The name RXH_IP6_FL was chosen based on what we call Flow Label variables in IPv6 processing (fl). I prefer fl_lbl but that appears to be an fbnic-only spelling. We could spell out RXH_IP6_FLOW_LABEL but existing RXH_ defines are a lot more terse. Willem notes [1] that Flow Label is defined as identifying the flow and therefore including both the flow label _and_ the L4 header fields is not generally necessary. But it should not hurt so it's not explicitly prevented if the driver supports hashing on both at the same time.
Link: https://lore.kernel.org/68483433b45e2_3cd66f29440@willemb.c.googlers.com.notmuch [1] Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250811234212.580748-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9e9afdd1238a..8bd5ea5469d9 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2380,6 +2380,7 @@ enum { #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ #define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ +#define RXH_IP6_FL (1 << 9) /* IPv6 flow label */ #define RXH_DISCARD (1 << 31) #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL -- cgit v1.2.3 From f443d7c9ed4642489d2f73a35e86df6228f65dfc Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Wed, 13 Aug 2025 16:17:15 +0800 Subject: dt-bindings: reset: thead,th1520-reset: add more VOSYS resets VOSYS contains more resets for a display pipeline, includes ones for the display controller (called DPU in the manual), the HDMI controller and 2 MIPI DSI controllers. Allocate IDs for these resets in the dt binding header file. Now all peripheral related VOSYS reset controls are here, only the bus matrix / IOPMP ones are missing, which shouldn't be messed with. 
Signed-off-by: Icenowy Zheng Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250813081716.2181843-2-uwu@icenowy.me Signed-off-by: Philipp Zabel --- include/dt-bindings/reset/thead,th1520-reset.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/reset/thead,th1520-reset.h b/include/dt-bindings/reset/thead,th1520-reset.h index 00459f160489..ee799286c175 100644 --- a/include/dt-bindings/reset/thead,th1520-reset.h +++ b/include/dt-bindings/reset/thead,th1520-reset.h @@ -12,5 +12,12 @@ #define TH1520_RESET_ID_NPU 2 #define TH1520_RESET_ID_WDT0 3 #define TH1520_RESET_ID_WDT1 4 +#define TH1520_RESET_ID_DPU_AHB 5 +#define TH1520_RESET_ID_DPU_AXI 6 +#define TH1520_RESET_ID_DPU_CORE 7 +#define TH1520_RESET_ID_DSI0_APB 8 +#define TH1520_RESET_ID_DSI1_APB 9 +#define TH1520_RESET_ID_HDMI 10 +#define TH1520_RESET_ID_HDMI_APB 11 #endif /* _DT_BINDINGS_TH1520_RESET_H */ -- cgit v1.2.3 From 2412f16c9afa7710778fc032139a6df38b68fd7c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 14 Aug 2025 16:50:07 +0300 Subject: media: v4l2-common: Improve devm_v4l2_sensor_clk_get() documentation Remove the extra leading period and provide more elaborate explanation for why devm_v4l2_sensor_clk_get() is only allowed to be used on camera sensor devices. Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-common.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index 9d6c236e8f14..39dd0c78d70f 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -623,7 +623,7 @@ int v4l2_link_freq_to_bitmap(struct device *dev, const u64 *fw_link_freqs, /** * devm_v4l2_sensor_clk_get - lookup and obtain a reference to a clock producer - * for a camera sensor. 
+ * for a camera sensor * * @dev: device for v4l2 sensor clock "consumer" * @id: clock consumer ID @@ -643,6 +643,14 @@ int v4l2_link_freq_to_bitmap(struct device *dev, const u64 *fw_link_freqs, * * In this case try to set the clock-frequency value to the provided clock. * + * As the name indicates, this function may only be used on camera sensor + * devices. This is because generally only camera sensors do need a clock to + * query the frequency from, due to the requirement to configure the PLL for a + * given CSI-2 interface frequency where the sensor's external clock frequency + * is a factor. Additionally, the clock frequency tends to be available on ACPI + * firmware based systems for camera sensors specifically (if e.g. DisCo for + * Imaging compliant). + * * Returns a pointer to a struct clk on success or an error pointer on failure. */ struct clk *devm_v4l2_sensor_clk_get(struct device *dev, const char *id); -- cgit v1.2.3 From fb2f2a86f0cd9690357b9bb67af00d386a7e819f Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 16 Jun 2025 13:03:21 +0200 Subject: ice: cleanup capabilities evaluation When evaluating the capabilities field, the ICE_AQC_BIT_ROCEV2_LAG and ICE_AQC_BIT_SRIOV_LAG defines were both not using the BIT operator, instead simply setting a hex value that set the correct bits. While not inaccurate, this method is misleading, and when it is expanded in the following implementation it becomes even more confusing. Switch to using the BIT() operator to clarify what is being checked. 
Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Reviewed-by: Marcin Szycik Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index 012b5d499c1a..dbe93f940ef0 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -192,8 +192,8 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE 0x0085 #define LIBIE_AQC_CAPS_NAC_TOPOLOGY 0x0087 #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 -#define LIBIE_AQC_BIT_ROCEV2_LAG 0x01 -#define LIBIE_AQC_BIT_SRIOV_LAG 0x02 +#define LIBIE_AQC_BIT_ROCEV2_LAG BIT(0) +#define LIBIE_AQC_BIT_SRIOV_LAG BIT(1) #define LIBIE_AQC_CAPS_FLEX10 0x00F1 #define LIBIE_AQC_CAPS_CEM 0x00F2 -- cgit v1.2.3 From 556c1ad666ad90c50ec8fccb930dd5046cfbecfb Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 14 Aug 2025 10:20:42 -0700 Subject: x86/vmscape: Enable the mitigation Enable the previously added mitigation for VMscape. Add the cmdline vmscape={off|ibpb|force} and sysfs reporting. 
Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Reviewed-by: Dave Hansen --- include/linux/cpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index b91b993f58ee..487b3bf2e1ea 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -83,6 +83,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From 37d1ade89606875c9cd6eb3b4ee416b7e1800fc4 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Wed, 13 Aug 2025 22:45:24 +0800 Subject: PCI: Clean up __pci_find_next_cap_ttl() readability Refactor the __pci_find_next_cap_ttl() to improve code clarity: - Replace magic number 0x40 with PCI_STD_HEADER_SIZEOF. - Use ALIGN_DOWN() for position alignment instead of manual bitmask. - Extract PCI capability fields via FIELD_GET() with standardized masks. - Add necessary headers (linux/align.h). No functional changes intended. 
Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Bjorn Helgaas Tested-by: Niklas Schnelle Acked-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250813144529.303548-2-18255117159@163.com --- include/uapi/linux/pci_regs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index f5b17745de60..1bba99b46227 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -207,6 +207,9 @@ /* Capability lists */ +#define PCI_CAP_ID_MASK 0x00ff /* Capability ID mask */ +#define PCI_CAP_LIST_NEXT_MASK 0xff00 /* Next Capability Pointer mask */ + #define PCI_CAP_LIST_ID 0 /* Capability ID */ #define PCI_CAP_ID_PM 0x01 /* Power Management */ #define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ -- cgit v1.2.3 From 8d90041a0d285044b89629f539ca0685e156848b Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 14 Aug 2025 11:22:13 -0400 Subject: dlm: handle release_option as unsigned Future patches will introduce an invalid argument check for undefined values. All valid values for release_option are positive integers, so instead of also checking for negative values we just change the parameter to unsigned int. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 108eb953eb18..34015a008b80 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -122,7 +122,8 @@ int dlm_new_lockspace(const char *name, const char *cluster, * release_option: see DLM_RELEASE values above.
*/ -int dlm_release_lockspace(dlm_lockspace_t *lockspace, int release_option); +int dlm_release_lockspace(dlm_lockspace_t *lockspace, + unsigned int release_option); /* * dlm_lock -- cgit v1.2.3 From 8e40210788636619404871df07445fa4590138b4 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 14 Aug 2025 11:22:14 -0400 Subject: dlm: check for undefined release_option values Checking on all undefined release_option values to return -EINVAL in case a user is providing them to dlm_release_lockspace(). Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index 34015a008b80..7e7b45b0d097 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -113,6 +113,7 @@ int dlm_new_lockspace(const char *name, const char *cluster, #define DLM_RELEASE_NORMAL 2 #define DLM_RELEASE_NO_EVENT 3 #define DLM_RELEASE_RECOVER 4 +#define __DLM_RELEASE_MAX DLM_RELEASE_RECOVER /* * dlm_release_lockspace -- cgit v1.2.3 From dab32f2576a39d5f54f3dbbbc718d92fa5109ce9 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 7 Aug 2025 15:55:39 +0200 Subject: s390/pci: Use pci_uevent_ers() in PCI recovery Issue uevents on s390 during PCI recovery using pci_uevent_ers() as done by EEH and AER PCIe recovery routines. 
Signed-off-by: Niklas Schnelle Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20250807-add_err_uevents-v5-2-adf85b0620b0@linux.ibm.com --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 59876de13860..7735acf6f349 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2764,7 +2764,7 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } -#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) +#if defined(CONFIG_PCIEPORTBUS) || defined(CONFIG_EEH) || defined(CONFIG_S390) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif -- cgit v1.2.3 From 7fdc1d1b02e471c2ad4292705265706e003430a0 Mon Sep 17 00:00:00 2001 From: Raviteja Laggyshetty Date: Thu, 14 Aug 2025 14:54:19 +0000 Subject: dt-bindings: interconnect: document the RPMh Network-On-Chip interconnect in Glymur SoC Document the RPMh Network-On-Chip Interconnect in Glymur platform. Co-developed-by: Odelu Kukatla Signed-off-by: Odelu Kukatla Reviewed-by: "Rob Herring (Arm)" Signed-off-by: Raviteja Laggyshetty Link: https://lore.kernel.org/r/20250814-glymur-icc-v2-1-596cca6b6015@oss.qualcomm.com Signed-off-by: Georgi Djakov --- .../dt-bindings/interconnect/qcom,glymur-rpmh.h | 205 +++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,glymur-rpmh.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,glymur-rpmh.h b/include/dt-bindings/interconnect/qcom,glymur-rpmh.h new file mode 100644 index 000000000000..6a0e754345e4 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,glymur-rpmh.h @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2025, Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_GLYMUR_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_GLYMUR_H + +#define MASTER_CRYPTO 0 +#define MASTER_SOCCP_PROC 1 +#define MASTER_QDSS_ETR 2 +#define MASTER_QDSS_ETR_1 3 +#define SLAVE_A1NOC_SNOC 4 + +#define MASTER_UFS_MEM 0 +#define MASTER_USB3_2 1 +#define MASTER_USB4_2 2 +#define SLAVE_A2NOC_SNOC 3 + +#define MASTER_QSPI_0 0 +#define MASTER_QUP_0 1 +#define MASTER_QUP_1 2 +#define MASTER_QUP_2 3 +#define MASTER_SP 4 +#define MASTER_SDCC_2 5 +#define MASTER_SDCC_4 6 +#define MASTER_USB2 7 +#define MASTER_USB3_MP 8 +#define SLAVE_A3NOC_SNOC 9 + +#define MASTER_USB3_0 0 +#define MASTER_USB3_1 1 +#define MASTER_USB4_0 2 +#define MASTER_USB4_1 3 +#define SLAVE_A4NOC_HSCNOC 4 + +#define MASTER_QUP_CORE_0 0 +#define MASTER_QUP_CORE_1 1 +#define MASTER_QUP_CORE_2 2 +#define SLAVE_QUP_CORE_0 3 +#define SLAVE_QUP_CORE_1 4 +#define SLAVE_QUP_CORE_2 5 + +#define MASTER_CNOC_CFG 0 +#define SLAVE_AHB2PHY_SOUTH 1 +#define SLAVE_AHB2PHY_NORTH 2 +#define SLAVE_AHB2PHY_2 3 +#define SLAVE_AHB2PHY_3 4 +#define SLAVE_AV1_ENC_CFG 5 +#define SLAVE_CAMERA_CFG 6 +#define SLAVE_CLK_CTL 7 +#define SLAVE_CRYPTO_0_CFG 8 +#define SLAVE_DISPLAY_CFG 9 +#define SLAVE_GFX3D_CFG 10 +#define SLAVE_IMEM_CFG 11 +#define SLAVE_PCIE_0_CFG 12 +#define SLAVE_PCIE_1_CFG 13 +#define SLAVE_PCIE_2_CFG 14 +#define SLAVE_PCIE_3A_CFG 15 +#define SLAVE_PCIE_3B_CFG 16 +#define SLAVE_PCIE_4_CFG 17 +#define SLAVE_PCIE_5_CFG 18 +#define SLAVE_PCIE_6_CFG 19 +#define SLAVE_PCIE_RSCC 20 +#define SLAVE_PDM 21 +#define SLAVE_PRNG 22 +#define SLAVE_QDSS_CFG 23 +#define SLAVE_QSPI_0 24 +#define SLAVE_QUP_0 25 +#define SLAVE_QUP_1 26 +#define SLAVE_QUP_2 27 +#define SLAVE_SDCC_2 28 +#define SLAVE_SDCC_4 29 +#define SLAVE_SMMUV3_CFG 30 +#define SLAVE_TCSR 31 +#define SLAVE_TLMM 32 +#define SLAVE_UFS_MEM_CFG 33 +#define SLAVE_USB2 34 +#define SLAVE_USB3_0 35 +#define SLAVE_USB3_1 36 +#define SLAVE_USB3_2 37 +#define SLAVE_USB3_MP 38 +#define SLAVE_USB4_0 39 +#define 
SLAVE_USB4_1 40 +#define SLAVE_USB4_2 41 +#define SLAVE_VENUS_CFG 42 +#define SLAVE_CNOC_PCIE_SLAVE_EAST_CFG 43 +#define SLAVE_CNOC_PCIE_SLAVE_WEST_CFG 44 +#define SLAVE_LPASS_QTB_CFG 45 +#define SLAVE_CNOC_MNOC_CFG 46 +#define SLAVE_NSP_QTB_CFG 47 +#define SLAVE_PCIE_EAST_ANOC_CFG 48 +#define SLAVE_PCIE_WEST_ANOC_CFG 49 +#define SLAVE_QDSS_STM 50 +#define SLAVE_TCU 51 + +#define MASTER_HSCNOC_CNOC 0 +#define SLAVE_AOSS 1 +#define SLAVE_IPC_ROUTER_CFG 2 +#define SLAVE_SOCCP 3 +#define SLAVE_TME_CFG 4 +#define SLAVE_APPSS 5 +#define SLAVE_CNOC_CFG 6 +#define SLAVE_BOOT_IMEM 7 +#define SLAVE_IMEM 8 + +#define MASTER_GPU_TCU 0 +#define MASTER_PCIE_TCU 1 +#define MASTER_SYS_TCU 2 +#define MASTER_APPSS_PROC 3 +#define MASTER_AGGRE_NOC_EAST 4 +#define MASTER_GFX3D 5 +#define MASTER_LPASS_GEM_NOC 6 +#define MASTER_MNOC_HF_MEM_NOC 7 +#define MASTER_MNOC_SF_MEM_NOC 8 +#define MASTER_COMPUTE_NOC 9 +#define MASTER_PCIE_EAST 10 +#define MASTER_PCIE_WEST 11 +#define MASTER_SNOC_SF_MEM_NOC 12 +#define MASTER_WLAN_Q6 13 +#define MASTER_GIC 14 +#define SLAVE_HSCNOC_CNOC 15 +#define SLAVE_LLCC 16 +#define SLAVE_PCIE_EAST 17 +#define SLAVE_PCIE_WEST 18 + +#define MASTER_LPIAON_NOC 0 +#define SLAVE_LPASS_GEM_NOC 1 + +#define MASTER_LPASS_LPINOC 0 +#define SLAVE_LPIAON_NOC_LPASS_AG_NOC 1 + +#define MASTER_LPASS_PROC 0 +#define SLAVE_LPICX_NOC_LPIAON_NOC 1 + +#define MASTER_LLCC 0 +#define SLAVE_EBI1 1 + +#define MASTER_AV1_ENC 0 +#define MASTER_CAMNOC_HF 1 +#define MASTER_CAMNOC_ICP 2 +#define MASTER_CAMNOC_SF 3 +#define MASTER_EVA 4 +#define MASTER_MDP 5 +#define MASTER_CDSP_HCP 6 +#define MASTER_VIDEO 7 +#define MASTER_VIDEO_CV_PROC 8 +#define MASTER_VIDEO_V_PROC 9 +#define MASTER_CNOC_MNOC_CFG 10 +#define SLAVE_MNOC_HF_MEM_NOC 11 +#define SLAVE_MNOC_SF_MEM_NOC 12 +#define SLAVE_SERVICE_MNOC 13 + +#define MASTER_CPUCP 0 +#define SLAVE_NSINOC_SYSTEM_NOC 1 +#define SLAVE_SERVICE_NSINOC 2 + +#define MASTER_CDSP_PROC 0 +#define SLAVE_NSP0_HSC_NOC 1 + +#define MASTER_OOBMSS_SP_PROC 0 
+#define SLAVE_OOBMSS_SNOC 1 + +#define MASTER_PCIE_EAST_ANOC_CFG 0 +#define MASTER_PCIE_0 1 +#define MASTER_PCIE_1 2 +#define MASTER_PCIE_5 3 +#define SLAVE_PCIE_EAST_MEM_NOC 4 +#define SLAVE_SERVICE_PCIE_EAST_AGGRE_NOC 5 + +#define MASTER_HSCNOC_PCIE_EAST 0 +#define MASTER_CNOC_PCIE_EAST_SLAVE_CFG 1 +#define SLAVE_HSCNOC_PCIE_EAST_MS_MPU_CFG 2 +#define SLAVE_SERVICE_PCIE_EAST 3 +#define SLAVE_PCIE_0 4 +#define SLAVE_PCIE_1 5 +#define SLAVE_PCIE_5 6 + +#define MASTER_PCIE_WEST_ANOC_CFG 0 +#define MASTER_PCIE_2 1 +#define MASTER_PCIE_3A 2 +#define MASTER_PCIE_3B 3 +#define MASTER_PCIE_4 4 +#define MASTER_PCIE_6 5 +#define SLAVE_PCIE_WEST_MEM_NOC 6 +#define SLAVE_SERVICE_PCIE_WEST_AGGRE_NOC 7 + +#define MASTER_HSCNOC_PCIE_WEST 0 +#define MASTER_CNOC_PCIE_WEST_SLAVE_CFG 1 +#define SLAVE_HSCNOC_PCIE_WEST_MS_MPU_CFG 2 +#define SLAVE_SERVICE_PCIE_WEST 3 +#define SLAVE_PCIE_2 4 +#define SLAVE_PCIE_3A 5 +#define SLAVE_PCIE_3B 6 +#define SLAVE_PCIE_4 7 +#define SLAVE_PCIE_6 8 + +#define MASTER_A1NOC_SNOC 0 +#define MASTER_A2NOC_SNOC 1 +#define MASTER_A3NOC_SNOC 2 +#define MASTER_NSINOC_SNOC 3 +#define MASTER_OOBMSS 4 +#define SLAVE_SNOC_GEM_NOC_SF 5 + +#endif -- cgit v1.2.3 From f39494089aaa1022008eee245fb83ef1ae911b6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Jul 2025 21:09:13 -0700 Subject: srcu: Move rcu_is_watching() checks to srcu_read_{,un}lock_fast() The rcu_is_watching() warnings are currently in the SRCU-tree implementations of __srcu_read_lock_fast() and __srcu_read_unlock_fast(). However, this makes it difficult to create _notrace variants of srcu_read_lock_fast() and srcu_read_unlock_fast(). This commit therefore moves these checks to srcu_read_lock_fast(), srcu_read_unlock_fast(), srcu_down_read_fast(), and srcu_up_read_fast(). Signed-off-by: Paul E. 
McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 4 ++++ include/linux/srcutree.h | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f179700fecaf..478c73d067f7 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -275,6 +275,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * { struct srcu_ctr __percpu *retval; + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); retval = __srcu_read_lock_fast(ssp); rcu_try_lock_acquire(&ssp->dep_map); @@ -295,6 +296,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_down_read_fast()."); srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); return __srcu_read_lock_fast(ssp); } @@ -389,6 +391,7 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } /** @@ -405,6 +408,7 @@ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); __srcu_read_unlock_fast(ssp, scp); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_up_read_fast()."); } /** diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index bf44d8d1e69e..043b5a67ef71 100644 --- a/include/linux/srcutree.h +++ 
b/include/linux/srcutree.h @@ -244,7 +244,6 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) this_cpu_inc(scp->srcu_locks.counter); /* Y */ else @@ -275,7 +274,6 @@ static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ else atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -- cgit v1.2.3 From 7e2a2d060da4860af37e1000dc62a30a1551d9e8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:12:16 -0700 Subject: srcu: Add srcu_read_lock_fast_notrace() and srcu_read_unlock_fast_notrace() This commit adds no-trace variants of the srcu_read_lock_fast() and srcu_read_unlock_fast() functions for tracing use. [ paulmck: Apply notrace feedback from Joel Fernandes, Steven Rostedt, and Mathieu Desnoyers. ] [ paulmck: Apply excess-notrace feedback from Boqun Feng. ] Link: https://lore.kernel.org/all/20250721162433.10454-1-paulmck@kernel.org Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 25 +++++++++++++++++++++++++ include/linux/srcutree.h | 5 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 478c73d067f7..7a692bf8f99b 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -282,6 +282,20 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct * return retval; } +/* + * Used by tracing, cannot be traced and cannot call lockdep. 
+ * See srcu_read_lock_fast() for more information. + */ +static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_struct *ssp) + __acquires(ssp) +{ + struct srcu_ctr __percpu *retval; + + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + retval = __srcu_read_lock_fast(ssp); + return retval; +} + /** * srcu_down_read_fast - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. @@ -394,6 +408,17 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); } +/* + * Used by tracing, cannot be traced and cannot call lockdep. + * See srcu_read_unlock_fast() for more information. + */ +static inline void srcu_read_unlock_fast_notrace(struct srcu_struct *ssp, + struct srcu_ctr __percpu *scp) __releases(ssp) +{ + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + __srcu_read_unlock_fast(ssp, scp); +} + /** * srcu_up_read_fast - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 043b5a67ef71..4d2fee4d3828 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -240,7 +240,7 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). */ -static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp) { struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); @@ -267,7 +267,8 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct * on architectures that support NMIs but do not supply NMI-safe * implementations of this_cpu_inc(). 
*/ -static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +static inline void notrace +__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) -- cgit v1.2.3 From cacadb630375b8c30ca4d0300812178bb884c0b0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:19:39 -0700 Subject: srcu: Add guards for notrace variants of SRCU-fast readers This adds the usual scoped_guard(srcu_fast_notrace, &my_srcu) and guard(srcu_fast_notrace)(&my_srcu). Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 7a692bf8f99b..ada65b58bc4c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -515,4 +515,9 @@ DEFINE_LOCK_GUARD_1(srcu_fast, struct srcu_struct, srcu_read_unlock_fast(_T->lock, _T->scp), struct srcu_ctr __percpu *scp) +DEFINE_LOCK_GUARD_1(srcu_fast_notrace, struct srcu_struct, + _T->scp = srcu_read_lock_fast_notrace(_T->lock), + srcu_read_unlock_fast_notrace(_T->lock, _T->scp), + struct srcu_ctr __percpu *scp) + #endif -- cgit v1.2.3 From ffc23a204a5f2e763a8cc8a8cfefe0027a6f0ec3 Mon Sep 17 00:00:00 2001 From: Brigham Campbell Date: Wed, 30 Jul 2025 21:23:42 -0600 Subject: drm: Add MIPI read_multi func and two write macros Create mipi_dsi_dcs_read_multi(), which accepts a mipi_dsi_multi_context struct for improved error handling and cleaner panel driver code. Create mipi_dsi_dcs_write_var_seq_multi() and mipi_dsi_generic_write_var_seq_multi() macros which allow MIPI panel drivers to write non-constant data to display controllers. 
Reviewed-by: Douglas Anderson Signed-off-by: Brigham Campbell Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250731032343.1258366-3-me@brighamcampbell.com --- include/drm/drm_mipi_dsi.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 16eeb9552064..3aba7b380c8d 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -342,6 +342,8 @@ ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *dsi, u8 cmd, const void *data, size_t len); ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data, size_t len); +void mipi_dsi_dcs_read_multi(struct mipi_dsi_multi_context *ctx, u8 cmd, + void *data, size_t len); int mipi_dsi_dcs_nop(struct mipi_dsi_device *dsi); int mipi_dsi_dcs_soft_reset(struct mipi_dsi_device *dsi); int mipi_dsi_dcs_get_power_mode(struct mipi_dsi_device *dsi, u8 *mode); @@ -403,6 +405,22 @@ void mipi_dsi_dcs_set_tear_off_multi(struct mipi_dsi_multi_context *ctx); mipi_dsi_generic_write_multi(ctx, d, ARRAY_SIZE(d)); \ } while (0) +/** + * mipi_dsi_generic_write_var_seq_multi - transmit non-constant data using a + * generic write packet + * + * This macro will print errors for you and error handling is optimized for + * callers that call this multiple times in a row. + * + * @ctx: Context for multiple DSI transactions + * @seq: buffer containing the payload + */ +#define mipi_dsi_generic_write_var_seq_multi(ctx, seq...) 
\ + do { \ + const u8 d[] = { seq }; \ + mipi_dsi_generic_write_multi(ctx, d, ARRAY_SIZE(d)); \ + } while (0) + /** * mipi_dsi_dcs_write_seq_multi - transmit a DCS command with payload * @@ -419,6 +437,23 @@ void mipi_dsi_dcs_set_tear_off_multi(struct mipi_dsi_multi_context *ctx); mipi_dsi_dcs_write_buffer_multi(ctx, d, ARRAY_SIZE(d)); \ } while (0) +/** + * mipi_dsi_dcs_write_var_seq_multi - transmit a DCS command with non-constant + * payload + * + * This macro will print errors for you and error handling is optimized for + * callers that call this multiple times in a row. + * + * @ctx: Context for multiple DSI transactions + * @cmd: Command + * @seq: buffer containing data to be transmitted + */ +#define mipi_dsi_dcs_write_var_seq_multi(ctx, cmd, seq...) \ + do { \ + const u8 d[] = { cmd, seq }; \ + mipi_dsi_dcs_write_buffer_multi(ctx, d, ARRAY_SIZE(d)); \ + } while (0) + /** * mipi_dsi_dual - send the same MIPI DSI command to two interfaces * -- cgit v1.2.3 From 28f073b38372b99d8d33ff5e63897d28419bda20 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 16 Jun 2025 13:03:23 +0200 Subject: ice: Implement support for SRIOV VFs across Active/Active bonds This patch implements the software flows to handle SRIOV VF communication across an Active/Active link aggregate. The same restrictions apply as are in place for the support of Active/Backup bonds. - the two interfaces must be on the same NIC - the FW LLDP engine needs to be disabled - the DDP package that supports VF LAG must be loaded on device - the two interfaces must have the same QoS config - only the first interface added to the bond will have VF support - the interface with VFs must be in switchdev mode With the additional requirement of - the version of the FW on the NIC needs to have VF Active/Active support This requirement is indicated in the capabilities struct associated with the NVM loaded on the NIC. The balancing of traffic between the two interfaces is done on a queue basis. 
Taking the queues allocated to all of the VFs as a whole, one half of them will be distributed to each interface. When a link goes down, then the queues allocated to the down interface will migrate to the active port. When the down port comes back up, then the same queues as were originally assigned there will be moved back. Co-developed-by: Marcin Szycik Signed-off-by: Marcin Szycik Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index dbe93f940ef0..ba62f703df43 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -194,6 +194,7 @@ LIBIE_CHECK_STRUCT_LEN(16, libie_aqc_list_caps); #define LIBIE_AQC_CAPS_FW_LAG_SUPPORT 0x0092 #define LIBIE_AQC_BIT_ROCEV2_LAG BIT(0) #define LIBIE_AQC_BIT_SRIOV_LAG BIT(1) +#define LIBIE_AQC_BIT_SRIOV_AA_LAG BIT(2) #define LIBIE_AQC_CAPS_FLEX10 0x00F1 #define LIBIE_AQC_CAPS_CEM 0x00F2 -- cgit v1.2.3 From 41a6e8ab18642741437da932c2f5762b185e928c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Aug 2025 12:44:17 +0300 Subject: devlink/port: Check attributes early and constify Constify the devlink port attributes to indicate they are read only and do not depend on anything else. Therefore, validate them early before setting them in the devlink port. 
Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Link: https://patch.msgid.link/20250813094417.7269-3-parav@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index b32c9ceeb81d..3119d053bc4d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1743,7 +1743,7 @@ void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev); void devlink_port_type_clear(struct devlink_port *devlink_port); void devlink_port_attrs_set(struct devlink_port *devlink_port, - struct devlink_port_attrs *devlink_port_attrs); + const struct devlink_port_attrs *attrs); void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, bool external); void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller, -- cgit v1.2.3 From 52bf272636bda69587952b35ae97690b8dc89941 Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 12 Aug 2025 23:57:57 +0000 Subject: net/sched: Fix backlog accounting in qdisc_dequeue_internal This issue applies for the following qdiscs: hhf, fq, fq_codel, and fq_pie, and occurs in their change handlers when adjusting to the new limit. The problem is the following in the values passed to the subsequent qdisc_tree_reduce_backlog call given a tbf parent: When the tbf parent runs out of tokens, skbs of these qdiscs will be placed in gso_skb. Their peek handlers are qdisc_peek_dequeued, which accounts for both qlen and backlog. However, in the case of qdisc_dequeue_internal, ONLY qlen is accounted for when pulling from gso_skb. This means that these qdiscs are missing a qdisc_qstats_backlog_dec when dropping packets to satisfy the new limit in their change handlers. 
One can observe this issue with the following (with tc patched to support a limit of 0): export TARGET=fq tc qdisc del dev lo root tc qdisc add dev lo root handle 1: tbf rate 8bit burst 100b latency 1ms tc qdisc replace dev lo handle 3: parent 1:1 $TARGET limit 1000 echo ''; echo 'add child'; tc -s -d qdisc show dev lo ping -I lo -f -c2 -s32 -W0.001 127.0.0.1 2>&1 >/dev/null echo ''; echo 'after ping'; tc -s -d qdisc show dev lo tc qdisc change dev lo handle 3: parent 1:1 $TARGET limit 0 echo ''; echo 'after limit drop'; tc -s -d qdisc show dev lo tc qdisc replace dev lo handle 2: parent 1:1 sfq echo ''; echo 'post graft'; tc -s -d qdisc show dev lo The second to last show command shows 0 packets but a positive number (74) of backlog bytes. The problem becomes clearer in the last show command, where qdisc_purge_queue triggers qdisc_tree_reduce_backlog with the positive backlog and causes an underflow in the tbf parent's backlog (4096 Mb instead of 0). To fix this issue, the codepath for all clients of qdisc_dequeue_internal has been simplified: codel, pie, hhf, fq, fq_pie, and fq_codel. qdisc_dequeue_internal handles the backlog adjustments for all cases that do not directly use the dequeue handler. The old fq_codel_change limit adjustment loop accumulated the arguments to the subsequent qdisc_tree_reduce_backlog call through the cstats field. However, this is confusing and error prone as fq_codel_dequeue could also potentially mutate this field (which qdisc_dequeue_internal calls in the non gso_skb case), so we have unified the code here with other qdiscs. 
Fixes: 2d3cbfd6d54a ("net_sched: Flush gso_skb list too during ->change()") Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250812235725.45243-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 638948be4c50..738cd5b13c62 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1038,12 +1038,17 @@ static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool dir skb = __skb_dequeue(&sch->gso_skb); if (skb) { sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); return skb; } - if (direct) - return __qdisc_dequeue_head(&sch->q); - else + if (direct) { + skb = __qdisc_dequeue_head(&sch->q); + if (skb) + qdisc_qstats_backlog_dec(sch, skb); + return skb; + } else { return sch->dequeue(sch); + } } static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) -- cgit v1.2.3 From 3b1dc21d6d800cb86c1ef0c97968f5c783343f2b Mon Sep 17 00:00:00 2001 From: Karunika Choo Date: Thu, 7 Aug 2025 17:26:30 +0100 Subject: drm/panthor: Add support for Mali-Gx15 family of GPUs Mali-Gx15 introduces a new GPU_FEATURES register that provides information about GPU-wide supported features. The register value will be passed on to userspace via gpu_info. Additionally, Mali-Gx15 presents an 'Immortalis' naming variant depending on the shader core count and presence of Ray Intersection feature support. This patch adds: - support for correctly identifying the model names for Mali-Gx15 GPUs. 
- arch 11.8 FW binary support Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Reviewed-by: Liviu Dudau Signed-off-by: Karunika Choo Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20250807162633.3666310-5-karunika.choo@arm.com --- include/uapi/drm/panthor_drm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index e1f43deb7eca..467d365ed7ba 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -327,6 +327,9 @@ struct drm_panthor_gpu_info { /** @pad: MBZ. */ __u32 pad; + + /** @gpu_features: Bitmask describing supported GPU-wide features */ + __u64 gpu_features; }; /** -- cgit v1.2.3 From 448f97fba9013ffa13f5dd82febd18836b189499 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Aug 2025 12:39:13 +0200 Subject: perf: Convert mmap() refcounts to refcount_t The recently fixed reference count leaks could have been detected by using refcount_t and refcount_t would have mitigated the potential overflow at least. Now that the code is properly structured, convert the mmap() related mmap_count variants over to refcount_t. No functional change intended. 
Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lorenzo Stoakes Link: https://lore.kernel.org/r/20250812104020.071507932@infradead.org --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ec9d96025683..bfbf9ea53f25 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -859,7 +859,7 @@ struct perf_event { /* mmap bits */ struct mutex mmap_mutex; - atomic_t mmap_count; + refcount_t mmap_count; struct perf_buffer *rb; struct list_head rb_entry; -- cgit v1.2.3 From 709788b154caf042874d765628ffa860f0bb0d1e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 4 Aug 2025 09:54:05 -0400 Subject: Bluetooth: hci_core: Fix using {cis,bis}_capable for current settings {cis,bis}_capable only indicates the controller supports the feature since it doesn't check that LE is enabled so it shall not be used for current setting, instead this introduces {cis,bis}_enabled macros that can be used to indicate that these features are currently enabled. 
Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Fixes: ae7533613133 ("Bluetooth: Check for ISO support in controller") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 4 ++-- include/net/bluetooth/hci_core.h | 13 ++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index ada5b56a4413..e5751f3070b8 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -647,7 +647,7 @@ static inline void sco_exit(void) #if IS_ENABLED(CONFIG_BT_LE) int iso_init(void); int iso_exit(void); -bool iso_enabled(void); +bool iso_inited(void); #else static inline int iso_init(void) { @@ -659,7 +659,7 @@ static inline int iso_exit(void) return 0; } -static inline bool iso_enabled(void) +static inline bool iso_inited(void) { return false; } diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4dc11c66f7b8..bc29f2e2e16f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1915,6 +1915,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn); !hci_dev_test_flag(dev, HCI_RPA_EXPIRED)) #define adv_rpa_valid(adv) (bacmp(&adv->random_addr, BDADDR_ANY) && \ !adv->rpa_expired) +#define le_enabled(dev) (lmp_le_capable(dev) && \ + hci_dev_test_flag(dev, HCI_LE_ENABLED)) #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \ ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M)) @@ -1981,14 +1983,23 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* CIS Master/Slave and BIS support */ #define iso_capable(dev) (cis_capable(dev) || bis_capable(dev)) +#define iso_enabled(dev) (le_enabled(dev) && iso_capable(dev)) #define cis_capable(dev) \ (cis_central_capable(dev) || cis_peripheral_capable(dev)) +#define cis_enabled(dev) (le_enabled(dev) && 
cis_capable(dev)) #define cis_central_capable(dev) \ ((dev)->le_features[3] & HCI_LE_CIS_CENTRAL) +#define cis_central_enabled(dev) \ + (le_enabled(dev) && cis_central_capable(dev)) #define cis_peripheral_capable(dev) \ ((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL) +#define cis_peripheral_enabled(dev) \ + (le_enabled(dev) && cis_peripheral_capable(dev)) #define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER) -#define sync_recv_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) +#define bis_enabled(dev) (le_enabled(dev) && bis_capable(dev)) +#define sync_recv_capable(dev) \ + ((dev)->le_features[3] & HCI_LE_ISO_SYNC_RECEIVER) +#define sync_recv_enabled(dev) (le_enabled(dev) && sync_recv_capable(dev)) #define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ (!hci_test_quirk((dev), HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG))) -- cgit v1.2.3 From 3dcf7175f2c04bd3a7d50db3fa42a0bd933b6e23 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 4 Aug 2025 14:05:03 -0400 Subject: Bluetooth: hci_core: Fix using ll_privacy_capable for current settings ll_privacy_capable only indicates that the controller supports the feature but it doesn't check that LE is enabled so it ends up being marked as active in the current settings when it shouldn't. 
Fixes: ad383c2c65a5 ("Bluetooth: hci_sync: Enable advertising when LL privacy is enabled") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bc29f2e2e16f..bb30bde6f0e8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1934,6 +1934,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED)) #define ll_privacy_capable(dev) ((dev)->le_features[0] & HCI_LE_LL_PRIVACY) +#define ll_privacy_enabled(dev) (le_enabled(dev) && ll_privacy_capable(dev)) #define privacy_mode_capable(dev) (ll_privacy_capable(dev) && \ ((dev)->commands[39] & 0x04)) -- cgit v1.2.3 From a3de58b12ce074ec05b8741fa28d62ccb1070468 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Aug 2025 22:45:50 +0100 Subject: netfs: Fix unbuffered write error handling If all the subrequests in an unbuffered write stream fail, the subrequest collector doesn't update the stream->transferred value and it retains its initial LONG_MAX value. Unfortunately, if all active streams fail, then we take the smallest value of { LONG_MAX, LONG_MAX, ... } as the value to set in wreq->transferred - which is then returned from ->write_iter(). LONG_MAX was chosen as the initial value so that all the streams can be quickly assessed by taking the smallest value of all stream->transferred - but this only works if we've set any of them. Fix this by adding a flag to indicate whether the value in stream->transferred is valid and checking that when we integrate the values. stream->transferred can then be initialised to zero. This was found by running the generic/750 xfstest against cifs with cache=none. It splices data to the target file. Once (if) it has used up all the available scratch space, the writes start failing with ENOSPC. This causes ->write_iter() to fail. 
However, it was returning wreq->transferred, i.e. LONG_MAX, rather than an error (because it thought the amount transferred was non-zero) and iter_file_splice_write() would then try to clean up that amount of pipe bufferage - leading to an oops when it overran. The kernel log showed: CIFS: VFS: Send error in write = -28 followed by: BUG: kernel NULL pointer dereference, address: 0000000000000008 with: RIP: 0010:iter_file_splice_write+0x3a4/0x520 do_splice+0x197/0x4e0 or: RIP: 0010:pipe_buf_release (include/linux/pipe_fs_i.h:282) iter_file_splice_write (fs/splice.c:755) Also put a warning check into splice to announce if ->write_iter() returned that it had written more than it was asked to. Fixes: 288ace2f57c9 ("netfs: New writeback implementation") Reported-by: Xiaoli Feng Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220445 Signed-off-by: David Howells Link: https://lore.kernel.org/915443.1755207950@warthog.procyon.org.uk cc: Paulo Alcantara cc: Steve French cc: Shyam Prasad N cc: netfs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 185bd8196503..98c96d649bf9 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -150,6 +150,7 @@ struct netfs_io_stream { bool active; /* T if stream is active */ bool need_retry; /* T if this stream needs retrying */ bool failed; /* T if this stream failed */ + bool transferred_valid; /* T if ->transferred is valid */ }; /* -- cgit v1.2.3 From 9d4b01a0bf8d2163ae129c9c537cb0753ad5a2aa Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 14 Aug 2025 11:57:19 -0400 Subject: Bluetooth: hci_core: Fix not accounting for BIS/CIS/PA links separately This fixes the likes of hci_conn_num(CIS_LINK) returning the total of ISO connection which includes BIS_LINK as well, so 
this splits the iso_num into each link type and introduces hci_iso_num that can be used in places where the total number of ISO connection still needs to be used. Fixes: 23205562ffc8 ("Bluetooth: separate CIS_LINK and BIS_LINK link types") Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index bb30bde6f0e8..6906af7a8f24 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -129,7 +129,9 @@ struct hci_conn_hash { struct list_head list; unsigned int acl_num; unsigned int sco_num; - unsigned int iso_num; + unsigned int cis_num; + unsigned int bis_num; + unsigned int pa_num; unsigned int le_num; unsigned int le_num_peripheral; }; @@ -1014,9 +1016,13 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) h->sco_num++; break; case CIS_LINK: + h->cis_num++; + break; case BIS_LINK: + h->bis_num++; + break; case PA_LINK: - h->iso_num++; + h->pa_num++; break; } } @@ -1042,9 +1048,13 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) h->sco_num--; break; case CIS_LINK: + h->cis_num--; + break; case BIS_LINK: + h->bis_num--; + break; case PA_LINK: - h->iso_num--; + h->pa_num--; break; } } @@ -1061,9 +1071,11 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) case ESCO_LINK: return h->sco_num; case CIS_LINK: + return h->cis_num; case BIS_LINK: + return h->bis_num; case PA_LINK: - return h->iso_num; + return h->pa_num; default: return 0; } @@ -1073,7 +1085,15 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) { struct hci_conn_hash *c = &hdev->conn_hash; - return c->acl_num + c->sco_num + c->le_num + c->iso_num; + return c->acl_num + c->sco_num 
+ c->le_num + c->cis_num + c->bis_num + + c->pa_num; +} + +static inline unsigned int hci_iso_count(struct hci_dev *hdev) +{ + struct hci_conn_hash *c = &hdev->conn_hash; + + return c->cis_num + c->bis_num; } static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) -- cgit v1.2.3 From 2335b3f56690f76ac34b972fcaef368bab1f76f2 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 16 Jun 2025 23:41:53 -0700 Subject: net/mlx5: mlx5_ifc, Add hardware definitions needed for adjacent vports Next patches will implement the discovery and creation of adjacent functions vports, this patch introduces the hardware structures definitions needed for the driver implementation. Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Reviewed-by: Parav Pandit Reviewed-by: Jack Morgenstein Signed-off-by: Alexei Lazar --- include/linux/mlx5/mlx5_ifc.h | 133 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8360d9011d4f..44d497272162 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -189,6 +189,9 @@ enum { MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, MLX5_CMD_OP_RELEASE_XRQ_ERROR = 0x729, MLX5_CMD_OP_MODIFY_XRQ = 0x72a, + MLX5_CMD_OPCODE_QUERY_DELEGATED_VHCA = 0x732, + MLX5_CMD_OPCODE_CREATE_ESW_VPORT = 0x733, + MLX5_CMD_OPCODE_DESTROY_ESW_VPORT = 0x734, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, @@ -2207,7 +2210,19 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_440[0x8]; u8 max_num_eqs_24b[0x18]; - u8 reserved_at_460[0x3a0]; + + u8 reserved_at_460[0x160]; + + u8 query_adjacent_functions_id[0x1]; + u8 ingress_egress_esw_vport_connect[0x1]; + u8 function_id_type_vhca_id[0x1]; + u8 reserved_at_5c3[0xd]; + u8 delegate_vhca_management_profiles[0x10]; + + u8 delegated_vhca_max[0x10]; + u8 
delegate_vhca_max[0x10]; + + u8 reserved_at_600[0x200]; }; enum mlx5_ifc_flow_destination_type { @@ -5159,7 +5174,9 @@ struct mlx5_ifc_set_hca_cap_in_bits { u8 other_function[0x1]; u8 ec_vf_function[0x1]; - u8 reserved_at_42[0xe]; + u8 reserved_at_42[0x1]; + u8 function_id_type[0x1]; + u8 reserved_at_44[0xc]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6357,7 +6374,9 @@ struct mlx5_ifc_query_hca_cap_in_bits { u8 other_function[0x1]; u8 ec_vf_function[0x1]; - u8 reserved_at_42[0xe]; + u8 reserved_at_42[0x1]; + u8 function_id_type[0x1]; + u8 reserved_at_44[0xc]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -6983,6 +7002,28 @@ struct mlx5_ifc_query_esw_vport_context_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_esw_vport_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; +}; + +struct mlx5_ifc_destroy_esw_vport_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 vport_num[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_modify_esw_vport_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7484,6 +7525,85 @@ struct mlx5_ifc_query_adapter_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_function_vhca_rid_info_reg_bits { + u8 host_number[0x8]; + u8 host_pci_device_function[0x8]; + u8 host_pci_bus[0x8]; + u8 reserved_at_18[0x3]; + u8 pci_bus_assigned[0x1]; + u8 function_type[0x4]; + + u8 parent_pci_device_function[0x8]; + u8 parent_pci_bus[0x8]; + u8 vhca_id[0x10]; + + u8 reserved_at_40[0x10]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_delegated_function_vhca_rid_info_bits { + struct mlx5_ifc_function_vhca_rid_info_reg_bits function_vhca_rid_info; + + u8 reserved_at_80[0x18]; + u8 manage_profile[0x8]; + + u8 reserved_at_a0[0x60]; +}; + +struct mlx5_ifc_query_delegated_vhca_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 
reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 functions_count[0x10]; + + u8 reserved_at_80[0x80]; + + struct mlx5_ifc_delegated_function_vhca_rid_info_bits + delegated_function_vhca_rid_info[]; +}; + +struct mlx5_ifc_query_delegated_vhca_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_esw_vport_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 vport_num[0x10]; +}; + +struct mlx5_ifc_create_esw_vport_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 managed_vhca_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_qp_2rst_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7611,7 +7731,12 @@ struct mlx5_ifc_modify_vport_state_in_bits { u8 reserved_at_41[0xf]; u8 vport_number[0x10]; - u8 reserved_at_60[0x18]; + u8 reserved_at_60[0x10]; + u8 ingress_connect[0x1]; + u8 egress_connect[0x1]; + u8 ingress_connect_valid[0x1]; + u8 egress_connect_valid[0x1]; + u8 reserved_at_74[0x4]; u8 admin_state[0x4]; u8 reserved_at_7c[0x4]; }; -- cgit v1.2.3 From 40653f280b2640e5caa94eeedee43e0f1df97704 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 16 Jun 2025 17:28:20 -0700 Subject: {rdma,net}/mlx5: export mlx5_vport_get_vhca_id vhca id is already cached in the vport structure no need to query on every mlx5 layer, use the mlx5_vport_get_vhca_id, where possible. 
Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Reviewed-by: Parav Pandit Signed-off-by: Alexei Lazar Reviewed-by: Feng Liu Reviewed-by: Tariq Toukan --- include/linux/mlx5/vport.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index c36cc6d82926..c87b9507cfa1 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -135,4 +135,6 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod); +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); + #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From c27973211ffcdf0a092eec265d5993e64b89adaf Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Fri, 15 Aug 2025 12:00:28 +0800 Subject: md: keep recovery_cp in mdp_superblock_s commit 907a99c314a5 ("md: rename recovery_cp to resync_offset") replaces recovery_cp with resync_offset in mdp_superblock_s which is in md_p.h. md_p.h is used in userspace too. So mdadm building fails because of this. This patch reverts this change. 
Fixes: 907a99c314a5 ("md: rename recovery_cp to resync_offset") Signed-off-by: Xiao Ni Link: https://lore.kernel.org/linux-raid/20250815040028.18085-1-xni@redhat.com Signed-off-by: Yu Kuai --- include/uapi/linux/raid/md_p.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index b13946287277..ac74133a4768 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -173,7 +173,7 @@ typedef struct mdp_superblock_s { #else #error unspecified endianness #endif - __u32 resync_offset; /* 11 resync checkpoint sector count */ + __u32 recovery_cp; /* 11 resync checkpoint sector count */ /* There are only valid for minor_version > 90 */ __u64 reshape_position; /* 12,13 next address in array-space for reshape */ __u32 new_level; /* 14 new level we are reshaping to */ -- cgit v1.2.3 From 450bbe43ef90a213d66fac1def64050d9d9ada8e Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:12:32 +0000 Subject: crypto: ccp - New bit-field definitions for SNP_PLATFORM_STATUS command Define new bit-field definitions returned by SNP_PLATFORM_STATUS command such as new capabilities like SNP_FEATURE_INFO command availability, ciphertext hiding enabled and capability. 
Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- include/uapi/linux/psp-sev.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index eeb20dfb1fda..c2fd324623c4 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -185,6 +185,10 @@ struct sev_user_data_get_id2 { * @mask_chip_id: whether chip id is present in attestation reports or not * @mask_chip_key: whether attestation reports are signed or not * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded + * @feature_info: whether SNP_FEATURE_INFO command is available + * @rapl_dis: whether RAPL is disabled + * @ciphertext_hiding_cap: whether platform has ciphertext hiding capability + * @ciphertext_hiding_en: whether ciphertext hiding is enabled * @rsvd1: reserved * @guest_count: the number of guest currently managed by the firmware * @current_tcb_version: current TCB version @@ -200,7 +204,11 @@ struct sev_user_data_snp_status { __u32 mask_chip_id:1; /* Out */ __u32 mask_chip_key:1; /* Out */ __u32 vlek_en:1; /* Out */ - __u32 rsvd1:29; + __u32 feature_info:1; /* Out */ + __u32 rapl_dis:1; /* Out */ + __u32 ciphertext_hiding_cap:1; /* Out */ + __u32 ciphertext_hiding_en:1; /* Out */ + __u32 rsvd1:25; __u32 guest_count; /* Out */ __u64 current_tcb_version; /* Out */ __u64 reported_tcb_version; /* Out */ -- cgit v1.2.3 From 33cfb80d1910b41d1a25cef89b159c945aff0f24 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:10 +0000 Subject: crypto: ccp - Add support for SNP_FEATURE_INFO command The FEATURE_INFO command provides hypervisors with a programmatic means to learn about the supported features of the currently loaded firmware. This command mimics the CPUID instruction relative to sub-leaf input and the four unsigned integer output values. 
To obtain information regarding the features present in the currently loaded SEV firmware, use the SNP_FEATURE_INFO command. Cache the SNP platform status and feature information from CPUID 0x8000_0024 in the sev_device structure. If SNP is enabled, utilize this cached SNP platform status for the API major, minor and build version. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- include/linux/psp-sev.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0f5f94137f6d..5fb6ae0f51cc 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -107,6 +107,7 @@ enum sev_cmd { SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, SEV_CMD_SNP_COMMIT = 0x0CB, SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_SNP_FEATURE_INFO = 0x0CE, SEV_CMD_MAX, }; @@ -814,6 +815,34 @@ struct sev_data_snp_commit { u32 len; } __packed; +/** + * struct sev_data_snp_feature_info - SEV_SNP_FEATURE_INFO structure + * + * @length: len of the command buffer read by the PSP + * @ecx_in: subfunction index + * @feature_info_paddr: System Physical Address of the FEATURE_INFO structure + */ +struct sev_data_snp_feature_info { + u32 length; + u32 ecx_in; + u64 feature_info_paddr; +} __packed; + +/** + * struct snp_feature_info - FEATURE_INFO structure + * + * @eax: output of SNP_FEATURE_INFO command + * @ebx: output of SNP_FEATURE_INFO command + * @ecx: output of SNP_FEATURE_INFO command + * @edx: output of SNP_FEATURE_INFO command + */ +struct snp_feature_info { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** -- cgit v1.2.3 From 45d59bd4a3e0f0475b3646e8b9936d34794e503d Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:27 +0000 Subject: crypto: ccp - Introduce new API interface to indicate SEV-SNP Ciphertext hiding feature Implement an API that checks the overall feature support 
for SEV-SNP ciphertext hiding. This API verifies both the support of the SEV firmware for the feature and its enablement in the platform's BIOS. Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- include/linux/psp-sev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 5fb6ae0f51cc..d83185b4268b 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -843,6 +843,8 @@ struct snp_feature_info { u32 edx; } __packed; +#define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3) + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** @@ -986,6 +988,7 @@ void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); void sev_platform_shutdown(void); +bool sev_is_snp_ciphertext_hiding_supported(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -1022,6 +1025,8 @@ static inline void snp_free_firmware_page(void *addr) { } static inline void sev_platform_shutdown(void) { } +static inline bool sev_is_snp_ciphertext_hiding_supported(void) { return false; } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ -- cgit v1.2.3 From c9760b0fca6bfa250c02e14bfe81c542f3626a72 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 21 Jul 2025 14:13:55 +0000 Subject: crypto: ccp - Add support to enable CipherTextHiding on SNP_INIT_EX To enable ciphertext hiding, it must be specified in the SNP_INIT_EX command as part of SNP initialization. Modify the sev_platform_init_args structure, which is used as input to sev_platform_init(), to include a field that, when non-zero, indicates that ciphertext hiding should be enabled and specifies the maximum ASID that can be used for an SEV-SNP guest. 
Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- include/linux/psp-sev.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index d83185b4268b..e0dbcb4b4fd9 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -748,10 +748,13 @@ struct sev_data_snp_guest_request { struct sev_data_snp_init_ex { u32 init_rmp:1; u32 list_paddr_en:1; - u32 rsvd:30; + u32 rapl_dis:1; + u32 ciphertext_hiding_en:1; + u32 rsvd:28; u32 rsvd1; u64 list_paddr; - u8 rsvd2[48]; + u16 max_snp_asid; + u8 rsvd2[46]; } __packed; /** @@ -800,10 +803,13 @@ struct sev_data_snp_shutdown_ex { * @probe: True if this is being called as part of CCP module probe, which * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed * unless psp_init_on_probe module param is set + * @max_snp_asid: When non-zero, enable ciphertext hiding and specify the + * maximum ASID that can be used for an SEV-SNP guest. */ struct sev_platform_init_args { int error; bool probe; + unsigned int max_snp_asid; }; /** -- cgit v1.2.3 From b76c739c3d11d1dacc8efe7fa873bee28ac991f1 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 22 Jul 2025 16:52:38 -0500 Subject: iio: fix iio_push_to_buffers_with_ts() typo Replace iio_push_to_buffer_with_ts() with iio_push_to_buffers_with_ts() in some documentation comments in iio.h. The latter is the correct name of the function, the former doesn't exist. 
Signed-off-by: David Lechner Link: https://patch.msgid.link/20250722-iio-fix-iio_push_to_buffer_with_ts-typo-v1-1-6ac9efb856d3@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d11668f14a3e..2f5560646ee4 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -779,7 +779,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * them safe for use with non-coherent DMA. * * A number of drivers also use this on buffers that include a 64-bit timestamp - * that is used with iio_push_to_buffer_with_ts(). Therefore, in the case where + * that is used with iio_push_to_buffers_with_ts(). Therefore, in the case where * DMA alignment is not sufficient for proper timestamp alignment, we align to * 8 bytes instead. */ @@ -794,7 +794,7 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * @name: identifier name of the buffer * @count: number of elements in the buffer * - * Declares a buffer that is safe to use with iio_push_to_buffer_with_ts(). In + * Declares a buffer that is safe to use with iio_push_to_buffers_with_ts(). In * addition to allocating enough space for @count elements of @type, it also * allocates space for a s64 timestamp at the end of the buffer and ensures * proper alignment of the timestamp. -- cgit v1.2.3 From 4bf1541e41d0540f9fcce8a32424ab05ae26fdca Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 30 Jun 2025 11:32:56 +0200 Subject: pcmcia: remove PCCARD_IODYN The config PCCARD_IODYN was last used in the config option PCMCIA_M8XX with its m8xx_pcmcia driver. This driver was removed with commit 39eb56da2b53 ("pcmcia: Remove m8xx_pcmcia driver"), included in v3.17, back in 2014. Since then, the config PCCARD_IODYN is unused. 
Remove the config option, the corresponding file included with this config and the corresponding definition in the pcmcia header file. Signed-off-by: Lukas Bulwahn Signed-off-by: Dominik Brodowski --- include/pcmcia/ss.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 7cf7dbbfa131..89aed99bfeae 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -227,12 +227,8 @@ struct pcmcia_socket { /* socket drivers must define the resource operations type they use. There - * are three options: + * are two options: * - pccard_static_ops iomem and ioport areas are assigned statically - * - pccard_iodyn_ops iomem areas is assigned statically, ioport - * areas dynamically - * If this option is selected, use - * "select PCCARD_IODYN" in Kconfig. * - pccard_nonstatic_ops iomem and ioport areas are assigned dynamically. * If this option is selected, use * "select PCCARD_NONSTATIC" in Kconfig. @@ -240,13 +236,11 @@ struct pcmcia_socket { */ extern struct pccard_resource_ops pccard_static_ops; #if defined(CONFIG_PCMCIA) || defined(CONFIG_PCMCIA_MODULE) -extern struct pccard_resource_ops pccard_iodyn_ops; extern struct pccard_resource_ops pccard_nonstatic_ops; #else /* If PCMCIA is not used, but only CARDBUS, these functions are not used * at all. Therefore, do not use the large (240K!) rsrc_nonstatic module */ -#define pccard_iodyn_ops pccard_static_ops #define pccard_nonstatic_ops pccard_static_ops #endif -- cgit v1.2.3 From 4847d1187402a5027d9a04393f12d52a5a1d7f98 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:41 +0200 Subject: console: introduce console_lock guard()s Having this, guards like these work: guard(console_lock)(); or scoped_guard(console_lock) { ... } See e.g. "vc_screen: use guard()s" later in this series. 
Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/console.h b/include/linux/console.h index 8f10d0a85bb4..031a58dc2b91 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -666,6 +666,8 @@ void vcs_remove_sysfs(int index); */ extern atomic_t ignore_console_lock_warning; +DEFINE_LOCK_GUARD_0(console_lock, console_lock(), console_unlock()); + extern void console_init(void); /* For deferred console takeover */ -- cgit v1.2.3 From e8398b8aed50382c21fcec77e80a5314e7c45c25 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:42 +0200 Subject: tty: introduce tty_port_tty guard() Having this, guards like these work: scoped_guard(tty_port_tty, port) tty_wakeup(scoped_tty()); See e.g. "tty_port: use scoped_guard()" later in this series. The definitions depend on CONFIG_TTY. It's due to tty_kref_put(). On !CONFIG_TTY, it is an inline and its declaration would conflict. The guards are not needed in that case, of course. 
Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-3-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 332ddb93603e..660c254f1efe 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -270,4 +270,18 @@ static inline void tty_port_tty_vhangup(struct tty_port *port) __tty_port_tty_hangup(port, false, false); } +#ifdef CONFIG_TTY +void tty_kref_put(struct tty_struct *tty); +__DEFINE_CLASS_IS_CONDITIONAL(tty_port_tty, true); +__DEFINE_UNLOCK_GUARD(tty_port_tty, struct tty_struct, tty_kref_put(_T->lock)); +static inline class_tty_port_tty_t class_tty_port_tty_constructor(struct tty_port *tport) +{ + class_tty_port_tty_t _t = { + .lock = tty_port_tty_get(tport), + }; + return _t; +} +#define scoped_tty() ((struct tty_struct *)(__guard_ptr(tty_port_tty)(&scope))) +#endif + #endif -- cgit v1.2.3 From 0fd60b689b0dacce659253ec15cb3d3bf660e30b Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 14 Aug 2025 09:24:43 +0200 Subject: serial: introduce uart_port_lock() guard()s Having this, guards like these work: guard(uart_port_lock_irq)(&up->port); or scoped_guard(uart_port_lock_irqsave, port) { ... } See e.g. "serial: 8250: use guard()s" later in this series. 
Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20250814072456.182853-4-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 84b4648ead7e..666430b47899 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -788,6 +788,19 @@ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned lo spin_unlock_irqrestore(&up->lock, flags); } +DEFINE_GUARD(uart_port_lock, struct uart_port *, uart_port_lock(_T), uart_port_unlock(_T)); +DEFINE_GUARD_COND(uart_port_lock, _try, uart_port_trylock(_T)); + +DEFINE_GUARD(uart_port_lock_irq, struct uart_port *, uart_port_lock_irq(_T), + uart_port_unlock_irq(_T)); + +DEFINE_LOCK_GUARD_1(uart_port_lock_irqsave, struct uart_port, + uart_port_lock_irqsave(_T->lock, &_T->flags), + uart_port_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags); +DEFINE_LOCK_GUARD_1_COND(uart_port_lock_irqsave, _try, + uart_port_trylock_irqsave(_T->lock, &_T->flags)); + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); -- cgit v1.2.3 From 292cb391479d50f4379a0abab34324de92c82a92 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 13 Aug 2025 23:03:52 -0700 Subject: software node: Constify node_group in registration functions The software_node_register_node_group() and software_node_unregister_node_group() functions take in essence an array of pointers to software_node structs. Since the functions do not modify the array declare the argument as constant, so that static arrays can be declared as const and annotated as __initconst. 
Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/2zny5grbgtwbplynxffxg6dkgjgqf45aigwmgxio5stesdr3wi@gf2zamk5amic Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 82f0cb3abd1e..d1e80b3c9918 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -574,8 +574,8 @@ const struct software_node * software_node_find_by_name(const struct software_node *parent, const char *name); -int software_node_register_node_group(const struct software_node **node_group); -void software_node_unregister_node_group(const struct software_node **node_group); +int software_node_register_node_group(const struct software_node * const *node_group); +void software_node_unregister_node_group(const struct software_node * const *node_group); int software_node_register(const struct software_node *node); void software_node_unregister(const struct software_node *node); -- cgit v1.2.3 From a032fe30cf09b6723ab61a05aee057311b00f9e1 Mon Sep 17 00:00:00 2001 From: Dongcheng Yan Date: Fri, 25 Apr 2025 18:43:30 +0800 Subject: platform/x86: int3472: add hpd pin support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typically HDMI to MIPI CSI-2 bridges have a pin to signal image data is being received. On the host side this is wired to a GPIO for polling or interrupts. This includes the Lontium HDMI to MIPI CSI-2 bridges lt6911uxe and lt6911uxc. The GPIO "hpd" is used already by other HDMI to CSI-2 bridges, use it here as well. 
Signed-off-by: Dongcheng Yan Reviewed-by: Sakari Ailus Acked-by: Ilpo Järvinen Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Fixes: 20244cbafbd6 ("media: i2c: change lt6911uxe irq_gpio name to "hpd"") Cc: stable@vger.kernel.org Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/linux/platform_data/x86/int3472.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 78276a11c48d..1571e9157fa5 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -27,6 +27,7 @@ #define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 +#define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 #define INT3472_PDEV_MAX_NAME_LEN 23 #define INT3472_MAX_SENSOR_GPIOS 3 -- cgit v1.2.3 From 5576d8098052952a6c95af86ad3dcb341554ac75 Mon Sep 17 00:00:00 2001 From: Inbaraj E Date: Thu, 14 Aug 2025 19:39:32 +0530 Subject: dt-bindings: clock: Add CAM_CSI clock macro for FSD CAM_CSI block has ACLK, PCLK and PLL clocks. PCLK id is already assigned. To use PCLK and PLL clock in driver add id macro for CAM_CSI_PLL and CAM_CSI_PCLK. 
Signed-off-by: Inbaraj E Link: https://lore.kernel.org/r/20250814140943.22531-2-inbaraj.e@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/fsd-clk.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/fsd-clk.h b/include/dt-bindings/clock/fsd-clk.h index 3f7b64d93558..58fdec8f4c2a 100644 --- a/include/dt-bindings/clock/fsd-clk.h +++ b/include/dt-bindings/clock/fsd-clk.h @@ -139,5 +139,18 @@ #define CAM_CSI2_1_IPCLKPORT_I_ACLK 10 #define CAM_CSI2_2_IPCLKPORT_I_ACLK 11 #define CAM_CSI2_3_IPCLKPORT_I_ACLK 12 +#define CAM_CSI_PLL 13 +#define CAM_CSI0_0_IPCLKPORT_I_PCLK 14 +#define CAM_CSI0_1_IPCLKPORT_I_PCLK 15 +#define CAM_CSI0_2_IPCLKPORT_I_PCLK 16 +#define CAM_CSI0_3_IPCLKPORT_I_PCLK 17 +#define CAM_CSI1_0_IPCLKPORT_I_PCLK 18 +#define CAM_CSI1_1_IPCLKPORT_I_PCLK 19 +#define CAM_CSI1_2_IPCLKPORT_I_PCLK 20 +#define CAM_CSI1_3_IPCLKPORT_I_PCLK 21 +#define CAM_CSI2_0_IPCLKPORT_I_PCLK 22 +#define CAM_CSI2_1_IPCLKPORT_I_PCLK 23 +#define CAM_CSI2_2_IPCLKPORT_I_PCLK 24 +#define CAM_CSI2_3_IPCLKPORT_I_PCLK 25 #endif /*_DT_BINDINGS_CLOCK_FSD_H */ -- cgit v1.2.3 From c0ed3c2edc7692c6b8af7578b41012694dc8c671 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Sat, 16 Aug 2025 12:27:41 +0800 Subject: ALSA: hda/tas2781: Add name prefix tas2781 for tas2781's dvc_tlv and amp_vol_tlv With some new devices adding into the driver, dvc_tlv and amp_vol_tlv will cause confusion for customers on which devices they support. 
Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250816042741.1659-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- include/sound/tas2781-tlv.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index ef9b9f19d212..273224df9282 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -2,7 +2,7 @@ // // ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier // -// Copyright (C) 2022 - 2024 Texas Instruments Incorporated +// Copyright (C) 2022 - 2025 Texas Instruments Incorporated // https://www.ti.com // // The TAS2781 driver implements a flexible and configurable @@ -15,7 +15,7 @@ #ifndef __TAS2781_TLV_H__ #define __TAS2781_TLV_H__ -static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 50, 0); -static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2781_dvc_tlv, -10000, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2781_amp_tlv, 1100, 50, 0); #endif -- cgit v1.2.3 From 894af4a1cde61c3401f237184fb770f72ff12df8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 12 Apr 2025 13:56:01 +0200 Subject: objtool: Validate kCFI calls Validate that all indirect calls adhere to kCFI rules. Notably doing nocfi indirect call to a cfi function is broken. Apparently some Rust 'core' code violates this and explodes when run with FineIBT. All the ANNOTATE_NOCFI_SYM sites are prime targets for attackers. - runtime EFI is especially heinous because it also needs to disable IBT. Basically calling unknown code without CFI protection at runtime is a massive security issue.
- Kexec image handover; if you can exploit this, you get to keep it :-) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Acked-by: Sean Christopherson Link: https://lkml.kernel.org/r/20250714103441.496787279@infradead.org --- include/linux/objtool.h | 10 ++++++++++ include/linux/objtool_types.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 366ad004d794..46ebaa46e6c5 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -184,6 +184,15 @@ * WARN using UD2. */ #define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) +/* + * This should not be used; it annotates away CFI violations. There are a few + * valid use cases like kexec handover to the next kernel image, and there is + * no security concern there. + * + * There are also a few real issues annotated away, like EFI because we can't + * control the EFI code. + */ +#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) #else #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR @@ -194,6 +203,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN #define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI #endif #if defined(CONFIG_NOINSTR_VALIDATION) && \ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ -- cgit v1.2.3 From 8ea815399c3fcce1889bd951fec25b5b9a3979c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 14 Apr 2025 16:41:07 +0200 Subject: compiler: remove 
__ADDRESSABLE_ASM{_STR,}() again __ADDRESSABLE_ASM_STR() is where the necessary stringification happens. As long as "sym" doesn't contain any odd characters, no quoting is required for its use with .quad / .long. In fact the quotation gets in the way with gas 2.25; it's only from 2.26 onwards that quoted symbols are half-way properly supported. However, assembly being different from C anyway, drop __ADDRESSABLE_ASM_STR() and its helper macro altogether. A simple .global directive will suffice to get the symbol "declared", i.e. into the symbol table. While there also stop open-coding STATIC_CALL_TRAMP() and STATIC_CALL_KEY(). Fixes: 0ef8047b737d ("x86/static-call: provide a way to do very early static-call updates") Signed-off-by: Jan Beulich Acked-by: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Message-ID: <609d2c74-de13-4fae-ab1a-1ec44afb948d@suse.com> --- include/linux/compiler.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..64ff73c533e5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -288,14 +288,6 @@ static inline void *offset_to_ptr(const int *off) #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) -#define __ADDRESSABLE_ASM(sym) \ - .pushsection .discard.addressable,"aw"; \ - .align ARCH_SEL(8,4); \ - ARCH_SEL(.quad, .long) __stringify(sym); \ - .popsection; - -#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. -- cgit v1.2.3 From 89d912e494f786e79f69ed9d567a8842c71dbb03 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:27 +0200 Subject: bpf: Add dynptr type for skb metadata Add a dynptr type, similar to skb dynptr, but for the skb metadata access. 
The dynptr provides an alternative to __sk_buff->data_meta for accessing the custom metadata area allocated using the bpf_xdp_adjust_meta() helper. More importantly, it abstracts away where the storage for the custom metadata lives, which opens up the way to persist the metadata by relocating it as the skb travels through the network stack layers. Writes to skb metadata invalidate any existing skb payload and metadata slices. While this is more restrictive than needed at the moment, it leaves the door open to reallocating the metadata on writes, and should be only a minor inconvenience to the users. Only the program types which can access __sk_buff->data_meta today are allowed to create a dynptr for skb metadata at the moment. We need to modify the network stack to persist the metadata across layers before opening up access to other BPF hooks. Once more BPF hooks gain access to skb_meta dynptr, we will also need to add a read-only variant of the helper similar to bpf_dynptr_from_skb_rdonly. skb_meta dynptr ops are stubbed out and implemented by subsequent changes. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-1-8a39e636e0fb@cloudflare.com --- include/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cc700925b802..ec527b476dba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -767,12 +767,15 @@ enum bpf_type_flag { */ MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ + DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) /* Max number of base types.
*/ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1358,6 +1361,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_SKB, /* Underlying data is a xdp_buff */ BPF_DYNPTR_TYPE_XDP, + /* Points to skb_metadata_end()-skb_metadata_len() */ + BPF_DYNPTR_TYPE_SKB_META, }; int bpf_dynptr_check_size(u32 size); -- cgit v1.2.3 From 6877cd392baecf816c2ba896a9d42874628004a5 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:28 +0200 Subject: bpf: Enable read/write access to skb metadata through a dynptr Now that we can create a dynptr to skb metadata, make reads to the metadata area possible with bpf_dynptr_read() or through a bpf_dynptr_slice(), and make writes to the metadata area possible with bpf_dynptr_write() or through a bpf_dynptr_slice_rdwr(). Note that for cloned skbs which share data with the original, we limit the skb metadata dynptr to be read-only since we don't unclone on a bpf_dynptr_write to metadata. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-2-8a39e636e0fb@cloudflare.com --- include/linux/filter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1e7fd3ee759e..9ed21b65e2e9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1784,6 +1784,7 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len); void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len); void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, unsigned long len, bool flush); +void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset); #else /* CONFIG_NET */ static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) @@ -1818,6 +1819,11 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi unsigned long len, bool flush) { } + +static inline void 
*bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) +{ + return NULL; +} #endif /* CONFIG_NET */ #endif /* __LINUX_FILTER_H__ */ -- cgit v1.2.3 From e5eb72c92eb724aa14c50c7d92d1a576dd50d7e6 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Aug 2025 19:32:18 +0200 Subject: scsi: libsas: Add dev_parent_is_expander() helper Many libsas drivers check if the parent of the device is an expander. Create a helper that the libsas drivers will use in follow up commits. Suggested-by: Damien Le Moal Signed-off-by: Niklas Cassel Link: https://lore.kernel.org/r/20250814173215.1765055-15-cassel@kernel.org Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index ba460b6c0374..8d38565e99fa 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -203,6 +203,14 @@ static inline bool dev_is_expander(enum sas_device_type type) type == SAS_FANOUT_EXPANDER_DEVICE; } +static inline bool dev_parent_is_expander(struct domain_device *dev) +{ + if (!dev->parent) + return false; + + return dev_is_expander(dev->parent->dev_type); +} + static inline void INIT_SAS_WORK(struct sas_work *sw, void (*fn)(struct work_struct *)) { INIT_WORK(&sw->work, fn); -- cgit v1.2.3 From dcb8d01b65fb5a891ddbbedcbe6eff0b8ec37867 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 18 Jul 2025 19:13:39 +0300 Subject: dt-bindings: power: qcom-rpmpd: split RPMh domains definitions Historically both RPM and RPMh domain definitions were a part of the same, qcom-rpmpd.h header. Now as we have a separate header for RPMh definitions, qcom,rpmhpd.h, move all RPMh power domain definitions to that header. 
Signed-off-by: Dmitry Baryshkov Acked-by: Rob Herring (Arm) Reviewed-by: Konrad Dybcio Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20250718-rework-rpmhpd-rpmpd-v1-1-eedca108e540@oss.qualcomm.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/qcom,rpmhpd.h | 233 ++++++++++++++++++++++++++++++++ include/dt-bindings/power/qcom-rpmpd.h | 228 +------------------------------ 2 files changed, 234 insertions(+), 227 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/power/qcom,rpmhpd.h b/include/dt-bindings/power/qcom,rpmhpd.h index e54ffa361451..73cceb88953f 100644 --- a/include/dt-bindings/power/qcom,rpmhpd.h +++ b/include/dt-bindings/power/qcom,rpmhpd.h @@ -29,4 +29,237 @@ #define RPMHPD_NSP2 19 #define RPMHPD_GMXC 20 +/* RPMh Power Domain performance levels */ +#define RPMH_REGULATOR_LEVEL_RETENTION 16 +#define RPMH_REGULATOR_LEVEL_MIN_SVS 48 +#define RPMH_REGULATOR_LEVEL_LOW_SVS_D3 50 +#define RPMH_REGULATOR_LEVEL_LOW_SVS_D2 52 +#define RPMH_REGULATOR_LEVEL_LOW_SVS_D1 56 +#define RPMH_REGULATOR_LEVEL_LOW_SVS_D0 60 +#define RPMH_REGULATOR_LEVEL_LOW_SVS 64 +#define RPMH_REGULATOR_LEVEL_LOW_SVS_P1 72 +#define RPMH_REGULATOR_LEVEL_LOW_SVS_L1 80 +#define RPMH_REGULATOR_LEVEL_LOW_SVS_L2 96 +#define RPMH_REGULATOR_LEVEL_SVS 128 +#define RPMH_REGULATOR_LEVEL_SVS_L0 144 +#define RPMH_REGULATOR_LEVEL_SVS_L1 192 +#define RPMH_REGULATOR_LEVEL_SVS_L2 224 +#define RPMH_REGULATOR_LEVEL_NOM 256 +#define RPMH_REGULATOR_LEVEL_NOM_L0 288 +#define RPMH_REGULATOR_LEVEL_NOM_L1 320 +#define RPMH_REGULATOR_LEVEL_NOM_L2 336 +#define RPMH_REGULATOR_LEVEL_TURBO 384 +#define RPMH_REGULATOR_LEVEL_TURBO_L0 400 +#define RPMH_REGULATOR_LEVEL_TURBO_L1 416 +#define RPMH_REGULATOR_LEVEL_TURBO_L2 432 +#define RPMH_REGULATOR_LEVEL_TURBO_L3 448 +#define RPMH_REGULATOR_LEVEL_TURBO_L4 452 +#define RPMH_REGULATOR_LEVEL_TURBO_L5 456 +#define RPMH_REGULATOR_LEVEL_SUPER_TURBO 464 +#define RPMH_REGULATOR_LEVEL_SUPER_TURBO_NO_CPR 480 + +/* + * Platform-specific power 
domain bindings. Don't add new entries here, use + * RPMHPD_* above. + */ + +/* SA8775P Power Domain Indexes */ +#define SA8775P_CX 0 +#define SA8775P_CX_AO 1 +#define SA8775P_DDR 2 +#define SA8775P_EBI 3 +#define SA8775P_GFX 4 +#define SA8775P_LCX 5 +#define SA8775P_LMX 6 +#define SA8775P_MMCX 7 +#define SA8775P_MMCX_AO 8 +#define SA8775P_MSS 9 +#define SA8775P_MX 10 +#define SA8775P_MX_AO 11 +#define SA8775P_MXC 12 +#define SA8775P_MXC_AO 13 +#define SA8775P_NSP0 14 +#define SA8775P_NSP1 15 +#define SA8775P_XO 16 + +/* SDM670 Power Domain Indexes */ +#define SDM670_MX 0 +#define SDM670_MX_AO 1 +#define SDM670_CX 2 +#define SDM670_CX_AO 3 +#define SDM670_LMX 4 +#define SDM670_LCX 5 +#define SDM670_GFX 6 +#define SDM670_MSS 7 + +/* SDM845 Power Domain Indexes */ +#define SDM845_EBI 0 +#define SDM845_MX 1 +#define SDM845_MX_AO 2 +#define SDM845_CX 3 +#define SDM845_CX_AO 4 +#define SDM845_LMX 5 +#define SDM845_LCX 6 +#define SDM845_GFX 7 +#define SDM845_MSS 8 + +/* SDX55 Power Domain Indexes */ +#define SDX55_MSS 0 +#define SDX55_MX 1 +#define SDX55_CX 2 + +/* SDX65 Power Domain Indexes */ +#define SDX65_MSS 0 +#define SDX65_MX 1 +#define SDX65_MX_AO 2 +#define SDX65_CX 3 +#define SDX65_CX_AO 4 +#define SDX65_MXC 5 + +/* SM6350 Power Domain Indexes */ +#define SM6350_CX 0 +#define SM6350_GFX 1 +#define SM6350_LCX 2 +#define SM6350_LMX 3 +#define SM6350_MSS 4 +#define SM6350_MX 5 + +/* SM8150 Power Domain Indexes */ +#define SM8150_MSS 0 +#define SM8150_EBI 1 +#define SM8150_LMX 2 +#define SM8150_LCX 3 +#define SM8150_GFX 4 +#define SM8150_MX 5 +#define SM8150_MX_AO 6 +#define SM8150_CX 7 +#define SM8150_CX_AO 8 +#define SM8150_MMCX 9 +#define SM8150_MMCX_AO 10 + +/* SA8155P is a special case, kept for backwards compatibility */ +#define SA8155P_CX SM8150_CX +#define SA8155P_CX_AO SM8150_CX_AO +#define SA8155P_EBI SM8150_EBI +#define SA8155P_GFX SM8150_GFX +#define SA8155P_MSS SM8150_MSS +#define SA8155P_MX SM8150_MX +#define SA8155P_MX_AO SM8150_MX_AO + +/* SM8250 
Power Domain Indexes */ +#define SM8250_CX 0 +#define SM8250_CX_AO 1 +#define SM8250_EBI 2 +#define SM8250_GFX 3 +#define SM8250_LCX 4 +#define SM8250_LMX 5 +#define SM8250_MMCX 6 +#define SM8250_MMCX_AO 7 +#define SM8250_MX 8 +#define SM8250_MX_AO 9 + +/* SM8350 Power Domain Indexes */ +#define SM8350_CX 0 +#define SM8350_CX_AO 1 +#define SM8350_EBI 2 +#define SM8350_GFX 3 +#define SM8350_LCX 4 +#define SM8350_LMX 5 +#define SM8350_MMCX 6 +#define SM8350_MMCX_AO 7 +#define SM8350_MX 8 +#define SM8350_MX_AO 9 +#define SM8350_MXC 10 +#define SM8350_MXC_AO 11 +#define SM8350_MSS 12 + +/* SM8450 Power Domain Indexes */ +#define SM8450_CX 0 +#define SM8450_CX_AO 1 +#define SM8450_EBI 2 +#define SM8450_GFX 3 +#define SM8450_LCX 4 +#define SM8450_LMX 5 +#define SM8450_MMCX 6 +#define SM8450_MMCX_AO 7 +#define SM8450_MX 8 +#define SM8450_MX_AO 9 +#define SM8450_MXC 10 +#define SM8450_MXC_AO 11 +#define SM8450_MSS 12 + +/* SM8550 Power Domain Indexes */ +#define SM8550_CX 0 +#define SM8550_CX_AO 1 +#define SM8550_EBI 2 +#define SM8550_GFX 3 +#define SM8550_LCX 4 +#define SM8550_LMX 5 +#define SM8550_MMCX 6 +#define SM8550_MMCX_AO 7 +#define SM8550_MX 8 +#define SM8550_MX_AO 9 +#define SM8550_MXC 10 +#define SM8550_MXC_AO 11 +#define SM8550_MSS 12 +#define SM8550_NSP 13 + +/* QDU1000/QRU1000 Power Domain Indexes */ +#define QDU1000_EBI 0 +#define QDU1000_MSS 1 +#define QDU1000_CX 2 +#define QDU1000_MX 3 + +/* SC7180 Power Domain Indexes */ +#define SC7180_CX 0 +#define SC7180_CX_AO 1 +#define SC7180_GFX 2 +#define SC7180_MX 3 +#define SC7180_MX_AO 4 +#define SC7180_LMX 5 +#define SC7180_LCX 6 +#define SC7180_MSS 7 + +/* SC7280 Power Domain Indexes */ +#define SC7280_CX 0 +#define SC7280_CX_AO 1 +#define SC7280_EBI 2 +#define SC7280_GFX 3 +#define SC7280_MX 4 +#define SC7280_MX_AO 5 +#define SC7280_LMX 6 +#define SC7280_LCX 7 +#define SC7280_MSS 8 + +/* SC8180X Power Domain Indexes */ +#define SC8180X_CX 0 +#define SC8180X_CX_AO 1 +#define SC8180X_EBI 2 +#define SC8180X_GFX 
3 +#define SC8180X_LCX 4 +#define SC8180X_LMX 5 +#define SC8180X_MMCX 6 +#define SC8180X_MMCX_AO 7 +#define SC8180X_MSS 8 +#define SC8180X_MX 9 +#define SC8180X_MX_AO 10 + +/* SC8280XP Power Domain Indexes */ +#define SC8280XP_CX 0 +#define SC8280XP_CX_AO 1 +#define SC8280XP_DDR 2 +#define SC8280XP_EBI 3 +#define SC8280XP_GFX 4 +#define SC8280XP_LCX 5 +#define SC8280XP_LMX 6 +#define SC8280XP_MMCX 7 +#define SC8280XP_MMCX_AO 8 +#define SC8280XP_MSS 9 +#define SC8280XP_MX 10 +#define SC8280XP_MXC 12 +#define SC8280XP_MX_AO 11 +#define SC8280XP_NSP 13 +#define SC8280XP_QPHY 14 +#define SC8280XP_XO 15 + #endif diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index f15bcee7c928..d303b3b37f18 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -4,66 +4,7 @@ #ifndef _DT_BINDINGS_POWER_QCOM_RPMPD_H #define _DT_BINDINGS_POWER_QCOM_RPMPD_H -/* SA8775P Power Domain Indexes */ -#define SA8775P_CX 0 -#define SA8775P_CX_AO 1 -#define SA8775P_DDR 2 -#define SA8775P_EBI 3 -#define SA8775P_GFX 4 -#define SA8775P_LCX 5 -#define SA8775P_LMX 6 -#define SA8775P_MMCX 7 -#define SA8775P_MMCX_AO 8 -#define SA8775P_MSS 9 -#define SA8775P_MX 10 -#define SA8775P_MX_AO 11 -#define SA8775P_MXC 12 -#define SA8775P_MXC_AO 13 -#define SA8775P_NSP0 14 -#define SA8775P_NSP1 15 -#define SA8775P_XO 16 - -/* SDM670 Power Domain Indexes */ -#define SDM670_MX 0 -#define SDM670_MX_AO 1 -#define SDM670_CX 2 -#define SDM670_CX_AO 3 -#define SDM670_LMX 4 -#define SDM670_LCX 5 -#define SDM670_GFX 6 -#define SDM670_MSS 7 - -/* SDM845 Power Domain Indexes */ -#define SDM845_EBI 0 -#define SDM845_MX 1 -#define SDM845_MX_AO 2 -#define SDM845_CX 3 -#define SDM845_CX_AO 4 -#define SDM845_LMX 5 -#define SDM845_LCX 6 -#define SDM845_GFX 7 -#define SDM845_MSS 8 - -/* SDX55 Power Domain Indexes */ -#define SDX55_MSS 0 -#define SDX55_MX 1 -#define SDX55_CX 2 - -/* SDX65 Power Domain Indexes */ -#define SDX65_MSS 0 -#define 
SDX65_MX 1 -#define SDX65_MX_AO 2 -#define SDX65_CX 3 -#define SDX65_CX_AO 4 -#define SDX65_MXC 5 - -/* SM6350 Power Domain Indexes */ -#define SM6350_CX 0 -#define SM6350_GFX 1 -#define SM6350_LCX 2 -#define SM6350_LMX 3 -#define SM6350_MSS 4 -#define SM6350_MX 5 +#include <dt-bindings/power/qcom,rpmhpd.h> /* SM6375 Power Domain Indexes */ #define SM6375_VDDCX 0 @@ -77,173 +18,6 @@ #define SM6375_VDD_LPI_CX 8 #define SM6375_VDD_LPI_MX 9 -/* SM8150 Power Domain Indexes */ -#define SM8150_MSS 0 -#define SM8150_EBI 1 -#define SM8150_LMX 2 -#define SM8150_LCX 3 -#define SM8150_GFX 4 -#define SM8150_MX 5 -#define SM8150_MX_AO 6 -#define SM8150_CX 7 -#define SM8150_CX_AO 8 -#define SM8150_MMCX 9 -#define SM8150_MMCX_AO 10 - -/* SA8155P is a special case, kept for backwards compatibility */ -#define SA8155P_CX SM8150_CX -#define SA8155P_CX_AO SM8150_CX_AO -#define SA8155P_EBI SM8150_EBI -#define SA8155P_GFX SM8150_GFX -#define SA8155P_MSS SM8150_MSS -#define SA8155P_MX SM8150_MX -#define SA8155P_MX_AO SM8150_MX_AO - -/* SM8250 Power Domain Indexes */ -#define SM8250_CX 0 -#define SM8250_CX_AO 1 -#define SM8250_EBI 2 -#define SM8250_GFX 3 -#define SM8250_LCX 4 -#define SM8250_LMX 5 -#define SM8250_MMCX 6 -#define SM8250_MMCX_AO 7 -#define SM8250_MX 8 -#define SM8250_MX_AO 9 - -/* SM8350 Power Domain Indexes */ -#define SM8350_CX 0 -#define SM8350_CX_AO 1 -#define SM8350_EBI 2 -#define SM8350_GFX 3 -#define SM8350_LCX 4 -#define SM8350_LMX 5 -#define SM8350_MMCX 6 -#define SM8350_MMCX_AO 7 -#define SM8350_MX 8 -#define SM8350_MX_AO 9 -#define SM8350_MXC 10 -#define SM8350_MXC_AO 11 -#define SM8350_MSS 12 - -/* SM8450 Power Domain Indexes */ -#define SM8450_CX 0 -#define SM8450_CX_AO 1 -#define SM8450_EBI 2 -#define SM8450_GFX 3 -#define SM8450_LCX 4 -#define SM8450_LMX 5 -#define SM8450_MMCX 6 -#define SM8450_MMCX_AO 7 -#define SM8450_MX 8 -#define SM8450_MX_AO 9 -#define SM8450_MXC 10 -#define SM8450_MXC_AO 11 -#define SM8450_MSS 12 - -/* SM8550 Power Domain Indexes */ -#define SM8550_CX 0 -#define
SM8550_CX_AO 1 -#define SM8550_EBI 2 -#define SM8550_GFX 3 -#define SM8550_LCX 4 -#define SM8550_LMX 5 -#define SM8550_MMCX 6 -#define SM8550_MMCX_AO 7 -#define SM8550_MX 8 -#define SM8550_MX_AO 9 -#define SM8550_MXC 10 -#define SM8550_MXC_AO 11 -#define SM8550_MSS 12 -#define SM8550_NSP 13 - -/* QDU1000/QRU1000 Power Domain Indexes */ -#define QDU1000_EBI 0 -#define QDU1000_MSS 1 -#define QDU1000_CX 2 -#define QDU1000_MX 3 - -/* SC7180 Power Domain Indexes */ -#define SC7180_CX 0 -#define SC7180_CX_AO 1 -#define SC7180_GFX 2 -#define SC7180_MX 3 -#define SC7180_MX_AO 4 -#define SC7180_LMX 5 -#define SC7180_LCX 6 -#define SC7180_MSS 7 - -/* SC7280 Power Domain Indexes */ -#define SC7280_CX 0 -#define SC7280_CX_AO 1 -#define SC7280_EBI 2 -#define SC7280_GFX 3 -#define SC7280_MX 4 -#define SC7280_MX_AO 5 -#define SC7280_LMX 6 -#define SC7280_LCX 7 -#define SC7280_MSS 8 - -/* SC8180X Power Domain Indexes */ -#define SC8180X_CX 0 -#define SC8180X_CX_AO 1 -#define SC8180X_EBI 2 -#define SC8180X_GFX 3 -#define SC8180X_LCX 4 -#define SC8180X_LMX 5 -#define SC8180X_MMCX 6 -#define SC8180X_MMCX_AO 7 -#define SC8180X_MSS 8 -#define SC8180X_MX 9 -#define SC8180X_MX_AO 10 - -/* SC8280XP Power Domain Indexes */ -#define SC8280XP_CX 0 -#define SC8280XP_CX_AO 1 -#define SC8280XP_DDR 2 -#define SC8280XP_EBI 3 -#define SC8280XP_GFX 4 -#define SC8280XP_LCX 5 -#define SC8280XP_LMX 6 -#define SC8280XP_MMCX 7 -#define SC8280XP_MMCX_AO 8 -#define SC8280XP_MSS 9 -#define SC8280XP_MX 10 -#define SC8280XP_MXC 12 -#define SC8280XP_MX_AO 11 -#define SC8280XP_NSP 13 -#define SC8280XP_QPHY 14 -#define SC8280XP_XO 15 - -/* SDM845 Power Domain performance levels */ -#define RPMH_REGULATOR_LEVEL_RETENTION 16 -#define RPMH_REGULATOR_LEVEL_MIN_SVS 48 -#define RPMH_REGULATOR_LEVEL_LOW_SVS_D3 50 -#define RPMH_REGULATOR_LEVEL_LOW_SVS_D2 52 -#define RPMH_REGULATOR_LEVEL_LOW_SVS_D1 56 -#define RPMH_REGULATOR_LEVEL_LOW_SVS_D0 60 -#define RPMH_REGULATOR_LEVEL_LOW_SVS 64 -#define 
RPMH_REGULATOR_LEVEL_LOW_SVS_P1 72 -#define RPMH_REGULATOR_LEVEL_LOW_SVS_L1 80 -#define RPMH_REGULATOR_LEVEL_LOW_SVS_L2 96 -#define RPMH_REGULATOR_LEVEL_SVS 128 -#define RPMH_REGULATOR_LEVEL_SVS_L0 144 -#define RPMH_REGULATOR_LEVEL_SVS_L1 192 -#define RPMH_REGULATOR_LEVEL_SVS_L2 224 -#define RPMH_REGULATOR_LEVEL_NOM 256 -#define RPMH_REGULATOR_LEVEL_NOM_L0 288 -#define RPMH_REGULATOR_LEVEL_NOM_L1 320 -#define RPMH_REGULATOR_LEVEL_NOM_L2 336 -#define RPMH_REGULATOR_LEVEL_TURBO 384 -#define RPMH_REGULATOR_LEVEL_TURBO_L0 400 -#define RPMH_REGULATOR_LEVEL_TURBO_L1 416 -#define RPMH_REGULATOR_LEVEL_TURBO_L2 432 -#define RPMH_REGULATOR_LEVEL_TURBO_L3 448 -#define RPMH_REGULATOR_LEVEL_TURBO_L4 452 -#define RPMH_REGULATOR_LEVEL_TURBO_L5 456 -#define RPMH_REGULATOR_LEVEL_SUPER_TURBO 464 -#define RPMH_REGULATOR_LEVEL_SUPER_TURBO_NO_CPR 480 - /* MDM9607 Power Domains */ #define MDM9607_VDDCX 0 #define MDM9607_VDDCX_AO 1 -- cgit v1.2.3 From e6e1e3b6b8f9b9b78aa0dccdde431145cefb05f5 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 18 Jul 2025 19:13:40 +0300 Subject: dt-bindings: power: qcom-rpmpd: sort out entries After removing RPMh PD indices, it becomes obvious that several entries don't follow the alphabetic sorting order. Move them in order to keep the file sorted. 
Signed-off-by: Dmitry Baryshkov Acked-by: Rob Herring (Arm) Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20250718-rework-rpmhpd-rpmpd-v1-2-eedca108e540@oss.qualcomm.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/qcom-rpmpd.h | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index d303b3b37f18..65f7d5ecc352 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -6,18 +6,6 @@ #include <dt-bindings/power/qcom,rpmhpd.h> -/* SM6375 Power Domain Indexes */ -#define SM6375_VDDCX 0 -#define SM6375_VDDCX_AO 1 -#define SM6375_VDDCX_VFL 2 -#define SM6375_VDDMX 3 -#define SM6375_VDDMX_AO 4 -#define SM6375_VDDMX_VFL 5 -#define SM6375_VDDGX 6 -#define SM6375_VDDGX_AO 7 -#define SM6375_VDD_LPI_CX 8 -#define SM6375_VDD_LPI_MX 9 - /* MDM9607 Power Domains */ #define MDM9607_VDDCX 0 #define MDM9607_VDDCX_AO 1 @@ -130,6 +118,16 @@ #define MSM8998_SSCMX 8 #define MSM8998_SSCMX_VFL 9 +/* QCM2290 Power Domains */ +#define QCM2290_VDDCX 0 +#define QCM2290_VDDCX_AO 1 +#define QCM2290_VDDCX_VFL 2 +#define QCM2290_VDDMX 3 +#define QCM2290_VDDMX_AO 4 +#define QCM2290_VDDMX_VFL 5 +#define QCM2290_VDD_LPI_CX 6 +#define QCM2290_VDD_LPI_MX 7 + /* QCS404 Power Domains */ #define QCS404_VDDMX 0 #define QCS404_VDDMX_AO 1 @@ -169,15 +167,17 @@ #define SM6125_VDDMX_AO 4 #define SM6125_VDDMX_VFL 5 -/* QCM2290 Power Domains */ -#define QCM2290_VDDCX 0 -#define QCM2290_VDDCX_AO 1 -#define QCM2290_VDDCX_VFL 2 -#define QCM2290_VDDMX 3 -#define QCM2290_VDDMX_AO 4 -#define QCM2290_VDDMX_VFL 5 -#define QCM2290_VDD_LPI_CX 6 -#define QCM2290_VDD_LPI_MX 7 +/* SM6375 Power Domain Indexes */ +#define SM6375_VDDCX 0 +#define SM6375_VDDCX_AO 1 +#define SM6375_VDDCX_VFL 2 +#define SM6375_VDDMX 3 +#define SM6375_VDDMX_AO 4 +#define SM6375_VDDMX_VFL 5 +#define SM6375_VDDGX 6 +#define SM6375_VDDGX_AO 7 +#define
SM6375_VDD_LPI_CX 8 +#define SM6375_VDD_LPI_MX 9 /* RPM SMD Power Domain performance levels */ #define RPM_SMD_LEVEL_RETENTION 16 -- cgit v1.2.3 From 94838f383a050e124c044e74a954777b8f2e6c17 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 18 Jul 2025 19:13:41 +0300 Subject: dt-bindings: power: qcom-rpmpd: add generic bindings for RPM power domains Some of the Qualcomm RPM PD controllers use a common set of indices for power domains. Add generic indices for Qualcomm RPM power domain controllers. Signed-off-by: Dmitry Baryshkov Acked-by: Rob Herring (Arm) Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20250718-rework-rpmhpd-rpmpd-v1-3-eedca108e540@oss.qualcomm.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/qcom-rpmpd.h | 121 +++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index 65f7d5ecc352..4371ac941f29 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -6,18 +6,37 @@ #include <dt-bindings/power/qcom,rpmhpd.h> +/* Generic RPM Power Domain Indexes */ +#define RPMPD_VDDCX 0 +#define RPMPD_VDDCX_AO 1 +/* VFC and VFL are mutually exclusive and can not be present on the same platform */ +#define RPMPD_VDDCX_VFC 2 +#define RPMPD_VDDCX_VFL 2 +#define RPMPD_VDDMX 3 +#define RPMPD_VDDMX_AO 4 +#define RPMPD_VDDMX_VFL 5 +#define RPMPD_SSCCX 6 +#define RPMPD_SSCCX_VFL 7 +#define RPMPD_SSCMX 8 +#define RPMPD_SSCMX_VFL 9 + +/* + * Platform-specific power domain bindings. Don't add new entries here, use + * RPMPD_* above.
+ */ + /* MDM9607 Power Domains */ -#define MDM9607_VDDCX 0 -#define MDM9607_VDDCX_AO 1 -#define MDM9607_VDDCX_VFL 2 -#define MDM9607_VDDMX 3 -#define MDM9607_VDDMX_AO 4 -#define MDM9607_VDDMX_VFL 5 +#define MDM9607_VDDCX RPMPD_VDDCX +#define MDM9607_VDDCX_AO RPMPD_VDDCX_AO +#define MDM9607_VDDCX_VFL RPMPD_VDDCX_VFL +#define MDM9607_VDDMX RPMPD_VDDMX +#define MDM9607_VDDMX_AO RPMPD_VDDMX_AO +#define MDM9607_VDDMX_VFL RPMPD_VDDMX_VFL /* MSM8226 Power Domain Indexes */ -#define MSM8226_VDDCX 0 -#define MSM8226_VDDCX_AO 1 -#define MSM8226_VDDCX_VFC 2 +#define MSM8226_VDDCX RPMPD_VDDCX +#define MSM8226_VDDCX_AO RPMPD_VDDCX_AO +#define MSM8226_VDDCX_VFC RPMPD_VDDCX_VFC /* MSM8939 Power Domains */ #define MSM8939_VDDMDCX 0 @@ -30,11 +49,11 @@ #define MSM8939_VDDMX_AO 7 /* MSM8916 Power Domain Indexes */ -#define MSM8916_VDDCX 0 -#define MSM8916_VDDCX_AO 1 -#define MSM8916_VDDCX_VFC 2 -#define MSM8916_VDDMX 3 -#define MSM8916_VDDMX_AO 4 +#define MSM8916_VDDCX RPMPD_VDDCX +#define MSM8916_VDDCX_AO RPMPD_VDDCX_AO +#define MSM8916_VDDCX_VFC RPMPD_VDDCX_VFC +#define MSM8916_VDDMX RPMPD_VDDMX +#define MSM8916_VDDMX_AO RPMPD_VDDMX_AO /* MSM8909 Power Domain Indexes */ #define MSM8909_VDDCX MSM8916_VDDCX @@ -44,11 +63,11 @@ #define MSM8909_VDDMX_AO MSM8916_VDDMX_AO /* MSM8917 Power Domain Indexes */ -#define MSM8917_VDDCX 0 -#define MSM8917_VDDCX_AO 1 -#define MSM8917_VDDCX_VFL 2 -#define MSM8917_VDDMX 3 -#define MSM8917_VDDMX_AO 4 +#define MSM8917_VDDCX RPMPD_VDDCX +#define MSM8917_VDDCX_AO RPMPD_VDDCX_AO +#define MSM8917_VDDCX_VFL RPMPD_VDDCX_VFL +#define MSM8917_VDDMX RPMPD_VDDMX +#define MSM8917_VDDMX_AO RPMPD_VDDMX_AO /* MSM8937 Power Domain Indexes */ #define MSM8937_VDDCX MSM8917_VDDCX @@ -81,12 +100,12 @@ #define MSM8974_VDDGFX_VFC 4 /* MSM8976 Power Domain Indexes */ -#define MSM8976_VDDCX 0 -#define MSM8976_VDDCX_AO 1 -#define MSM8976_VDDCX_VFL 2 -#define MSM8976_VDDMX 3 -#define MSM8976_VDDMX_AO 4 -#define MSM8976_VDDMX_VFL 5 +#define MSM8976_VDDCX RPMPD_VDDCX 
+#define MSM8976_VDDCX_AO RPMPD_VDDCX_AO +#define MSM8976_VDDCX_VFL RPMPD_VDDCX_VFL +#define MSM8976_VDDMX RPMPD_VDDMX +#define MSM8976_VDDMX_AO RPMPD_VDDMX_AO +#define MSM8976_VDDMX_VFL RPMPD_VDDMX_VFL /* MSM8994 Power Domain Indexes */ #define MSM8994_VDDCX 0 @@ -107,16 +126,16 @@ #define MSM8996_VDDSSCX_VFC 6 /* MSM8998 Power Domain Indexes */ -#define MSM8998_VDDCX 0 -#define MSM8998_VDDCX_AO 1 -#define MSM8998_VDDCX_VFL 2 -#define MSM8998_VDDMX 3 -#define MSM8998_VDDMX_AO 4 -#define MSM8998_VDDMX_VFL 5 -#define MSM8998_SSCCX 6 -#define MSM8998_SSCCX_VFL 7 -#define MSM8998_SSCMX 8 -#define MSM8998_SSCMX_VFL 9 +#define MSM8998_VDDCX RPMPD_VDDCX +#define MSM8998_VDDCX_AO RPMPD_VDDCX_AO +#define MSM8998_VDDCX_VFL RPMPD_VDDCX_VFL +#define MSM8998_VDDMX RPMPD_VDDMX +#define MSM8998_VDDMX_AO RPMPD_VDDMX_AO +#define MSM8998_VDDMX_VFL RPMPD_VDDMX_VFL +#define MSM8998_SSCCX RPMPD_SSCCX +#define MSM8998_SSCCX_VFL RPMPD_SSCCX_VFL +#define MSM8998_SSCMX RPMPD_SSCMX +#define MSM8998_SSCMX_VFL RPMPD_SSCMX_VFL /* QCM2290 Power Domains */ #define QCM2290_VDDCX 0 @@ -138,16 +157,16 @@ #define QCS404_LPIMX_VFL 6 /* SDM660 Power Domains */ -#define SDM660_VDDCX 0 -#define SDM660_VDDCX_AO 1 -#define SDM660_VDDCX_VFL 2 -#define SDM660_VDDMX 3 -#define SDM660_VDDMX_AO 4 -#define SDM660_VDDMX_VFL 5 -#define SDM660_SSCCX 6 -#define SDM660_SSCCX_VFL 7 -#define SDM660_SSCMX 8 -#define SDM660_SSCMX_VFL 9 +#define SDM660_VDDCX RPMPD_VDDCX +#define SDM660_VDDCX_AO RPMPD_VDDCX_AO +#define SDM660_VDDCX_VFL RPMPD_VDDCX_VFL +#define SDM660_VDDMX RPMPD_VDDMX +#define SDM660_VDDMX_AO RPMPD_VDDMX_AO +#define SDM660_VDDMX_VFL RPMPD_VDDMX_VFL +#define SDM660_SSCCX RPMPD_SSCCX +#define SDM660_SSCCX_VFL RPMPD_SSCCX_VFL +#define SDM660_SSCMX RPMPD_SSCMX +#define SDM660_SSCMX_VFL RPMPD_SSCMX_VFL /* SM6115 Power Domains */ #define SM6115_VDDCX 0 @@ -160,12 +179,12 @@ #define SM6115_VDD_LPI_MX 7 /* SM6125 Power Domains */ -#define SM6125_VDDCX 0 -#define SM6125_VDDCX_AO 1 -#define SM6125_VDDCX_VFL 2 
-#define SM6125_VDDMX 3 -#define SM6125_VDDMX_AO 4 -#define SM6125_VDDMX_VFL 5 +#define SM6125_VDDCX RPMPD_VDDCX +#define SM6125_VDDCX_AO RPMPD_VDDCX_AO +#define SM6125_VDDCX_VFL RPMPD_VDDCX_VFL +#define SM6125_VDDMX RPMPD_VDDMX +#define SM6125_VDDMX_AO RPMPD_VDDMX_AO +#define SM6125_VDDMX_VFL RPMPD_VDDMX_VFL /* SM6375 Power Domain Indexes */ #define SM6375_VDDCX 0 -- cgit v1.2.3 From 807221d3c5ff6e3c91ff57bc82a0b7a541462e20 Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Tue, 12 Aug 2025 14:35:21 +0800 Subject: misc: rtsx_pci: Add separate CD/WP pin polarity reversal support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the Card Detect (CD) and Write Protect (WP) pins shared the same reverse polarity setting in the configuration space. This meant both signals were reversed together, without the ability to configure them individually. This patch introduces two new parameters: sd_cd_reverse_en – enable reverse polarity for the CD pin. sd_wp_reverse_en – enable reverse polarity for the WP pin. With this change, the controller can now support: 1.Reversing both CD and WP pins together (original behavior). 2.Reversing CD and WP pins separately (newly added behavior), if supported by the configuration space. This provides greater flexibility when dealing with devices that have independent polarity requirements for CD and WP pins. 
Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20250812063521.2427696-1-ricky_wu@realtek.com Signed-off-by: Greg Kroah-Hartman --- include/linux/rtsx_pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 3b4c36705a9b..3c5689356004 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1160,6 +1160,8 @@ struct rtsx_cr_option { bool ocp_en; u8 sd_400mA_ocp_thd; u8 sd_800mA_ocp_thd; + u8 sd_cd_reverse_en; + u8 sd_wp_reverse_en; }; /* -- cgit v1.2.3 From f5597840ac907858ad2a462b00e4a68fd199121e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 14 Jul 2025 23:34:14 +0800 Subject: char: misc: Disallow registering miscdevice whose minor > MISC_DYNAMIC_MINOR Currently, it is allowed to register a miscdevice with a minor > 255 (the value defined by the macro MISC_DYNAMIC_MINOR), which causes: - Chaos regarding division and management of minor codes. - Registration failure if the minor was already allocated to another dynamic request. Fortunately, in-kernel users have not had such usage yet. Fix by refusing to register a miscdevice whose minor > 255.
Also bring in a very simple minor code space division and management: < 255 : Fixed minor code == 255 : Indicator to request dynamic minor code > 255 : Dynamic minor code requested, 1048320 minor codes totally And all fixed minors allocated should be registered in 'linux/miscdevice.h' Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250714-rfc_miscdev-v6-3-2ed949665bde@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3e6deb00fc85..565b88efeb23 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -71,6 +71,14 @@ #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 #define RFKILL_MINOR 242 + +/* + * Misc char device minor code space division related to below macro: + * + * < 255 : Fixed minor code + * == 255 : Indicator to request dynamic minor code + * > 255 : Dynamic minor code requested, 1048320 minor codes totally. + */ #define MISC_DYNAMIC_MINOR 255 struct miscdevice { -- cgit v1.2.3 From d7f8d0758b975db8406c91cf242d46cd9611ba3e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 14 Jul 2025 23:34:18 +0800 Subject: char: misc: Register fixed minor EISA_EEPROM_MINOR in linux/miscdevice.h Move fixed minor EISA_EEPROM_MINOR definition to linux/miscdevice.h. 
Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250714-rfc_miscdev-v6-7-2ed949665bde@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 565b88efeb23..7d0aa718499c 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -70,6 +70,7 @@ #define UHID_MINOR 239 #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 +#define EISA_EEPROM_MINOR 241 #define RFKILL_MINOR 242 /* -- cgit v1.2.3 From 63740349eba78f242bcbf60d5244d7f2b2600853 Mon Sep 17 00:00:00 2001 From: Li Li Date: Sun, 27 Jul 2025 18:29:06 +0000 Subject: binder: introduce transaction reports via netlink Introduce a generic netlink multicast event to report binder transaction failures to userspace. This allows subscribers to monitor these events and take appropriate actions, such as stopping a misbehaving application that is spamming a service with huge amount of transactions. The multicast event contains full details of the failed transactions, including the sender/target PIDs, payload size and specific error code. This interface is defined using a YAML spec, from which the UAPI and kernel headers and source are auto-generated. 
Signed-off-by: Li Li Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20250727182932.2499194-4-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder_netlink.h | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/uapi/linux/android/binder_netlink.h (limited to 'include') diff --git a/include/uapi/linux/android/binder_netlink.h b/include/uapi/linux/android/binder_netlink.h new file mode 100644 index 000000000000..b218f96d6668 --- /dev/null +++ b/include/uapi/linux/android/binder_netlink.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/binder.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_ANDROID_BINDER_NETLINK_H +#define _UAPI_LINUX_ANDROID_BINDER_NETLINK_H + +#define BINDER_FAMILY_NAME "binder" +#define BINDER_FAMILY_VERSION 1 + +enum { + BINDER_A_REPORT_ERROR = 1, + BINDER_A_REPORT_CONTEXT, + BINDER_A_REPORT_FROM_PID, + BINDER_A_REPORT_FROM_TID, + BINDER_A_REPORT_TO_PID, + BINDER_A_REPORT_TO_TID, + BINDER_A_REPORT_IS_REPLY, + BINDER_A_REPORT_FLAGS, + BINDER_A_REPORT_CODE, + BINDER_A_REPORT_DATA_SIZE, + + __BINDER_A_REPORT_MAX, + BINDER_A_REPORT_MAX = (__BINDER_A_REPORT_MAX - 1) +}; + +enum { + BINDER_CMD_REPORT = 1, + + __BINDER_CMD_MAX, + BINDER_CMD_MAX = (__BINDER_CMD_MAX - 1) +}; + +#define BINDER_MCGRP_REPORT "report" + +#endif /* _UAPI_LINUX_ANDROID_BINDER_NETLINK_H */ -- cgit v1.2.3 From 1d6249c1ce826fcf03c695973095eb4a50fb7fd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 11 Aug 2025 11:13:35 +0200 Subject: sysfs: remove bin_attribute::read_new/write_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These transitional fields are now unused and unnecessary. Remove them and their logic in the sysfs core. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250811-sysfs-const-bin_attr-final-v4-1-7b6053fd58bb@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index f418aae4f113..7544f6d81c05 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -308,12 +308,8 @@ struct bin_attribute { struct address_space *(*f_mapping)(void); ssize_t (*read)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *, - char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*write_new)(struct file *, struct kobject *, - const struct bin_attribute *, char *, loff_t, size_t); loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *, loff_t, int); int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr, -- cgit v1.2.3 From 44d454fcffa8b08d6d66df132121c1d387fa85db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 11 Aug 2025 11:13:36 +0200 Subject: sysfs: remove attribute_group::bin_attrs_new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This transitional field is now unused and unnecessary. Remove it. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250811-sysfs-const-bin_attr-final-v4-2-7b6053fd58bb@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 7544f6d81c05..9a25a2911652 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -106,10 +106,7 @@ struct attribute_group { const struct bin_attribute *, int); struct attribute **attrs; - union { - const struct bin_attribute *const *bin_attrs; - const struct bin_attribute *const *bin_attrs_new; - }; + const struct bin_attribute *const *bin_attrs; }; #define SYSFS_PREALLOC 010000 @@ -293,7 +290,7 @@ __ATTRIBUTE_GROUPS(_name) #define BIN_ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ - .bin_attrs_new = _name##_attrs, \ + .bin_attrs = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) -- cgit v1.2.3 From 12cc0ff3cdd95f2bc0ffdc63bcd9da231eb33199 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 19 Aug 2025 11:01:46 +0100 Subject: ASoC: qcom: audioreach: deprecate AR_TKN_U32_MODULE_[IN/OUT]_PORTS Deprecate usage of AR_TKN_U32_MODULE_IN_PORTS and AR_TKN_U32_MODULE_OUT_PORTS as the connectivity of modules is taken care by AR_TKN_U32_MODULE_SRC_OP_PORT_ID* and AR_TKN_U32_MODULE_DST_IN_PORT_ID* Also this property is never used in the drivers. 
Signed-off-by: Srinivas Kandagatla Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250819100151.1294047-2-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- include/uapi/sound/snd_ar_tokens.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/snd_ar_tokens.h b/include/uapi/sound/snd_ar_tokens.h index b9b9093b4396..bc0b1bede00c 100644 --- a/include/uapi/sound/snd_ar_tokens.h +++ b/include/uapi/sound/snd_ar_tokens.h @@ -184,8 +184,8 @@ enum ar_event_types { #define AR_TKN_U32_MODULE_INSTANCE_ID 201 #define AR_TKN_U32_MODULE_MAX_IP_PORTS 202 #define AR_TKN_U32_MODULE_MAX_OP_PORTS 203 -#define AR_TKN_U32_MODULE_IN_PORTS 204 -#define AR_TKN_U32_MODULE_OUT_PORTS 205 +#define AR_TKN_U32_MODULE_IN_PORTS 204 /* deprecated */ +#define AR_TKN_U32_MODULE_OUT_PORTS 205 /* deprecated */ #define AR_TKN_U32_MODULE_SRC_OP_PORT_ID 206 #define AR_TKN_U32_MODULE_DST_IN_PORT_ID 207 #define AR_TKN_U32_MODULE_SRC_INSTANCE_ID 208 -- cgit v1.2.3 From f07b81b573b28e5cae5c1482001ad0d6c0b7c051 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 19 Aug 2025 11:01:47 +0100 Subject: ASoC: qcom: audioreach: add documentation for i2s interface type Add documentation of possible values for I2S interface types, currently this is only documented for DMA module. 
Signed-off-by: Srinivas Kandagatla Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250819100151.1294047-3-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- include/uapi/sound/snd_ar_tokens.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/sound/snd_ar_tokens.h b/include/uapi/sound/snd_ar_tokens.h index bc0b1bede00c..92cf72a6fdd4 100644 --- a/include/uapi/sound/snd_ar_tokens.h +++ b/include/uapi/sound/snd_ar_tokens.h @@ -118,6 +118,12 @@ enum ar_event_types { * LPAIF_WSA = 2, * LPAIF_VA = 3, * LPAIF_AXI = 4 + * Possible values for MI2S + * I2S_INTF_TYPE_PRIMARY = 0, + * I2S_INTF_TYPE_SECONDARY = 1, + * I2S_INTF_TYPE_TERTIARY = 2, + * I2S_INTF_TYPE_QUATERNARY = 3, + * I2S_INTF_TYPE_QUINARY = 4, * * %AR_TKN_U32_MODULE_FMT_INTERLEAVE: PCM Interleaving * PCM_INTERLEAVED = 1, -- cgit v1.2.3 From c7ed4c2debfd192f6071f4ab33c092d419abb941 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 19 Aug 2025 11:01:48 +0100 Subject: ASoC: qcom: audioreach: add support for static calibration This change adds support for static calibration data via ASoC topology file. This static calibration data could include binary blob of data that is required by specific module and is not part of topology tokens. Reason for adding this support is to allow loading module specific data that can not be part of the tplg tokens, example, Echo and Noise cancelling module needs a blob of calibration data to function correctly. This support is also one of the building block for adding speaker protection support. Tested this with Single Mic ECNS(Echo and Noise Cancellation). tplg can now contain this calibration data like: SectionWidget."stream2.SMECNS_V224" { ... data [ ... "stream2.SMECNS_V224_cfg_data" ] } SectionData."stream2.SMECNS_V224_cfg_data" { words "0x00000330, 0x01001006,0x00000000,0x00000000, 0x00004145,0x08001026,0x00000004,0x00000000, ..." 
} } Signed-off-by: Srinivas Kandagatla Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250819100151.1294047-4-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- include/uapi/sound/snd_ar_tokens.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/sound/snd_ar_tokens.h b/include/uapi/sound/snd_ar_tokens.h index 92cf72a6fdd4..6b8102eaa121 100644 --- a/include/uapi/sound/snd_ar_tokens.h +++ b/include/uapi/sound/snd_ar_tokens.h @@ -3,6 +3,8 @@ #ifndef __SND_AR_TOKENS_H__ #define __SND_AR_TOKENS_H__ +#include <linux/types.h> + #define APM_SUB_GRAPH_PERF_MODE_LOW_POWER 0x1 #define APM_SUB_GRAPH_PERF_MODE_LOW_LATENCY 0x2 @@ -238,4 +240,12 @@ enum ar_event_types { #define AR_TKN_U32_MODULE_LOG_TAP_POINT_ID 260 #define AR_TKN_U32_MODULE_LOG_MODE 261 +#define SND_SOC_AR_TPLG_MODULE_CFG_TYPE 0x01001006 +struct audioreach_module_priv_data { + __le32 size; /* size in bytes of the array, including all elements */ + __le32 type; /* SND_SOC_AR_TPLG_MODULE_CFG_TYPE */ + __le32 priv[2]; /* Private data for future expansion */ + __le32 data[0]; /* config data */ +}; + #endif /* __SND_AR_TOKENS_H__ */ -- cgit v1.2.3 From 74f44ad07d1063933c237a7db16f6a4036643d60 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 30 Jul 2025 17:46:14 +0100 Subject: mmc: tmio: Add 64-bit read/write support for SD_BUF0 in polling mode As per the RZ/{G2L,G3E} HW manual SD_BUF0 can be accessed by 16/32/64 bits. Most of the data transfer in SD/SDIO/eMMC mode is more than 8 bytes. During testing it is found that, if the DMA buffer is not aligned to 128 bit it fallback to PIO mode. In such cases, 64-bit access is much more efficient than the current 16-bit.
Tested-by: Wolfram Sang Reviewed-by: Wolfram Sang Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20250730164618.233117-2-biju.das.jz@bp.renesas.com Signed-off-by: Ulf Hansson --- include/linux/platform_data/tmio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/tmio.h b/include/linux/platform_data/tmio.h index b060124ba1ae..426291713b83 100644 --- a/include/linux/platform_data/tmio.h +++ b/include/linux/platform_data/tmio.h @@ -47,6 +47,9 @@ /* Some controllers have a CBSY bit */ #define TMIO_MMC_HAVE_CBSY BIT(11) +/* Some controllers have a 64-bit wide data port register */ +#define TMIO_MMC_64BIT_DATA_PORT BIT(12) + struct tmio_mmc_data { void *chan_priv_tx; void *chan_priv_rx; -- cgit v1.2.3 From d2e6fb2c31a07f34e5e7533df11431cb0d2ecf9f Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Tue, 12 Aug 2025 11:08:11 +0800 Subject: misc: rtsx: usb card reader: add OCP support This patch adds support for Over Current Protection (OCP) to the Realtek USB card reader driver. The OCP mechanism protects the hardware by detecting and handling current overload conditions. This implementation includes: - Register configurations to enable OCP monitoring. - Handling of OCP interrupt events and associated error reporting. - Card power management changes in response to OCP triggers. This enhancement improves the robustness of the driver when operating in environments where electrical anomalies may occur, particularly with SD and MS card interfaces. 
Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20250812030811.2426112-1-ricky_wu@realtek.com Signed-off-by: Ulf Hansson --- include/linux/rtsx_usb.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index f267a06c6b1e..276b509c03e3 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -99,6 +99,17 @@ extern int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card); #define CD_MASK (SD_CD | MS_CD | XD_CD) #define SD_WP 0x08 +/* OCPCTL */ +#define MS_OCP_DETECT_EN 0x08 +#define MS_OCP_INT_EN 0x04 +#define MS_OCP_INT_CLR 0x02 +#define MS_OCP_CLEAR 0x01 + +/* OCPSTAT */ +#define MS_OCP_DETECT 0x80 +#define MS_OCP_NOW 0x02 +#define MS_OCP_EVER 0x01 + /* reader command field offset & parameters */ #define READ_REG_CMD 0 #define WRITE_REG_CMD 1 -- cgit v1.2.3 From 99e6cc80d5ce5af5781f84d20e4f3478d66ee8ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Monin?= Date: Mon, 18 Aug 2025 16:02:50 +0200 Subject: mmc: core: add mmc_read_tuning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a function to the MMC hosts to read some blocks of data as part of their tuning. This function only returns the status of the read operation, not the data read. 
Signed-off-by: Benoît Monin Link: https://lore.kernel.org/r/20250818-mobileye-emmc-for-upstream-4-v4-5-34ecb3995e96@bootlin.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 68f09a955a90..5ed5d203de23 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -743,5 +743,6 @@ int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); +int mmc_read_tuning(struct mmc_host *host, unsigned int blksz, unsigned int blocks); #endif /* LINUX_MMC_HOST_H */ -- cgit v1.2.3 From 730ff06d3f5cc2ce0348414b78c10528b767d4a3 Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Thu, 14 Aug 2025 07:04:10 -0700 Subject: net: mana: Use page pool fragments for RX buffers instead of full pages to improve memory efficiency. This patch enhances RX buffer handling in the mana driver by allocating pages from a page pool and slicing them into MTU-sized fragments, rather than dedicating a full page per packet. This approach is especially beneficial on systems with large base page sizes like 64KB. Key improvements: - Proper integration of page pool for RX buffer allocations. - MTU-sized buffer slicing to improve memory utilization. - Reduce overall per Rx queue memory footprint. - Automatic fallback to full-page buffers when: * Jumbo frames are enabled (MTU > PAGE_SIZE / 2). * The XDP path is active, to avoid complexities with fragment reuse. Testing on VMs with 64KB pages shows around 200% throughput improvement. Memory efficiency is significantly improved due to reduced wastage in page allocations. Example: We are now able to fit 35 rx buffers in a single 64kb page for MTU size of 1500, instead of 1 rx buffer per page previously. 
Tested: - iperf3, iperf2, and nttcp benchmarks. - Jumbo frames with MTU 9000. - Native XDP programs (XDP_PASS, XDP_DROP, XDP_TX, XDP_REDIRECT) for testing the XDP path in driver. - Memory leak detection (kmemleak). - Driver load/unload, reboot, and stress scenarios. Reviewed-by: Jacob Keller Reviewed-by: Saurabh Sengar Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy Link: https://patch.msgid.link/20250814140410.GA22089@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Paolo Abeni --- include/net/mana/mana.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index e1030a7d2daa..0921485565c0 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -65,6 +65,8 @@ enum TRI_STATE { #define MANA_STATS_RX_COUNT 5 #define MANA_STATS_TX_COUNT 11 +#define MANA_RX_FRAG_ALIGNMENT 64 + struct mana_stats_rx { u64 packets; u64 bytes; @@ -328,6 +330,7 @@ struct mana_rxq { u32 datasize; u32 alloc_size; u32 headroom; + u32 frag_count; mana_handle_t rxobj; @@ -510,6 +513,7 @@ struct mana_port_context { u32 rxbpre_datasize; u32 rxbpre_alloc_size; u32 rxbpre_headroom; + u32 rxbpre_frag_count; struct bpf_prog *bpf_prog; -- cgit v1.2.3 From 76d2e3890fb169168c73f2e4f8375c7cc24a765e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Aug 2025 07:25:20 -0700 Subject: NFS: Fix a race when updating an existing write After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head. So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request(). 
Reported-by: Jeff Layton Tested-by: Joe Quanaim Tested-by: Andrew Steffen Reviewed-by: Jeff Layton Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 169b4ae30ff4..9aed39abc94b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head, extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int); extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit v1.2.3 From 808471ddb0fa785559c3e7aee59be20a13b46ef5 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 13 Aug 2025 15:04:55 +0900 Subject: iov_iter: iterate_folioq: fix handling of offset >= folio size It's apparently possible to get an iov advanced all the way up to the end of the current page we're looking at, e.g. 
(gdb) p *iter $24 = {iter_type = 4 '\004', nofault = false, data_source = false, iov_offset = 4096, {__ubuf_iovec = { iov_base = 0xffff88800f5bc000, iov_len = 655}, {{__iov = 0xffff88800f5bc000, kvec = 0xffff88800f5bc000, bvec = 0xffff88800f5bc000, folioq = 0xffff88800f5bc000, xarray = 0xffff88800f5bc000, ubuf = 0xffff88800f5bc000}, count = 655}}, {nr_segs = 2, folioq_slot = 2 '\002', xarray_start = 2}} Where iov_offset is 4k with 4k-sized folios This should have been fine because we're only in the 2nd slot and there's another one after this, but iterate_folioq should not try to map a folio that skips the whole size, and more importantly part here does not end up zero (because 'PAGE_SIZE - skip % PAGE_SIZE' ends up PAGE_SIZE and not zero..), so skip forward to the "advance to next folio" code Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-0-a0ffad2b665a@codewreck.org Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-1-a0ffad2b665a@codewreck.org Signed-off-by: Dominique Martinet Fixes: db0aa2e9566f ("mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios") Reported-by: Maximilian Bosch Reported-by: Ryan Lahfa Reported-by: Christian Theune Reported-by: Arnout Engelen Link: https://lkml.kernel.org/r/D4LHHUNLG79Y.12PI0X6BEHRHW@mbosch.me/ Acked-by: David Howells Cc: Al Viro Cc: Christian Brauner Cc: Matthew Wilcox (Oracle) Cc: [6.12+] Signed-off-by: Andrew Morton --- include/linux/iov_iter.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index c4aa58032faf..f9a17fbbd398 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -160,7 +160,7 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 do { struct folio *folio = folioq_folio(folioq, slot); - size_t part, remain, consumed; + size_t part, remain = 0, consumed; size_t fsize; void *base; @@ -168,14 +168,16 @@ size_t 
iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 break; fsize = folioq_folio_size(folioq, slot); - base = kmap_local_folio(folio, skip); - part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); - remain = step(base, progress, part, priv, priv2); - kunmap_local(base); - consumed = part - remain; - len -= consumed; - progress += consumed; - skip += consumed; + if (skip < fsize) { + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + } if (skip >= fsize) { skip = 0; slot++; -- cgit v1.2.3 From 053c8ebe74f7e1f4c072e59428da80b9d78bc4b7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 17 Aug 2025 23:17:59 +0800 Subject: mm/migrate: fix NULL movable_ops if CONFIG_ZSMALLOC=m After commit 84caf98838a3e5f4bdb34 ("mm: stop storing migration_ops in page->mapping") we get such an error message if CONFIG_ZSMALLOC=m: WARNING: CPU: 3 PID: 42 at mm/migrate.c:142 isolate_movable_ops_page+0xa8/0x1c0 CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT pc 9000000000540bd8 ra 9000000000540b84 tp 9000000100420000 sp 9000000100423a60 a0 9000000100193a80 a1 000000000000000c a2 000000000000001b a3 ffffffffffffffff a4 ffffffffffffffff a5 0000000000000267 a6 0000000000000000 a7 9000000100423ae0 t0 00000000000000f1 t1 00000000000000f6 t2 0000000000000000 t3 0000000000000001 t4 ffffff00010eb834 t5 0000000000000040 t6 900000010c89d380 t7 90000000023fcc70 t8 0000000000000018 u0 0000000000000000 s9 ffffff00010eb800 s0 ffffff00010eb800 s1 000000000000000c s2 0000000000043ae0 s3 0000800000000000 s4 900000000219cc40 s5 0000000000000000 s6 ffffff00010eb800 s7 0000000000000001 s8 90000000025b4000 ra: 9000000000540b84 isolate_movable_ops_page+0x54/0x1c0 ERA: 9000000000540bd8 isolate_movable_ops_page+0xa8/0x1c0 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC 
DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT Stack : 90000000021fd000 0000000000000000 9000000000247720 9000000100420000 90000001004236a0 90000001004236a8 0000000000000000 90000001004237e8 90000001004237e0 90000001004237e0 9000000100423550 0000000000000001 0000000000000001 90000001004236a8 725a84864a19e2d9 90000000023fcc58 9000000100420000 90000000024c6848 9000000002416848 0000000000000001 0000000000000000 000000000000000a 0000000007fe0000 ffffff00010eb800 0000000000000000 90000000021fd000 0000000000000000 900000000205cf30 000000000000008e 0000000000000009 ffffff00010eb800 0000000000000001 90000000025b4000 0000000000000000 900000000024773c 00007ffff103d748 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<900000000024773c>] show_stack+0x5c/0x190 [<90000000002415e0>] dump_stack_lvl+0x70/0x9c [<90000000004abe6c>] isolate_migratepages_block+0x3bc/0x16e0 [<90000000004af408>] compact_zone+0x558/0x1000 [<90000000004b0068>] compact_node+0xa8/0x1e0 [<90000000004b0aa4>] kcompactd+0x394/0x410 [<90000000002b3c98>] kthread+0x128/0x140 [<9000000001779148>] ret_from_kernel_thread+0x28/0xc0 [<9000000000245528>] ret_from_kernel_thread_asm+0x10/0x88 The reason is that defined(CONFIG_ZSMALLOC) evaluates to 1 only when CONFIG_ZSMALLOC=y, we should use IS_ENABLED(CONFIG_ZSMALLOC) instead. But when I use IS_ENABLED(CONFIG_ZSMALLOC), page_movable_ops() cannot access zsmalloc_mops because zsmalloc_mops is in a module. To solve this problem, we define a set_movable_ops() interface to register and unregister offline_movable_ops / zsmalloc_movable_ops in mm/migrate.c, and call them at mm/balloon_compaction.c & mm/zsmalloc.c. 
Since offline_movable_ops / zsmalloc_movable_ops are always accessible, all #ifdef / #endif are removed in page_movable_ops(). Link: https://lkml.kernel.org/r/20250817151759.2525174-1-chenhuacai@loongson.cn Fixes: 84caf98838a3 ("mm: stop storing migration_ops in page->mapping") Signed-off-by: Huacai Chen Acked-by: Zi Yan Acked-by: David Hildenbrand Cc: Huacai Chen Cc: Huacai Chen Cc: Lorenzo Stoakes Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/migrate.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index acadd41e0b5c..9009e27b5f44 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -79,6 +79,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); +int set_movable_ops(const struct movable_operations *ops, enum pagetype type); #else @@ -100,6 +101,10 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } +static inline int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + return -ENOSYS; +} #endif /* CONFIG_MIGRATION */ -- cgit v1.2.3 From c3f0c02997c7f8489fec259e28e0e04e9811edac Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 18 Aug 2025 08:40:26 -0700 Subject: net: Add skb_dstref_steal and skb_dstref_restore Going forward skb_dst_set will assert that skb dst_entry is empty during skb_dst_set to prevent potential leaks. There are a few places that still manage dst_entry manually without using the helpers.
Convert them to the following new helpers: - skb_dstref_steal that resets dst_entry and returns previous dst_entry value - skb_dstref_restore that restores dst_entry previously reset via skb_dstref_steal Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250818154032.3173645-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..7538ca507ee9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,38 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +/** + * skb_dstref_steal() - return current dst_entry value and clear it + * @skb: buffer + * + * Resets skb dst_entry without adjusting its reference count. Useful in + * cases where dst_entry needs to be temporarily reset and restored. + * Note that the returned value cannot be used directly because it + * might contain SKB_DST_NOREF bit. + * + * When in doubt, prefer skb_dst_drop() over skb_dstref_steal() to correctly + * handle dst_entry reference counting. + * + * Returns: original skb dst_entry. 
+ */ +static inline unsigned long skb_dstref_steal(struct sk_buff *skb) +{ + unsigned long refdst = skb->_skb_refdst; + + skb->_skb_refdst = 0; + return refdst; +} + +/** + * skb_dstref_restore() - restore skb dst_entry removed via skb_dstref_steal() + * @skb: buffer + * @refdst: dst entry from a call to skb_dstref_steal() + */ +static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) +{ + skb->_skb_refdst = refdst; +} + /** * skb_dst_set - sets skb dst * @skb: buffer -- cgit v1.2.3 From a890348adcc993f48d1ae38f1174dc8de4c3c5ac Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 18 Aug 2025 08:40:32 -0700 Subject: net: Add skb_dst_check_unset To prevent dst_entry leaks, add warning when the non-NULL dst_entry is rewritten. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250818154032.3173645-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7538ca507ee9..ca8be45dd8be 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1159,6 +1159,12 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } +static inline void skb_dst_check_unset(struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE((skb->_skb_refdst & SKB_DST_PTRMASK) && + !(skb->_skb_refdst & SKB_DST_NOREF)); +} + /** * skb_dstref_steal() - return current dst_entry value and clear it * @skb: buffer @@ -1188,6 +1194,7 @@ static inline unsigned long skb_dstref_steal(struct sk_buff *skb) */ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) { + skb_dst_check_unset(skb); skb->_skb_refdst = refdst; } @@ -1201,6 +1208,7 @@ static inline void skb_dstref_restore(struct sk_buff *skb, unsigned long refdst) */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { + 
skb_dst_check_unset(skb); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } @@ -1217,6 +1225,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { + skb_dst_check_unset(skb); WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; -- cgit v1.2.3 From 68889dfd547bd8eabc5a98b58475d7b901cf5129 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:09 +0000 Subject: mptcp: Fix up subflow's memcg when CONFIG_SOCK_CGROUP_DATA=n. When sk_alloc() allocates a socket, mem_cgroup_sk_alloc() sets sk->sk_memcg based on the current task. MPTCP subflow socket creation is triggered from userspace or an in-kernel worker. In the latter case, sk->sk_memcg is not what we want. So, we fix it up from the parent socket's sk->sk_memcg in mptcp_attach_cgroup(). Although the code is placed under #ifdef CONFIG_MEMCG, it is buried under #ifdef CONFIG_SOCK_CGROUP_DATA. The two configs are orthogonal. If CONFIG_MEMCG is enabled without CONFIG_SOCK_CGROUP_DATA, the subflow's memory usage is not charged correctly. Let's move the code out of the wrong ifdef guard. Note that sk->sk_memcg is freed in sk_prot_free() and the parent sk holds the refcnt of memcg->css here, so we don't need to use css_tryget(). 
Fixes: 3764b0c5651e3 ("mptcp: attach subflow socket to parent cgroup") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Matthieu Baerts (NGI0) Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 785173aa0739..25921fbec685 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1604,6 +1604,7 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); +void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1661,6 +1662,11 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg); #define mem_cgroup_sockets_enabled 0 static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; static inline void mem_cgroup_sk_free(struct sock *sk) { }; + +static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) +{ +} + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; -- cgit v1.2.3 From f7161b234f2ec7f18999009c4becc04eeb6b12a7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:14 +0000 Subject: net-memcg: Introduce mem_cgroup_from_sk(). We will store a flag in the lowest bit of sk->sk_memcg. Then, directly dereferencing sk->sk_memcg will be illegal, and we do not want to allow touching the raw sk->sk_memcg in many places. Let's introduce mem_cgroup_from_sk(). Other places accessing the raw sk->sk_memcg will be converted later. 
Note that we cannot define the helper as an inline function in memcontrol.h as we cannot access any fields of struct sock there due to circular dependency, so it is placed in sock.h. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c8a4b283df6f..811f95ea8d00 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2594,6 +2594,18 @@ static inline gfp_t gfp_memcg_charge(void) return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) +{ + return sk->sk_memcg; +} +#else +static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) +{ + return NULL; +} +#endif + static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) { return noblock ? 0 : READ_ONCE(sk->sk_rcvtimeo); -- cgit v1.2.3 From 43049b0db03823c2cd003ca7d3dddcd3924da8dc Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:15 +0000 Subject: net-memcg: Introduce mem_cgroup_sk_enabled(). The socket memcg feature is enabled by a static key and only works for non-root cgroup. We check both conditions in many places. Let's factorise it as a helper function. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-8-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/proto_memory.h | 2 +- include/net/sock.h | 10 ++++++++++ include/net/tcp.h | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h index a6ab2f4f5e28..859e63de81c4 100644 --- a/include/net/proto_memory.h +++ b/include/net/proto_memory.h @@ -31,7 +31,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) if (!sk->sk_prot->memory_pressure) return false; - if (mem_cgroup_sockets_enabled && sk->sk_memcg && + if (mem_cgroup_sk_enabled(sk) && mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; diff --git a/include/net/sock.h b/include/net/sock.h index 811f95ea8d00..3efdf680401d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2599,11 +2599,21 @@ static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) { return sk->sk_memcg; } + +static inline bool mem_cgroup_sk_enabled(const struct sock *sk) +{ + return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk); +} #else static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) { return NULL; } + +static inline bool mem_cgroup_sk_enabled(const struct sock *sk) +{ + return false; +} #endif static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) diff --git a/include/net/tcp.h b/include/net/tcp.h index 526a26e7a150..9f01b6be6444 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -275,7 +275,7 @@ extern unsigned long tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) { - if (mem_cgroup_sockets_enabled && sk->sk_memcg && + if (mem_cgroup_sk_enabled(sk) && mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; -- 
cgit v1.2.3 From bb178c6bc08525d758a57775458d644304011bf8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:16 +0000 Subject: net-memcg: Pass struct sock to mem_cgroup_sk_(un)?charge(). We will store a flag in the lowest bit of sk->sk_memcg. Then, we cannot pass the raw pointer to mem_cgroup_charge_skmem() and mem_cgroup_uncharge_skmem(). Let's pass struct sock to the functions. While at it, they are renamed to match other functions starting with mem_cgroup_sk_. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-9-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 25921fbec685..0837d3de3a68 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1596,15 +1596,16 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb) #endif /* CONFIG_CGROUP_WRITEBACK */ struct sock; -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, - gfp_t gfp_mask); -void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); #ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) + void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk); +bool mem_cgroup_sk_charge(const struct sock *sk, unsigned int nr_pages, + gfp_t gfp_mask); +void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages); #if BITS_PER_LONG < 64 static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) @@ -1660,13 +1661,31 @@ void set_shrinker_bit(struct mem_cgroup 
*memcg, int nid, int shrinker_id); void reparent_shrinker_deferred(struct mem_cgroup *memcg); #else #define mem_cgroup_sockets_enabled 0 -static inline void mem_cgroup_sk_alloc(struct sock *sk) { }; -static inline void mem_cgroup_sk_free(struct sock *sk) { }; + +static inline void mem_cgroup_sk_alloc(struct sock *sk) +{ +} + +static inline void mem_cgroup_sk_free(struct sock *sk) +{ +} static inline void mem_cgroup_sk_inherit(const struct sock *sk, struct sock *newsk) { } +static inline bool mem_cgroup_sk_charge(const struct sock *sk, + unsigned int nr_pages, + gfp_t gfp_mask) +{ + return false; +} + +static inline void mem_cgroup_sk_uncharge(const struct sock *sk, + unsigned int nr_pages) +{ +} + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { return false; -- cgit v1.2.3 From b2ffd10cddde47cc6830e4981e91e3215def62b1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:17 +0000 Subject: net-memcg: Pass struct sock to mem_cgroup_sk_under_memory_pressure(). We will store a flag in the lowest bit of sk->sk_memcg. Then, we cannot pass the raw pointer to mem_cgroup_under_socket_pressure(). Let's pass struct sock to it and rename the function to match other functions starting with mem_cgroup_sk_. Note that the helper is moved to sock.h to use mem_cgroup_from_sk(). 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-10-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/memcontrol.h | 18 ------------------ include/net/proto_memory.h | 2 +- include/net/sock.h | 22 ++++++++++++++++++++++ include/net/tcp.h | 2 +- 4 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0837d3de3a68..fb27e3d2fdac 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1642,19 +1642,6 @@ static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) } #endif -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ -#ifdef CONFIG_MEMCG_V1 - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) - return !!memcg->tcpmem_pressure; -#endif /* CONFIG_MEMCG_V1 */ - do { - if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) - return true; - } while ((memcg = parent_mem_cgroup(memcg))); - return false; -} - int alloc_shrinker_info(struct mem_cgroup *memcg); void free_shrinker_info(struct mem_cgroup *memcg); void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id); @@ -1686,11 +1673,6 @@ static inline void mem_cgroup_sk_uncharge(const struct sock *sk, { } -static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) -{ - return false; -} - static inline void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h index 859e63de81c4..8e91a8fa31b5 100644 --- a/include/net/proto_memory.h +++ b/include/net/proto_memory.h @@ -32,7 +32,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) return false; if (mem_cgroup_sk_enabled(sk) && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) + mem_cgroup_sk_under_memory_pressure(sk)) return true; return 
!!READ_ONCE(*sk->sk_prot->memory_pressure); diff --git a/include/net/sock.h b/include/net/sock.h index 3efdf680401d..3bc4d566f7d0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2604,6 +2604,23 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk) { return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk); } + +static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) +{ + struct mem_cgroup *memcg = mem_cgroup_from_sk(sk); + +#ifdef CONFIG_MEMCG_V1 + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return !!memcg->tcpmem_pressure; +#endif /* CONFIG_MEMCG_V1 */ + + do { + if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) + return true; + } while ((memcg = parent_mem_cgroup(memcg))); + + return false; +} #else static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk) { @@ -2614,6 +2631,11 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk) { return false; } + +static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk) +{ + return false; +} #endif static inline long sock_rcvtimeo(const struct sock *sk, bool noblock) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9f01b6be6444..2936b8175950 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -276,7 +276,7 @@ extern unsigned long tcp_memory_pressure; static inline bool tcp_under_memory_pressure(const struct sock *sk) { if (mem_cgroup_sk_enabled(sk) && - mem_cgroup_under_socket_pressure(sk->sk_memcg)) + mem_cgroup_sk_under_memory_pressure(sk)) return true; return READ_ONCE(tcp_memory_pressure); -- cgit v1.2.3 From bf64002c94fc330b996bc438f3d1b6bd3d781659 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Aug 2025 20:16:18 +0000 Subject: net: Define sk_memcg under CONFIG_MEMCG. Except for sk_clone_lock(), all accesses to sk->sk_memcg is done under CONFIG_MEMCG. As a bonus, let's define sk->sk_memcg under CONFIG_MEMCG. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Roman Gushchin Acked-by: Shakeel Butt Link: https://patch.msgid.link/20250815201712.1745332-11-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 3bc4d566f7d0..1c49ea13af4a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -443,7 +443,9 @@ struct sock { __cacheline_group_begin(sock_read_rxtx); int sk_err; struct socket *sk_socket; +#ifdef CONFIG_MEMCG struct mem_cgroup *sk_memcg; +#endif #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif -- cgit v1.2.3 From b5940feda3dc7a12133c6589e463d2b3b6c7fe96 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 19 Aug 2025 08:43:49 -0700 Subject: scsi: ufs: core: Reduce the size of struct ufshcd_lrb The size of the data structures that are used in the hot path matters for performance (IOPS). Hence this patch that reduces the size of struct ufshcd_lrb on 64-bit systems by 16 bytes. The size of this data structure is reduced from 152 to 136 bytes. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250819154356.2256952-1-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K. 
Petersen --- include/ufs/ufshcd.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 1d3943777584..30ff169878dc 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -167,13 +167,13 @@ struct ufs_pm_lvl_states { * @task_tag: Task tag of the command * @lun: LUN of the command * @intr_cmd: Interrupt command (doesn't participate in interrupt aggregation) + * @req_abort_skip: skip request abort task flag * @issue_time_stamp: time stamp for debug purposes (CLOCK_MONOTONIC) * @issue_time_stamp_local_clock: time stamp for debug purposes (local_clock) * @compl_time_stamp: time stamp for statistics (CLOCK_MONOTONIC) * @compl_time_stamp_local_clock: time stamp for debug purposes (local_clock) * @crypto_key_slot: the key slot to use for inline crypto (-1 if none) * @data_unit_num: the data unit number for the first block for inline crypto - * @req_abort_skip: skip request abort task flag */ struct ufshcd_lrb { struct utp_transfer_req_desc *utr_descriptor_ptr; @@ -193,6 +193,7 @@ struct ufshcd_lrb { int task_tag; u8 lun; /* UPIU LUN id field is only 8-bit wide */ bool intr_cmd; + bool req_abort_skip; ktime_t issue_time_stamp; u64 issue_time_stamp_local_clock; ktime_t compl_time_stamp; @@ -201,8 +202,6 @@ struct ufshcd_lrb { int crypto_key_slot; u64 data_unit_num; #endif - - bool req_abort_skip; }; /** -- cgit v1.2.3 From bf40785fa437c1752117df2edb3220e9c37d98a6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Aug 2025 13:54:24 -0700 Subject: sctp: Use HMAC-SHA1 and HMAC-SHA256 library for chunk authentication For SCTP chunk authentication, use the HMAC-SHA1 and HMAC-SHA256 library functions instead of crypto_shash. This is simpler and faster. There's no longer any need to pre-allocate 'crypto_shash' objects; the SCTP code now simply calls into the HMAC code directly. As part of this, make SCTP always support both HMAC-SHA1 and HMAC-SHA256. 
Previously, it only guaranteed support for HMAC-SHA1. However, HMAC-SHA256 tended to be supported too anyway, as it was supported if CONFIG_CRYPTO_SHA256 was enabled elsewhere in the kconfig. Acked-by: Xin Long Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20250818205426.30222-4-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/net/sctp/auth.h | 17 ++++++----------- include/net/sctp/constants.h | 4 ---- include/net/sctp/structs.h | 5 ----- 3 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index d4b3b2dcd15b..3d5879e08e78 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -22,16 +22,11 @@ struct sctp_endpoint; struct sctp_association; struct sctp_authkey; struct sctp_hmacalgo; -struct crypto_shash; -/* - * Define a generic struct that will hold all the info - * necessary for an HMAC transform - */ +/* Defines an HMAC algorithm supported by SCTP chunk authentication */ struct sctp_hmac { - __u16 hmac_id; /* one of the above ids */ - char *hmac_name; /* name for loading */ - __u16 hmac_len; /* length of the signature */ + __u16 hmac_id; /* one of SCTP_AUTH_HMAC_ID_* */ + __u16 hmac_len; /* length of the HMAC value in bytes */ }; /* This is generic structure that containst authentication bytes used @@ -78,9 +73,9 @@ int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep, struct sctp_association *asoc, gfp_t gfp); int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp); -void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[]); -struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id); -struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc); +const struct sctp_hmac *sctp_auth_get_hmac(__u16 hmac_id); +const struct sctp_hmac * +sctp_auth_asoc_get_hmac(const struct sctp_association *asoc); void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, struct sctp_hmac_algo_param *hmacs); int 
sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5859e0a16a58..8e0f4c4f7750 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -417,16 +417,12 @@ enum { SCTP_AUTH_HMAC_ID_RESERVED_0, SCTP_AUTH_HMAC_ID_SHA1, SCTP_AUTH_HMAC_ID_RESERVED_2, -#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) SCTP_AUTH_HMAC_ID_SHA256, -#endif __SCTP_AUTH_HMAC_MAX }; #define SCTP_AUTH_HMAC_ID_MAX __SCTP_AUTH_HMAC_MAX - 1 #define SCTP_AUTH_NUM_HMACS __SCTP_AUTH_HMAC_MAX -#define SCTP_SHA1_SIG_SIZE 20 -#define SCTP_SHA256_SIG_SIZE 32 /* SCTP-AUTH, Section 3.2 * The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH chunks diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8a540ad9b509..6be6aec25731 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1329,11 +1329,6 @@ struct sctp_endpoint { /* rcvbuf acct. policy. */ __u32 rcvbuf_policy; - /* SCTP AUTH: array of the HMACs that will be allocated - * we need this per association so that we don't serialize - */ - struct crypto_shash **auth_hmacs; - /* SCTP-AUTH: hmacs for the endpoint encoded into parameter */ struct sctp_hmac_algo_param *auth_hmacs_list; -- cgit v1.2.3 From 2f3dd6ec901f29aef5fff3d7a63b1371d67c1760 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Aug 2025 13:54:25 -0700 Subject: sctp: Convert cookie authentication to use HMAC-SHA256 Convert SCTP cookies to use HMAC-SHA256, instead of the previous choice of the legacy algorithms HMAC-MD5 and HMAC-SHA1. Simplify and optimize the code by using the HMAC-SHA256 library instead of crypto_shash, and by preparing the HMAC key when it is generated instead of per-operation. This doesn't break compatibility, since the cookie format is an implementation detail, not part of the SCTP protocol itself. Note that the cookie size doesn't change either. 
The HMAC field was already 32 bytes, even though previously at most 20 bytes were actually compared. 32 bytes exactly fits an untruncated HMAC-SHA256 value. So, although we could safely truncate the MAC to something slightly shorter, for now just keep the cookie size the same. I also considered SipHash, but that would generate only 8-byte MACs. An 8-byte MAC *might* suffice here. However, there's quite a lot of information in the SCTP cookies: more than in TCP SYN cookies. So absent an analysis that occasional forgeries of all that information is okay in SCTP, I erred on the side of caution. Remove HMAC-MD5 and HMAC-SHA1 as options, since the new HMAC-SHA256 option is just better. It's faster as well as more secure. For example, benchmarking on x86_64, cookie authentication is now nearly 3x as fast as the previous default choice and implementation of HMAC-MD5. Also just make the kernel always support cookie authentication if SCTP is supported at all, rather than making it optional in the build. (It was sort of optional before, but it didn't really work properly. E.g., a kernel with CONFIG_SCTP_COOKIE_HMAC_MD5=n still supported HMAC-MD5 cookie authentication if CONFIG_CRYPTO_HMAC and CONFIG_CRYPTO_MD5 happened to be enabled in the kconfig for other reasons.)
Acked-by: Xin Long Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20250818205426.30222-5-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netns/sctp.h | 4 ++-- include/net/sctp/constants.h | 5 ++--- include/net/sctp/structs.h | 30 +++++++----------------------- 3 files changed, 11 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index d25cd7a9c5ff..c0f97f36389e 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -75,8 +75,8 @@ struct netns_sctp { /* Whether Cookie Preservative is enabled(1) or not(0) */ int cookie_preserve_enable; - /* The namespace default hmac alg */ - char *sctp_hmac_alg; + /* Whether cookie authentication is enabled(1) or not(0) */ + int cookie_auth_enable; /* Valid.Cookie.Life - 60 seconds */ unsigned int valid_cookie_life; diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 8e0f4c4f7750..ae3376ba0b99 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -296,9 +296,8 @@ enum { SCTP_MAX_GABS = 16 }; */ #define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */ -#define SCTP_SECRET_SIZE 32 /* Number of octets in a 256 bits. */ - -#define SCTP_SIGNATURE_SIZE 20 /* size of a SLA-1 signature */ +#define SCTP_COOKIE_KEY_SIZE 32 /* size of cookie HMAC key */ +#define SCTP_COOKIE_MAC_SIZE 32 /* size of HMAC field in cookies */ #define SCTP_COOKIE_MULTIPLE 32 /* Pad out our cookie to make our hash * functions simpler to write. 
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6be6aec25731..2ae390219efd 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -32,6 +32,7 @@ #ifndef __sctp_structs_h__ #define __sctp_structs_h__ +#include #include #include #include @@ -68,7 +69,6 @@ struct sctp_outq; struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; -struct crypto_shash; struct sctp_stream; @@ -155,10 +155,6 @@ struct sctp_sock { /* PF_ family specific functions. */ struct sctp_pf *pf; - /* Access to HMAC transform. */ - struct crypto_shash *hmac; - char *sctp_hmac_alg; - /* What is our base endpointer? */ struct sctp_endpoint *ep; @@ -227,7 +223,8 @@ struct sctp_sock { frag_interleave:1, recvrcvinfo:1, recvnxtinfo:1, - data_ready_signalled:1; + data_ready_signalled:1, + cookie_auth_enable:1; atomic_t pd_mode; @@ -335,7 +332,7 @@ struct sctp_cookie { /* The format of our cookie that we send to our peer. */ struct sctp_signed_cookie { - __u8 signature[SCTP_SECRET_SIZE]; + __u8 mac[SCTP_COOKIE_MAC_SIZE]; __u32 __pad; /* force sctp_cookie alignment to 64 bits */ struct sctp_cookie c; } __packed; @@ -1307,22 +1304,9 @@ struct sctp_endpoint { /* This is really a list of struct sctp_association entries. */ struct list_head asocs; - /* Secret Key: A secret key used by this endpoint to compute - * the MAC. This SHOULD be a cryptographic quality - * random number with a sufficient length. - * Discussion in [RFC1750] can be helpful in - * selection of the key. - */ - __u8 secret_key[SCTP_SECRET_SIZE]; - - /* digest: This is a digest of the sctp cookie. This field is - * only used on the receive path when we try to validate - * that the cookie has not been tampered with. We put - * this here so we pre-allocate this once and can re-use - * on every receive. - */ - __u8 *digest; - + /* Cookie authentication key used by this endpoint */ + struct hmac_sha256_key cookie_auth_key; + /* sendbuf acct. policy. 
*/ __u32 sndbuf_policy; -- cgit v1.2.3 From 000a45dce7adc13e45b2925b383e39f32e5f3004 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 19 Aug 2025 21:50:55 +0530 Subject: drm/gpuvm: Pass map arguments through a struct We are about to pass more arguments to drm_gpuvm_sm_map[_ops_create](), so, before we do that, let's pass arguments through a struct instead of changing each call site every time a new optional argument is added. Cc: Danilo Krummrich Cc: Brendan King Cc: Matt Coster Cc: Boris Brezillon Cc: Caterina Shablia Cc: Rob Clark Cc: Matthew Brost Cc: Co-developed-by: Himal Prasad Ghimiray Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Boris Brezillon Acked-by: Danilo Krummrich Reviewed-by: Matthew Brost Reviewed-by: Rob Clark Reviewed-by: Matt Coster # imagination/pvr_vm.c Acked-by: Matt Coster Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250819162058.2777306-2-himal.prasad.ghimiray@intel.com --- include/drm/drm_gpuvm.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 274532facfd6..a9fa44148e0c 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1058,10 +1058,20 @@ struct drm_gpuva_ops { */ #define drm_gpuva_next_op(op) list_next_entry(op, entry) +/** + * struct drm_gpuvm_map_req - arguments passed to drm_gpuvm_sm_map[_ops_create]() + */ +struct drm_gpuvm_map_req { + /** + * @op_map: struct drm_gpuva_op_map + */ + struct drm_gpuva_op_map map; +}; + struct drm_gpuva_ops * drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, - u64 addr, u64 range, - struct drm_gem_object *obj, u64 offset); + const struct drm_gpuvm_map_req *req); + struct drm_gpuva_ops * drm_gpuvm_sm_unmap_ops_create(struct drm_gpuvm *gpuvm, u64 addr, u64 range); @@ -1205,16 +1215,14 @@ struct drm_gpuvm_ops { }; int drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv, - u64 addr, u64 range, - struct drm_gem_object *obj, u64 
offset); + const struct drm_gpuvm_map_req *req); int drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, void *priv, u64 addr, u64 range); int drm_gpuvm_sm_map_exec_lock(struct drm_gpuvm *gpuvm, struct drm_exec *exec, unsigned int num_fences, - u64 req_addr, u64 req_range, - struct drm_gem_object *obj, u64 offset); + struct drm_gpuvm_map_req *req); int drm_gpuvm_sm_unmap_exec_lock(struct drm_gpuvm *gpuvm, struct drm_exec *exec, u64 req_addr, u64 req_range); -- cgit v1.2.3 From 3309323241fbb3c1da885e6b84bdf95e9708e4bb Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 19 Aug 2025 21:50:56 +0530 Subject: drm/gpuvm: Kill drm_gpuva_init() drm_gpuva_init() only has one internal user, and given we are about to add new optional fields, it only adds maintenance burden for no real benefit, so let's kill the thing now. Cc: Danilo Krummrich Cc: Rob Clark Signed-off-by: Boris Brezillon Acked-by: Danilo Krummrich Reviewed-by: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Rob Clark Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250819162058.2777306-3-himal.prasad.ghimiray@intel.com --- include/drm/drm_gpuvm.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index a9fa44148e0c..05347ac6cc73 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -160,15 +160,6 @@ struct drm_gpuva *drm_gpuva_find_first(struct drm_gpuvm *gpuvm, struct drm_gpuva *drm_gpuva_find_prev(struct drm_gpuvm *gpuvm, u64 start); struct drm_gpuva *drm_gpuva_find_next(struct drm_gpuvm *gpuvm, u64 end); -static inline void drm_gpuva_init(struct drm_gpuva *va, u64 addr, u64 range, - struct drm_gem_object *obj, u64 offset) -{ - va->va.addr = addr; - va->va.range = range; - va->gem.obj = obj; - va->gem.offset = offset; -} - /** * drm_gpuva_invalidate() - sets whether the backing GEM of this &drm_gpuva is * invalidated @@ -1089,8 +1080,10 @@ void
drm_gpuva_ops_free(struct drm_gpuvm *gpuvm, static inline void drm_gpuva_init_from_op(struct drm_gpuva *va, struct drm_gpuva_op_map *op) { - drm_gpuva_init(va, op->va.addr, op->va.range, - op->gem.obj, op->gem.offset); + va->va.addr = op->va.addr; + va->va.range = op->va.range; + va->gem.obj = op->gem.obj; + va->gem.offset = op->gem.offset; } /** -- cgit v1.2.3 From baf1638c095686ad970aecee4ca9446c1de18dad Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 19 Aug 2025 21:50:57 +0530 Subject: drm/gpuvm: Introduce drm_gpuvm_madvise_ops_create This ops is used to iterate over GPUVA's in the user-provided range and split the existing sparse VMA's if the start or end of the input range lies within it. The operations can create up to 2 REMAPS and 2 MAPs. The primary use case is for drivers to assign attributes to GPU VAs in the specified range without performing unmaps or merging mappings, supporting fine-grained control over sparse va's. Cc: Danilo Krummrich Cc: Matthew Brost Cc: Boris Brezillon Cc: Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Acked-by: Danilo Krummrich Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250819162058.2777306-4-himal.prasad.ghimiray@intel.com --- include/drm/drm_gpuvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 05347ac6cc73..4a22b9d848f7 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1062,6 +1062,9 @@ struct drm_gpuvm_map_req { struct drm_gpuva_ops * drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, const struct drm_gpuvm_map_req *req); +struct drm_gpuva_ops * +drm_gpuvm_madvise_ops_create(struct drm_gpuvm *gpuvm, + const struct drm_gpuvm_map_req *req); struct drm_gpuva_ops * drm_gpuvm_sm_unmap_ops_create(struct drm_gpuvm *gpuvm, -- cgit v1.2.3 From dab74906423c5a0e41dfd4cefc3758d351ccc51e Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 19 Aug 2025 21:50:58 +0530 
Subject: drm/gpusvm: Make drm_gpusvm_for_each_* macros public The drm_gpusvm_for_each_notifier, drm_gpusvm_for_each_notifier_safe and drm_gpusvm_for_each_range_safe macros are useful for locating notifiers and ranges within a user-specified range. By making these macros public, we enable broader access and utility for developers who need to leverage them in their implementations. v2 (Matthew Brost) - drop inline __drm_gpusvm_range_find - /s/notifier_iter_first/drm_gpusvm_notifier_find Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250819162058.2777306-5-himal.prasad.ghimiray@intel.com --- include/drm/drm_gpusvm.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 4aedc5423aff..142fc2af1716 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -282,6 +282,10 @@ void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start, unsigned long end); +struct drm_gpusvm_notifier * +drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start, + unsigned long end); + struct drm_gpusvm_range * drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start, unsigned long end); @@ -434,4 +438,70 @@ __drm_gpusvm_range_next(struct drm_gpusvm_range *range) (range__) && (drm_gpusvm_range_start(range__) < (end__)); \ (range__) = __drm_gpusvm_range_next(range__)) +/** + * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier + * @range__: Iterator variable for the ranges + * @next__: Iterator variable for the ranges temporary storage + * @notifier__: Pointer to the GPU SVM notifier + * @start__: Start address of the range + * @end__: End address of the range + * + * This macro is used to iterate over GPU SVM ranges in a notifier
while + * removing ranges from it. + */ +#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__) \ + for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)), \ + (next__) = __drm_gpusvm_range_next(range__); \ + (range__) && (drm_gpusvm_range_start(range__) < (end__)); \ + (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__)) + +/** + * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list + * @notifier: a pointer to the current drm_gpusvm_notifier + * + * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if + * the current notifier is the last one or if the input notifier is + * NULL. + */ +static inline struct drm_gpusvm_notifier * +__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier) +{ + if (notifier && !list_is_last(&notifier->entry, + &notifier->gpusvm->notifier_list)) + return list_next_entry(notifier, entry); + + return NULL; +} + +/** + * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm + * @notifier__: Iterator variable for the notifiers + * @gpusvm__: Pointer to the GPU SVM structure + * @start__: Start address of the notifier + * @end__: End address of the notifier + * + * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
+ */ +#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__) \ + for ((notifier__) = drm_gpusvm_notifier_find((gpusvm__), (start__), (end__)); \ + (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \ + (notifier__) = __drm_gpusvm_notifier_next(notifier__)) + +/** + * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm + * @notifier__: Iterator variable for the notifiers + * @next__: Iterator variable for the notifiers temporary storage + * @gpusvm__: Pointer to the GPU SVM structure + * @start__: Start address of the notifier + * @end__: End address of the notifier + * + * This macro is used to iterate over GPU SVM notifiers in a gpusvm while + * removing notifiers from it. + */ +#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__) \ + for ((notifier__) = drm_gpusvm_notifier_find((gpusvm__), (start__), (end__)), \ + (next__) = __drm_gpusvm_notifier_next(notifier__); \ + (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \ + (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__)) + #endif /* __DRM_GPUSVM_H__ */ -- cgit v1.2.3 From d072148a8631f102de60ed5a3a827e85d09d24f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Aug 2025 10:25:00 +0200 Subject: fs: add a FMODE_ flag to indicate IOCB_HAS_METADATA availability Currently the kernel will happily route io_uring requests with metadata to file operations that don't support it. Add a FMODE_ flag to guard that.
Fixes: 4de2ce04c862 ("fs: introduce IOCB_HAS_METADATA for metadata") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250819082517.2038819-2-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..601d036a6c78 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -149,7 +149,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) -/* FMODE_* bit 13 */ +/* Supports IOCB_HAS_METADATA */ +#define FMODE_HAS_METADATA ((__force fmode_t)(1 << 13)) /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)(1 << 14)) -- cgit v1.2.3 From 8151320c747efb22d30b035af989fed0d502176e Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 22 Jul 2025 22:32:33 +0800 Subject: ACPI: pfr_update: Fix the driver update version check The security-version-number check should be used rather than the runtime version check for driver updates. Otherwise, the firmware update would fail when the update binary had a lower runtime version number than the current one. Fixes: 0db89fa243e5 ("ACPI: Introduce Platform Firmware Runtime Update device driver") Cc: 5.17+ # 5.17+ Reported-by: "Govindarajulu, Hariganesh" Signed-off-by: Chen Yu Link: https://patch.msgid.link/20250722143233.3970607-1-yu.c.chen@intel.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- include/uapi/linux/pfrut.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/pfrut.h b/include/uapi/linux/pfrut.h index 42fa15f8310d..b77d5c210c26 100644 --- a/include/uapi/linux/pfrut.h +++ b/include/uapi/linux/pfrut.h @@ -89,6 +89,7 @@ struct pfru_payload_hdr { __u32 hw_ver; __u32 rt_ver; __u8 platform_id[16]; + __u32 svn_ver; }; enum pfru_dsm_status { -- cgit v1.2.3 From 8ef7f3132e4005a103b382e71abea7ad01fbeb86 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 20 Aug 2025 22:23:44 +0800 Subject: LoongArch: Add cpuhotplug hooks to fix high cpu usage of vCPU threads When the CPU is offline, the timer of LoongArch is not correctly closed. This is harmless for real machines, but resulting in an excessively high cpu usage rate of the offline vCPU thread in the virtual machines. To correctly close the timer, we have made the following modifications: Register the cpu hotplug event (CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING) for LoongArch. This event's hooks will be called to close the timer when the CPU is offline. Clear the timer interrupt when the timer is turned off. Since before the timer is turned off, there may be a timer interrupt that has already been in the pending state due to the interruption of the disabled, which also affects the halt state of the offline vCPU. 
Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index edfa61d80702..62cd7b35a29c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -168,6 +168,7 @@ enum cpuhp_state { CPUHP_AP_QCOM_TIMER_STARTING, CPUHP_AP_TEGRA_TIMER_STARTING, CPUHP_AP_ARMADA_TIMER_STARTING, + CPUHP_AP_LOONGARCH_ARCH_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_REALTEK_TIMER_STARTING, -- cgit v1.2.3 From f135fb24ef29335b94921077588cae445bc7f099 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 20 Aug 2025 15:22:00 +0100 Subject: ASoC: cs35l56: Update Firmware Addresses for CS35L63 for production silicon Production silicon for CS35L63 has some small differences compared to pre-production silicon. Update firmware addresses, which are different. No product was ever released with pre-production silicon so there is no need for the driver to include support for it.
Fixes: 978858791ced ("ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire") Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250820142209.127575-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index e17c4cadd04d..f44aabde805e 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -107,8 +107,8 @@ #define CS35L56_DSP1_PMEM_5114 0x3804FE8 #define CS35L63_DSP1_FW_VER CS35L56_DSP1_FW_VER -#define CS35L63_DSP1_HALO_STATE 0x280396C -#define CS35L63_DSP1_PM_CUR_STATE 0x28042C8 +#define CS35L63_DSP1_HALO_STATE 0x2803C04 +#define CS35L63_DSP1_PM_CUR_STATE 0x2804518 #define CS35L63_PROTECTION_STATUS 0x340009C #define CS35L63_TRANSDUCER_ACTUAL_PS 0x34000F4 #define CS35L63_MAIN_RENDER_USER_MUTE 0x3400020 -- cgit v1.2.3 From 8dadc11b67d4b83deff45e4889b3b5540b9c0a7f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 20 Aug 2025 15:22:01 +0100 Subject: ASoC: cs35l56: Handle new algorithms IDs for CS35L63 CS35L63 uses different algorithm IDs from CS35L56. Add a new mechanism to handle different alg IDs between parts in the CS35L56 driver. 
Fixes: 978858791ced ("ASoC: cs35l56: Add initial support for CS35L63 for I2C and SoundWire") Signed-off-by: Richard Fitzgerald Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250820142209.127575-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index f44aabde805e..7c8bbe8ad1e2 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -306,6 +306,7 @@ struct cs35l56_base { struct gpio_desc *reset_gpio; struct cs35l56_spi_payload *spi_payload_buf; const struct cs35l56_fw_reg *fw_reg; + const struct cirrus_amp_cal_controls *calibration_controls; }; static inline bool cs35l56_is_otp_register(unsigned int reg) -- cgit v1.2.3 From 407a2fab3c99c40ad1acedaf028e8222da1f0433 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Tue, 12 Aug 2025 16:17:58 +0800 Subject: drm_bridge: register content protect property Some bridges can update HDCP status based on userspace requests if they support HDCP. The HDCP property is created after connector initialization and before registration, just like other connector properties. Add the content protection property to the connector if a bridge supports HDCP. Signed-off-by: Hsin-Yi Wang Reviewed-by: Sean Paul Signed-off-by: Fei Shao Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250812082135.3351172-2-fshao@chromium.org --- include/drm/drm_bridge.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 2ec1b136d603..8d9d4fd078e7 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1171,6 +1171,10 @@ struct drm_bridge { * before the peripheral. */ bool pre_enable_prev_first; + /** + * @support_hdcp: Indicate that the bridge supports HDCP. + */ + bool support_hdcp; /** * @ddc: Associated I2C adapter for DDC access, if any. 
*/ -- cgit v1.2.3 From 8fc6056dcf79937c46c97fa4996cda65956437a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Aug 2025 10:44:31 +0800 Subject: f2fs: fix to detect potential corrupted nid in free_nid_list As reported, on-disk footer.ino and footer.nid is the same and out-of-range, let's add sanity check on f2fs_alloc_nid() to detect any potential corruption in free_nid_list. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 2f8b8bfc0e73..6afb4a13b81d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -79,6 +79,7 @@ enum stop_cp_reason { STOP_CP_REASON_FLUSH_FAIL, STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_CORRUPTED_FREE_BITMAP, + STOP_CP_REASON_CORRUPTED_NID, STOP_CP_REASON_MAX, }; -- cgit v1.2.3 From 5f8a4f34f6dcf4b64c3cbcfd4aa5a8d7dbcd268d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 19 Aug 2025 09:39:15 -0700 Subject: bnxt_en: hsi: Update FW interface to 1.10.3.133 The major change is struct pcie_ctx_hw_stats_v2 which has new latency histograms added. 
Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250819163919.104075-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- include/linux/bnxt/hsi.h | 315 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 253 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h index 549231703bce..8c5dac3b3ef3 100644 --- a/include/linux/bnxt/hsi.h +++ b/include/linux/bnxt/hsi.h @@ -276,6 +276,10 @@ struct cmd_nums { #define HWRM_REG_POWER_QUERY 0xe1UL #define HWRM_CORE_FREQUENCY_QUERY 0xe2UL #define HWRM_REG_POWER_HISTOGRAM 0xe3UL + #define HWRM_MONITOR_PAX_HISTOGRAM_START 0xe4UL + #define HWRM_MONITOR_PAX_HISTOGRAM_COLLECT 0xe5UL + #define HWRM_STAT_QUERY_ROCE_STATS 0xe6UL + #define HWRM_STAT_QUERY_ROCE_STATS_EXT 0xe7UL #define HWRM_WOL_FILTER_ALLOC 0xf0UL #define HWRM_WOL_FILTER_FREE 0xf1UL #define HWRM_WOL_FILTER_QCFG 0xf2UL @@ -407,9 +411,8 @@ struct cmd_nums { #define HWRM_FUNC_LAG_UPDATE 0x1b1UL #define HWRM_FUNC_LAG_FREE 0x1b2UL #define HWRM_FUNC_LAG_QCFG 0x1b3UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_ADD 0x1c2UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_DELETE 0x1c3UL - #define HWRM_FUNC_TIMEDTX_PACING_RATE_QUERY 0x1c4UL + #define HWRM_FUNC_TTX_PACING_RATE_PROF_QUERY 0x1c3UL + #define HWRM_FUNC_TTX_PACING_RATE_QUERY 0x1c4UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -441,6 +444,7 @@ struct cmd_nums { #define HWRM_MFG_WRITE_CERT_NVM 0x21cUL #define HWRM_PORT_POE_CFG 0x230UL #define HWRM_PORT_POE_QCFG 0x231UL + #define HWRM_PORT_PHY_FDRSTAT 0x232UL #define HWRM_UDCC_QCAPS 0x258UL #define HWRM_UDCC_CFG 0x259UL #define HWRM_UDCC_QCFG 0x25aUL @@ -453,6 +457,8 @@ struct cmd_nums { #define HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS 0x261UL #define HWRM_QUEUE_PFCWD_TIMEOUT_CFG 0x262UL #define HWRM_QUEUE_PFCWD_TIMEOUT_QCFG 0x263UL + #define HWRM_QUEUE_ADPTV_QOS_RX_QCFG 0x264UL + #define HWRM_QUEUE_ADPTV_QOS_TX_QCFG 0x265UL #define 
HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -551,6 +557,8 @@ struct cmd_nums { #define HWRM_DBG_COREDUMP_CAPTURE 0xff2cUL #define HWRM_DBG_PTRACE 0xff2dUL #define HWRM_DBG_SIM_CABLE_STATE 0xff2eUL + #define HWRM_DBG_TOKEN_QUERY_AUTH_IDS 0xff2fUL + #define HWRM_DBG_TOKEN_CFG 0xff30UL #define HWRM_NVM_GET_VPD_FIELD_INFO 0xffeaUL #define HWRM_NVM_SET_VPD_FIELD_INFO 0xffebUL #define HWRM_NVM_DEFRAG 0xffecUL @@ -632,8 +640,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 97 -#define HWRM_VERSION_STR "1.10.3.97" +#define HWRM_VERSION_RSVD 133 +#define HWRM_VERSION_STR "1.10.3.133" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -688,6 +696,7 @@ struct hwrm_ver_get_output { #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_SOC_CAPABLE 0x10000UL + #define VER_GET_RESP_DEV_CAPS_CFG_DEBUG_TOKEN_SUPPORTED 0x20000UL u8 roce_fw_maj_8b; u8 roce_fw_min_8b; u8 roce_fw_bld_8b; @@ -872,7 +881,8 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_REPRESENTOR_PAIR_CHANGE 0x4eUL #define ASYNC_EVENT_CMPL_EVENT_ID_VF_STAT_CHANGE 0x4fUL #define ASYNC_EVENT_CMPL_EVENT_ID_HOST_COREDUMP 0x50UL - #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x51UL + #define ASYNC_EVENT_CMPL_EVENT_ID_ADPTV_QOS 0x51UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x52UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -1344,7 +1354,8 @@ struct hwrm_async_event_cmpl_dbg_buf_producer { #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_CA2_TRACE 0x9UL #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_RIGP1_TRACE 0xaUL #define 
ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE 0xbUL - #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_AFM_KONG_HWRM_TRACE + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE 0xcUL + #define ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_LAST ASYNC_EVENT_CMPL_DBG_BUF_PRODUCER_EVENT_DATA1_TYPE_ERR_QPC_TRACE }; /* hwrm_async_event_cmpl_hwrm_error (size:128b/16B) */ @@ -1401,7 +1412,11 @@ struct hwrm_async_event_cmpl_error_report_base { #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_THERMAL_THRESHOLD 0x5UL #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED 0x6UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUP_UDCC_SES 0x7UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DB_DROP 0x8UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MD_TEMP 0x9UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR 0xaUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_VNIC_ERR }; /* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */ @@ -1914,6 +1929,12 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT3_RX_RATE_PROFILE_SEL_SUPPORTED 0x8UL #define FUNC_QCAPS_RESP_FLAGS_EXT3_BIDI_OPT_SUPPORTED 0x10UL #define FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED 0x20UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_ROCE_VF_DYN_ALLOC_SUPPORT 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_CHANGE_UDP_SRCPORT_SUPPORT 0x80UL + #define 
FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_COMPLIANCE_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MULTI_L2_DB_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_PCIE_SECURE_ATS_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT3_MBUF_STATS_SUPPORTED 0x800UL __le16 max_roce_vfs; __le16 max_crypto_rx_flow_filters; u8 unused_3[3]; @@ -1931,7 +1952,7 @@ struct hwrm_func_qcfg_input { u8 unused_0[6]; }; -/* hwrm_func_qcfg_output (size:1344b/168B) */ +/* hwrm_func_qcfg_output (size:1408b/176B) */ struct hwrm_func_qcfg_output { __le16 error_code; __le16 req_type; @@ -2124,7 +2145,43 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_QCFG_RESP_XID_PARTITION_CFG_RX_CK 0x2UL __le16 mirror_vnic_id; - u8 unused_7[7]; + u8 max_link_width; + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X1 0x1UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X2 0x2UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X4 0x4UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X8 0x8UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 0x10UL + #define FUNC_QCFG_RESP_MAX_LINK_WIDTH_LAST FUNC_QCFG_RESP_MAX_LINK_WIDTH_X16 + u8 max_link_speed; + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G1 0x1UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G2 0x2UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G3 0x3UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G4 0x4UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 0x5UL + #define FUNC_QCFG_RESP_MAX_LINK_SPEED_LAST FUNC_QCFG_RESP_MAX_LINK_SPEED_G5 + u8 negotiated_link_width; + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X1 0x1UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X2 0x2UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X4 0x4UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X8 0x8UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 0x10UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_LAST 
FUNC_QCFG_RESP_NEGOTIATED_LINK_WIDTH_X16 + u8 negotiated_link_speed; + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_UNKNOWN 0x0UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G1 0x1UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G2 0x2UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G3 0x3UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G4 0x4UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 0x5UL + #define FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_LAST FUNC_QCFG_RESP_NEGOTIATED_LINK_SPEED_G5 + u8 unused_7[2]; + u8 pcie_compliance; + u8 unused_8; + __le16 l2_db_multi_page_size_kb; + u8 unused_9[5]; u8 valid; }; @@ -2322,6 +2379,7 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_ENABLES2_ROCE_MAX_GID_PER_VF 0x200UL #define FUNC_CFG_REQ_ENABLES2_XID_PARTITION_CFG 0x400UL #define FUNC_CFG_REQ_ENABLES2_PHYSICAL_SLOT_NUMBER 0x800UL + #define FUNC_CFG_REQ_ENABLES2_PCIE_COMPLIANCE 0x1000UL u8 port_kdnet_mode; #define FUNC_CFG_REQ_PORT_KDNET_MODE_DISABLED 0x0UL #define FUNC_CFG_REQ_PORT_KDNET_MODE_ENABLED 0x1UL @@ -2353,7 +2411,8 @@ struct hwrm_func_cfg_input { __le16 xid_partition_cfg; #define FUNC_CFG_REQ_XID_PARTITION_CFG_TX_CK 0x1UL #define FUNC_CFG_REQ_XID_PARTITION_CFG_RX_CK 0x2UL - __le16 unused_2; + u8 pcie_compliance; + u8 unused_2; }; /* hwrm_func_cfg_output (size:128b/16B) */ @@ -2370,11 +2429,41 @@ struct hwrm_func_cfg_output { struct hwrm_func_cfg_cmd_err { u8 code; #define FUNC_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_RANGE 0x1UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_MORE_THAN_MAX 0x2UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_MIN_BW_UNSUPPORTED 0x3UL - #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT 0x4UL - #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_PERCENT + #define FUNC_CFG_CMD_ERR_CODE_PARTITION_BW_OUT_OF_RANGE 0x1UL + #define FUNC_CFG_CMD_ERR_CODE_NPAR_PARTITION_DOWN_FAILED 0x2UL + #define FUNC_CFG_CMD_ERR_CODE_TPID_SET_DFLT_VLAN_NOT_SET 0x3UL + #define 
FUNC_CFG_CMD_ERR_CODE_RES_ARRAY_ALLOC_FAILED 0x4UL + #define FUNC_CFG_CMD_ERR_CODE_TX_RING_ASSET_TEST_FAILED 0x5UL + #define FUNC_CFG_CMD_ERR_CODE_TX_RING_RES_UPDATE_FAILED 0x6UL + #define FUNC_CFG_CMD_ERR_CODE_APPLY_MAX_BW_FAILED 0x7UL + #define FUNC_CFG_CMD_ERR_CODE_ENABLE_EVB_FAILED 0x8UL + #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_ASSET_TEST_FAILED 0x9UL + #define FUNC_CFG_CMD_ERR_CODE_RSS_CTXT_RES_UPDATE_FAILED 0xaUL + #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_ASSET_TEST_FAILED 0xbUL + #define FUNC_CFG_CMD_ERR_CODE_CMPL_RING_RES_UPDATE_FAILED 0xcUL + #define FUNC_CFG_CMD_ERR_CODE_NQ_ASSET_TEST_FAILED 0xdUL + #define FUNC_CFG_CMD_ERR_CODE_NQ_RES_UPDATE_FAILED 0xeUL + #define FUNC_CFG_CMD_ERR_CODE_RX_RING_ASSET_TEST_FAILED 0xfUL + #define FUNC_CFG_CMD_ERR_CODE_RX_RING_RES_UPDATE_FAILED 0x10UL + #define FUNC_CFG_CMD_ERR_CODE_VNIC_ASSET_TEST_FAILED 0x11UL + #define FUNC_CFG_CMD_ERR_CODE_VNIC_RES_UPDATE_FAILED 0x12UL + #define FUNC_CFG_CMD_ERR_CODE_FAILED_TO_START_STATS_THREAD 0x13UL + #define FUNC_CFG_CMD_ERR_CODE_RDMA_SRIOV_DISABLED 0x14UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_DISABLED 0x15UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_ASSET_TEST_FAILED 0x16UL + #define FUNC_CFG_CMD_ERR_CODE_TX_KTLS_RES_UPDATE_FAILED 0x17UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_DISABLED 0x18UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_ASSET_TEST_FAILED 0x19UL + #define FUNC_CFG_CMD_ERR_CODE_RX_KTLS_RES_UPDATE_FAILED 0x1aUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_DISABLED 0x1bUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_ASSET_TEST_FAILED 0x1cUL + #define FUNC_CFG_CMD_ERR_CODE_TX_QUIC_RES_UPDATE_FAILED 0x1dUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_DISABLED 0x1eUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_ASSET_TEST_FAILED 0x1fUL + #define FUNC_CFG_CMD_ERR_CODE_RX_QUIC_RES_UPDATE_FAILED 0x20UL + #define FUNC_CFG_CMD_ERR_CODE_INVALID_KDNET_MODE 0x21UL + #define FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL 0x22UL + #define FUNC_CFG_CMD_ERR_CODE_LAST FUNC_CFG_CMD_ERR_CODE_SCHQ_CFG_FAIL u8 unused_0[7]; }; 
@@ -3780,6 +3869,7 @@ struct hwrm_func_backing_store_cfg_v2_input { #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_CFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3865,6 +3955,7 @@ struct hwrm_func_backing_store_qcfg_v2_input { #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_REQ_TYPE_INVALID __le16 instance; @@ -3904,6 +3995,7 @@ struct hwrm_func_backing_store_qcfg_v2_output { #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA1_TRACE 0x27UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_RIGP1_TRACE 0x29UL + #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_ERR_QPC_TRACE 0x2aUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCFG_V2_RESP_TYPE_INVALID __le16 instance; @@ -4027,6 +4119,7 @@ struct hwrm_func_backing_store_qcaps_v2_input { #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_INVALID u8 
rsvd[6]; @@ -4070,6 +4163,7 @@ struct hwrm_func_backing_store_qcaps_v2_output { #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_CA2_TRACE 0x28UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_RIGP1_TRACE 0x29UL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_AFM_KONG_HWRM_TRACE 0x2aUL + #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_ERR_QPC_TRACE 0x2bUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID 0xffffUL #define FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_LAST FUNC_BACKING_STORE_QCAPS_V2_RESP_TYPE_INVALID __le16 entry_size; @@ -4216,6 +4310,10 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_1XN_DISABLE 0x100000UL #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_ENABLE 0x200000UL #define PORT_PHY_CFG_REQ_FLAGS_FEC_RS272_IEEE_DISABLE 0x400000UL + #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_ENABLE 0x800000UL + #define PORT_PHY_CFG_REQ_FLAGS_LINK_TRAINING_DISABLE 0x1000000UL + #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_ENABLE 0x2000000UL + #define PORT_PHY_CFG_REQ_FLAGS_PRECODING_DISABLE 0x4000000UL __le32 enables; #define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE 0x1UL #define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX 0x2UL @@ -4703,6 +4801,8 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SPEEDS2_SUPPORTED 0x4UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_LINK_TRAINING 0x8UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_PRECODING 0x10UL char phy_vendor_name[16]; char phy_vendor_partnumber[16]; __le16 support_pam4_speeds; @@ -4725,6 +4825,10 @@ struct hwrm_port_phy_qcfg_output { u8 link_down_reason; #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_RF 0x1UL #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_OTP_SPEED_VIOLATION 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_CABLE_REMOVED 0x4UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_MODULE_FAULT 0x8UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_BMC_REQUEST 
0x10UL + #define PORT_PHY_QCFG_RESP_LINK_DOWN_REASON_TX_LASER_DISABLED 0x20UL __le16 support_speeds2; #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_1GB 0x1UL #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS2_10GB 0x2UL @@ -5882,9 +5986,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED0_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led0_color_caps; - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED0_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led1_id; u8 led1_type; #define PORT_LED_QCAPS_RESP_LED1_TYPE_SPEED 0x0UL @@ -5900,9 +6005,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED1_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led1_color_caps; - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED1_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led2_id; u8 led2_type; #define PORT_LED_QCAPS_RESP_LED2_TYPE_SPEED 0x0UL @@ -5918,9 +6024,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED2_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led2_color_caps; - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL - #define 
PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED2_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 led3_id; u8 led3_type; #define PORT_LED_QCAPS_RESP_LED3_TYPE_SPEED 0x0UL @@ -5936,9 +6043,10 @@ struct hwrm_port_led_qcaps_output { #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_SUPPORTED 0x8UL #define PORT_LED_QCAPS_RESP_LED3_STATE_CAPS_BLINK_ALT_SUPPORTED 0x10UL __le16 led3_color_caps; - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL - #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_RSVD 0x1UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_AMBER_SUPPORTED 0x2UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GREEN_SUPPORTED 0x4UL + #define PORT_LED_QCAPS_RESP_LED3_COLOR_CAPS_GRNAMB_SUPPORTED 0x8UL u8 unused_4[3]; u8 valid; }; @@ -7036,9 +7144,22 @@ struct hwrm_vnic_rss_cfg_output { /* hwrm_vnic_rss_cfg_cmd_err (size:64b/8B) */ struct hwrm_vnic_rss_cfg_cmd_err { u8 code; - #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL - #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL - #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNKNOWN 0x0UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_INTERFACE_NOT_READY 0x1UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_UNABLE_TO_GET_RSS_CFG 0x2UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_UNSUPPORTED 0x3UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_TYPE_ERR 0x4UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_MODE_FAIL 0x5UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_RING_GRP_TABLE_ALLOC_ERR 0x6UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HASH_KEY_ALLOC_ERR 0x7UL + 
#define VNIC_RSS_CFG_CMD_ERR_CODE_DMA_FAILED 0x8UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_RX_RING_ALLOC_ERR 0x9UL + #define VNIC_RSS_CFG_CMD_ERR_CODE_CMPL_RING_ALLOC_ERR 0xaUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_HW_SET_RSS_FAILED 0xbUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_CTX_INVALID 0xcUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_INVALID 0xdUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID 0xeUL + #define VNIC_RSS_CFG_CMD_ERR_CODE_LAST VNIC_RSS_CFG_CMD_ERR_CODE_VNIC_RING_TABLE_PAIR_INVALID u8 unused_0[7]; }; @@ -7177,7 +7298,7 @@ struct hwrm_vnic_rss_cos_lb_ctx_free_output { u8 valid; }; -/* hwrm_ring_alloc_input (size:704b/88B) */ +/* hwrm_ring_alloc_input (size:768b/96B) */ struct hwrm_ring_alloc_input { __le16 req_type; __le16 cmpl_ring; @@ -7195,6 +7316,7 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_ENABLES_MPC_CHNLS_TYPE 0x400UL #define RING_ALLOC_REQ_ENABLES_STEERING_TAG_VALID 0x800UL #define RING_ALLOC_REQ_ENABLES_RX_RATE_PROFILE_VALID 0x1000UL + #define RING_ALLOC_REQ_ENABLES_DPI_VALID 0x2000UL u8 ring_type; #define RING_ALLOC_REQ_RING_TYPE_L2_CMPL 0x0UL #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL @@ -7287,6 +7409,8 @@ struct hwrm_ring_alloc_input { #define RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_LAST RING_ALLOC_REQ_RX_RATE_PROFILE_SEL_POLL_MODE u8 unused_4; __le64 cq_handle; + __le16 dpi; + __le16 unused_5[3]; }; /* hwrm_ring_alloc_output (size:128b/16B) */ @@ -7776,7 +7900,10 @@ struct hwrm_cfa_l2_set_rx_mask_cmd_err { u8 code; #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN 0x0UL #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL - #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_MAX_VLAN_TAGS 0x2UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_VNIC_ID 0x3UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION 0x4UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST CFA_L2_SET_RX_MASK_CMD_ERR_CODE_INVALID_ACTION u8 
unused_0[7]; }; @@ -8109,9 +8236,38 @@ struct hwrm_cfa_ntuple_filter_alloc_output { /* hwrm_cfa_ntuple_filter_alloc_cmd_err (size:64b/8B) */ struct hwrm_cfa_ntuple_filter_alloc_cmd_err { u8 code; - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL - #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_MAC 0x65UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_BC_MC_MAC 0x66UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_VNIC 0x67UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_PF_FID 0x68UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L2_CTXT_ID 0x69UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_CTXT_CFG 0x6aUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_NULL_L2_DATA_FLD 0x6bUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_CFA_LAYOUT 0x6cUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_CTXT_ALLOC_FAIL 0x6dUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ROCE_FLOW_ERR 0x6eUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_OWNER_FID 0x6fUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ZERO_REF_CNT 0x70UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_FLOW_TYPE 0x71UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_IVLAN 0x72UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_MAX_VLAN_ID 0x73UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_TNL_REQ 0x74UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_ADDR 0x75UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L2_IVLAN 0x76UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR 0x77UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_L3_ADDR_TYPE 0x78UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_T_L3_ADDR_TYPE 0x79UL + #define 
CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DST_VNIC_ID 0x7aUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VNI 0x7bUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_DST_ID 0x7cUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_FAIL_ROCE_L2_FLOW 0x7dUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_NPAR_VLAN 0x7eUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_ATSP_ADD 0x7fUL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_DFLT_VLAN_FAIL 0x80UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_INVALID_L3_TYPE 0x81UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW 0x82UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_VAL_FAIL_TNL_FLOW u8 unused_0[7]; }; @@ -9181,7 +9337,7 @@ struct pcie_ctx_hw_stats { __le64 pcie_recovery_histogram; }; -/* pcie_ctx_hw_stats_v2 (size:4096b/512B) */ +/* pcie_ctx_hw_stats_v2 (size:4544b/568B) */ struct pcie_ctx_hw_stats_v2 { __le64 pcie_pl_signal_integrity; __le64 pcie_dl_signal_integrity; @@ -9212,6 +9368,9 @@ struct pcie_ctx_hw_stats_v2 { __le64 pcie_other_packet_count; __le64 pcie_blocked_packet_count; __le64 pcie_cmpl_packet_count; + __le32 pcie_rd_latency_histogram[12]; + __le32 pcie_rd_latency_all_normal_count; + __le32 unused_2; }; /* hwrm_stat_generic_qstats_input (size:256b/32B) */ @@ -9406,7 +9565,8 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_COUNT 0x12cUL #define STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND 0x12dUL - #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_UDCC_RTT_BUCKET_BOUND + #define STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS 0x190UL + #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_DBG_TOKEN_CLAIMS __le16 len; u8 version; #define STRUCT_HDR_VERSION_0 0x0UL @@ -9459,11 +9619,13 @@ struct hwrm_fw_set_structured_data_output { /* hwrm_fw_set_structured_data_cmd_err (size:64b/8B) */ struct hwrm_fw_set_structured_data_cmd_err { u8 code; - #define 
FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL - #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_ALREADY_ADDED 0x4UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG 0x5UL + #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_LAST FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_INST_IN_PROG u8 unused_0[7]; }; @@ -9487,7 +9649,9 @@ struct hwrm_fw_get_structured_data_input { #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_PEER 0x201UL #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_OPERATIONAL 0x202UL #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL 0x300UL - #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_HOST_OPERATIONAL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_SUPPORTED 0x320UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE 0x321UL + #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_LAST FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_CLAIMS_ACTIVE u8 count; u8 unused_0; }; @@ -10172,7 +10336,8 @@ struct hwrm_dbg_log_buffer_flush_input { #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA2_TRACE 0x9UL #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_RIGP1_TRACE 0xaUL #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE 0xbUL - #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE 0xcUL + #define DBG_LOG_BUFFER_FLUSH_REQ_TYPE_LAST DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE u8 unused_1[2]; __le32 flags; #define 
DBG_LOG_BUFFER_FLUSH_REQ_FLAGS_FLUSH_ALL_BUFFERS 0x1UL @@ -10295,10 +10460,15 @@ struct hwrm_nvm_write_output { /* hwrm_nvm_write_cmd_err (size:64b/8B) */ struct hwrm_nvm_write_cmd_err { u8 code; - #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL - #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL - #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_NO_SPACE + #define NVM_WRITE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_WRITE_CMD_ERR_CODE_FRAG_ERR 0x1UL + #define NVM_WRITE_CMD_ERR_CODE_NO_SPACE 0x2UL + #define NVM_WRITE_CMD_ERR_CODE_WRITE_FAILED 0x3UL + #define NVM_WRITE_CMD_ERR_CODE_REQD_ERASE_FAILED 0x4UL + #define NVM_WRITE_CMD_ERR_CODE_VERIFY_FAILED 0x5UL + #define NVM_WRITE_CMD_ERR_CODE_INVALID_HEADER 0x6UL + #define NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED 0x7UL + #define NVM_WRITE_CMD_ERR_CODE_LAST NVM_WRITE_CMD_ERR_CODE_UPDATE_DIGEST_FAILED u8 unused_0[7]; }; @@ -10438,7 +10608,11 @@ struct hwrm_nvm_get_dev_info_output { __le16 srt2_fw_minor; __le16 srt2_fw_build; __le16 srt2_fw_patch; - u8 unused_0[7]; + u8 security_soc_fw_major; + u8 security_soc_fw_minor; + u8 security_soc_fw_build; + u8 security_soc_fw_patch; + u8 unused_0[3]; u8 valid; }; @@ -10568,7 +10742,9 @@ struct hwrm_nvm_install_update_cmd_err { #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE 0x2UL #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK 0x3UL #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT 0x4UL - #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_VOLTREG_SUPPORT + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_DEFRAG_FAILED 0x5UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR 0x6UL + #define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN_DIR_ERR u8 unused_0[7]; }; @@ -10591,7 +10767,8 @@ struct hwrm_nvm_get_variable_input { __le16 index_2; __le16 index_3; u8 flags; - #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + #define 
NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + #define NVM_GET_VARIABLE_REQ_FLAGS_VALIDATE_OPT_VALUE 0x2UL u8 unused_0; }; @@ -10606,18 +10783,25 @@ struct hwrm_nvm_get_variable_output { #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0 0x0UL #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF 0xffffUL #define NVM_GET_VARIABLE_RESP_OPTION_NUM_LAST NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF - u8 unused_0[3]; + u8 flags; + #define NVM_GET_VARIABLE_RESP_FLAGS_VALIDATE_OPT_VALUE 0x1UL + u8 unused_0[2]; u8 valid; }; /* hwrm_nvm_get_variable_cmd_err (size:64b/8B) */ struct hwrm_nvm_get_variable_cmd_err { u8 code; - #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL - #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT + #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x4UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x5UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x6UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x7UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x8UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LAST NVM_GET_VARIABLE_CMD_ERR_CODE_NO_MEM u8 unused_0[7]; }; @@ -10667,10 +10851,17 @@ struct hwrm_nvm_set_variable_output { /* hwrm_nvm_set_variable_cmd_err (size:64b/8B) */ struct hwrm_nvm_set_variable_cmd_err { u8 code; - #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL - #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR + #define 
NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACTION_NOT_SUPPORTED 0x4UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_INDEX_INVALID 0x5UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_ACCESS_DENIED 0x6UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CB_FAILED 0x7UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_INVALID_DATA_LEN 0x8UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM 0x9UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_LAST NVM_SET_VARIABLE_CMD_ERR_CODE_NO_MEM u8 unused_0[7]; }; -- cgit v1.2.3 From a6d4f25888b83b8300aef28d9ee22765c1cc9b34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Aug 2025 17:40:30 +0000 Subject: net: set net.core.rmem_max and net.core.wmem_max to 4 MB SO_RCVBUF and SO_SNDBUF have limited range today, unless distros or system admins change rmem_max and wmem_max. Even iproute2 uses 1 MB SO_RCVBUF which is capped by the kernel. Decouple [rw]mem_max and [rw]mem_default and increase [rw]mem_max to 4 MB. 
Before: $ sysctl net.core.rmem_default net.core.rmem_max net.core.wmem_default net.core.wmem_max net.core.rmem_default = 212992 net.core.rmem_max = 212992 net.core.wmem_default = 212992 net.core.wmem_max = 212992 After: $ sysctl net.core.rmem_default net.core.rmem_max net.core.wmem_default net.core.wmem_max net.core.rmem_default = 212992 net.core.rmem_max = 4194304 net.core.wmem_default = 212992 net.core.wmem_max = 4194304 Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20250819174030.1986278-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 1c49ea13af4a..63a6a48afb48 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2970,8 +2970,8 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo); */ #define _SK_MEM_PACKETS 256 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) -#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) -#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) +#define SK_WMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) +#define SK_RMEM_DEFAULT (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -- cgit v1.2.3 From b64d035f77b1f02ab449393342264b44950a75ae Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 15 Aug 2025 06:19:58 +0000 Subject: bonding: update LACP activity flag after setting lacp_active The port's actor_oper_port_state activity flag should be updated immediately after changing the lacp_active option to reflect the current mode correctly. 
Fixes: 3a755cd8b7c6 ("bonding: add new option lacp_active") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250815062000.22220-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/net/bond_3ad.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 2053cd8e788a..dba369a2cf27 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -307,6 +307,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); +void bond_3ad_update_lacp_active(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); size_t bond_3ad_stats_size(void); -- cgit v1.2.3 From 5a774b64cd6a008f016119782a5d3f30ed0bf3b7 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 18 Aug 2025 09:51:21 +0200 Subject: net: phy: micrel: Add support for lan8842 The LAN8842 is a low-power, single port triple-speed (10BASE-T/100BASE-TX/1000BASE-T) ethernet physical layer transceiver (PHY) that supports transmission and reception of data on standard CAT-5, as well as CAT-5e and CAT-6, Unshielded Twisted Pair (UTP) cables. The LAN8842 supports industry-standard SGMII (Serial Gigabit Media Independent Interface) providing chip-to-chip connection to a Gigabit Ethernet MAC using a single serialized link (differential pair) in each direction. There are 2 variants of the lan8842: one that supports timestamping (lan8842) and one that doesn't have timestamping (lan8832). 
Signed-off-by: Horatiu Vultur Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250818075121.1298170-5-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 9af01bdd86d2..ca691641788b 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -32,6 +32,7 @@ #define PHY_ID_LAN8814 0x00221660 #define PHY_ID_LAN8804 0x00221670 #define PHY_ID_LAN8841 0x00221650 +#define PHY_ID_LAN8842 0x002216C0 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 -- cgit v1.2.3 From 6c9468aad215a198742c8375b0415e42521c905c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:56:54 -0700 Subject: fscrypt: replace raw loads of info pointer with helper function Add and use a helper function fscrypt_get_inode_info_raw(). It loads an inode's fscrypt info pointer using a raw dereference, which is appropriate when the caller knows the key setup already happened. This eliminates most occurrences of inode::i_crypt_info in the source, in preparation for replacing that with a filesystem-specific field. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-2-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fscrypt.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 10dd161690a2..23c5198612d1 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -195,6 +195,22 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +/* + * Load the inode's fscrypt info pointer, using a raw dereference. 
Since this + * uses a raw dereference with no memory barrier, it is appropriate to use only + * when the caller knows the inode's key setup already happened, resulting in + * non-NULL fscrypt info. E.g., the file contents en/decryption functions use + * this, since fscrypt_file_open() set up the key. + */ +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info_raw(const struct inode *inode) +{ + struct fscrypt_inode_info *ci = inode->i_crypt_info; + + VFS_WARN_ON_ONCE(ci == NULL); + return ci; +} + static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { -- cgit v1.2.3 From 93221de31a8df6710e02328f82dc68d7ab4ad9e6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:56:55 -0700 Subject: fscrypt: add support for info in fs-specific part of inode Add an inode_info_offs field to struct fscrypt_operations, and update fs/crypto/ to support it. When set to a nonzero value, it specifies the offset to the fscrypt_inode_info pointer within the filesystem-specific part of the inode structure, to be used instead of inode::i_crypt_info. Since this makes inode::i_crypt_info no longer necessarily used, update comments that mentioned it. This is a prerequisite for a later commit that removes inode::i_crypt_info, saving memory and improving cache efficiency with filesystems that don't support fscrypt. 
Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-3-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fscrypt.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 23c5198612d1..d7ff53accbfe 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -61,6 +61,12 @@ struct fscrypt_name { /* Crypto operations for filesystems */ struct fscrypt_operations { + /* + * The offset of the pointer to struct fscrypt_inode_info in the + * filesystem-specific part of the inode, relative to the beginning of + * the common part of the inode (the 'struct inode'). + */ + ptrdiff_t inode_info_offs; /* * If set, then fs/crypto/ will allocate a global bounce page pool the @@ -195,6 +201,14 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +static inline struct fscrypt_inode_info ** +fscrypt_inode_info_addr(const struct inode *inode) +{ + if (inode->i_sb->s_cop->inode_info_offs == 0) + return (struct fscrypt_inode_info **)&inode->i_crypt_info; + return (void *)inode + inode->i_sb->s_cop->inode_info_offs; +} + /* * Load the inode's fscrypt info pointer, using a raw dereference. Since this * uses a raw dereference with no memory barrier, it is appropriate to use only @@ -205,7 +219,7 @@ int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, static inline struct fscrypt_inode_info * fscrypt_get_inode_info_raw(const struct inode *inode) { - struct fscrypt_inode_info *ci = inode->i_crypt_info; + struct fscrypt_inode_info *ci = *fscrypt_inode_info_addr(inode); VFS_WARN_ON_ONCE(ci == NULL); return ci; @@ -216,11 +230,11 @@ fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). 
- * I.e., another task may publish ->i_crypt_info concurrently, executing - * a RELEASE barrier. We need to use smp_load_acquire() here to safely + * I.e., another task may publish the fscrypt info concurrently, + * executing a RELEASE barrier. Use smp_load_acquire() here to safely * ACQUIRE the memory the other task published. */ - return smp_load_acquire(&inode->i_crypt_info); + return smp_load_acquire(fscrypt_inode_info_addr(inode)); } /** -- cgit v1.2.3 From ab90c2d2476c4dd6deddd089c7e83b858d135783 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:00 -0700 Subject: fs: remove inode::i_crypt_info Now that all fscrypt-capable filesystems store the pointer to fscrypt_inode_info in the filesystem-specific part of the inode structure, inode::i_crypt_info is no longer needed. Update fscrypt_inode_info_addr() to no longer support the fallback to inode::i_crypt_info. Finally, remove inode::i_crypt_info itself along with the now-unnecessary forward declaration of fscrypt_inode_info. The end result of the migration to the filesystem-specific pointer is memory savings on CONFIG_FS_ENCRYPTION=y kernels for all filesystems that don't support fscrypt. Specifically, their in-memory inodes are now smaller by the size of a pointer: either 4 or 8 bytes. 
Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-8-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 ----- include/linux/fscrypt.h | 8 ++++++-- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..1dafa18169be 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -72,7 +72,6 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; @@ -780,10 +779,6 @@ struct inode { struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif -#ifdef CONFIG_FS_ENCRYPTION - struct fscrypt_inode_info *i_crypt_info; -#endif - #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index d7ff53accbfe..516aba5b858b 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -201,11 +201,15 @@ struct fscrypt_operations { int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); +/* + * Returns the address of the fscrypt info pointer within the + * filesystem-specific part of the inode. (To save memory on filesystems that + * don't support fscrypt, a field in 'struct inode' itself is no longer used.) 
+ */ static inline struct fscrypt_inode_info ** fscrypt_inode_info_addr(const struct inode *inode) { - if (inode->i_sb->s_cop->inode_info_offs == 0) - return (struct fscrypt_inode_info **)&inode->i_crypt_info; + VFS_WARN_ON_ONCE(inode->i_sb->s_cop->inode_info_offs == 0); return (void *)inode + inode->i_sb->s_cop->inode_info_offs; } -- cgit v1.2.3 From 2a7349add18e5915cd87251af5f98db1772b6131 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:01 -0700 Subject: fsverity: add support for info in fs-specific part of inode Add an inode_info_offs field to struct fsverity_operations, and update fs/verity/ to support it. When set to a nonzero value, it specifies the offset to the fsverity_info pointer within the filesystem-specific part of the inode structure, to be used instead of inode::i_verity_info. Since this makes inode::i_verity_info no longer necessarily used, update comments that mentioned it. This is a prerequisite for a later commit that removes inode::i_verity_info, saving memory and improving cache efficiency on filesystems that don't support fsverity. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-9-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fsverity.h | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 1eb7eae580be..e0f132cb7839 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -28,6 +28,12 @@ /* Verity operations for filesystems */ struct fsverity_operations { + /** + * The offset of the pointer to struct fsverity_info in the + * filesystem-specific part of the inode, relative to the beginning of + * the common part of the inode (the 'struct inode'). + */ + ptrdiff_t inode_info_offs; /** * Begin enabling verity on the given file. 
@@ -124,15 +130,33 @@ struct fsverity_operations { #ifdef CONFIG_FS_VERITY +static inline struct fsverity_info ** +fsverity_info_addr(const struct inode *inode) +{ + if (inode->i_sb->s_vop->inode_info_offs == 0) + return (struct fsverity_info **)&inode->i_verity_info; + return (void *)inode + inode->i_sb->s_vop->inode_info_offs; +} + static inline struct fsverity_info *fsverity_get_info(const struct inode *inode) { /* - * Pairs with the cmpxchg_release() in fsverity_set_info(). - * I.e., another task may publish ->i_verity_info concurrently, - * executing a RELEASE barrier. We need to use smp_load_acquire() here - * to safely ACQUIRE the memory the other task published. + * Since this function can be called on inodes belonging to filesystems + * that don't support fsverity at all, and fsverity_info_addr() doesn't + * work on such filesystems, we have to start with an IS_VERITY() check. + * Checking IS_VERITY() here is also useful to minimize the overhead of + * fsverity_active() on non-verity files. + */ + if (!IS_VERITY(inode)) + return NULL; + + /* + * Pairs with the cmpxchg_release() in fsverity_set_info(). I.e., + * another task may publish the inode's verity info concurrently, + * executing a RELEASE barrier. Use smp_load_acquire() here to safely + * ACQUIRE the memory the other task published. */ - return smp_load_acquire(&inode->i_verity_info); + return smp_load_acquire(fsverity_info_addr(inode)); } /* enable.c */ @@ -156,11 +180,11 @@ void __fsverity_cleanup_inode(struct inode *inode); * fsverity_cleanup_inode() - free the inode's verity info, if present * @inode: an inode being evicted * - * Filesystems must call this on inode eviction to free ->i_verity_info. + * Filesystems must call this on inode eviction to free the inode's verity info. 
*/ static inline void fsverity_cleanup_inode(struct inode *inode) { - if (inode->i_verity_info) + if (*fsverity_info_addr(inode)) __fsverity_cleanup_inode(inode); } @@ -267,12 +291,12 @@ static inline bool fsverity_verify_page(struct page *page) * fsverity_active() - do reads from the inode need to go through fs-verity? * @inode: inode to check * - * This checks whether ->i_verity_info has been set. + * This checks whether the inode's verity info has been set. * * Filesystems call this from ->readahead() to check whether the pages need to * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to * a race condition where the file is being read concurrently with - * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.) + * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before the verity info.) * * Return: true if reads need to go through fs-verity, otherwise false */ @@ -287,7 +311,7 @@ static inline bool fsverity_active(const struct inode *inode) * @filp: the struct file being set up * * When opening a verity file, deny the open if it is for writing. Otherwise, - * set up the inode's ->i_verity_info if not already done. + * set up the inode's verity info if not already done. * * When combined with fscrypt, this must be called after fscrypt_file_open(). * Otherwise, we won't have the key set up to decrypt the verity metadata. -- cgit v1.2.3 From 818c659ac164e4e4639ceaedaccbdfebb1ef63b5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:05 -0700 Subject: fs: remove inode::i_verity_info Now that all fsverity-capable filesystems store the pointer to fsverity_info in the filesystem-specific part of the inode structure, inode::i_verity_info is no longer needed. Update fsverity_info_addr() to no longer support the fallback to inode::i_verity_info. Finally, remove inode::i_verity_info itself, and move the forward declaration of struct fsverity_info from fs.h (which no longer needs it) to fsverity.h. 
The end result of the migration to the filesystem-specific pointer is memory savings on CONFIG_FS_VERITY=y kernels for all filesystems that don't support fsverity. Specifically, their in-memory inodes are now smaller by the size of a pointer: either 4 or 8 bytes. Co-developed-by: Christian Brauner Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-13-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 ----- include/linux/fsverity.h | 10 ++++++++-- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1dafa18169be..12ecc6b0e6f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -73,7 +73,6 @@ struct seq_file; struct workqueue_struct; struct iov_iter; struct fscrypt_operations; -struct fsverity_info; struct fsverity_operations; struct fsnotify_mark_connector; struct fsnotify_sb_info; @@ -779,10 +778,6 @@ struct inode { struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif -#ifdef CONFIG_FS_VERITY - struct fsverity_info *i_verity_info; -#endif - void *i_private; /* fs or device private pointer */ } __randomize_layout; diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index e0f132cb7839..844f7b8b56bb 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -26,6 +26,8 @@ /* Arbitrary limit to bound the kmalloc() size. Can be changed. */ #define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384 +struct fsverity_info; + /* Verity operations for filesystems */ struct fsverity_operations { /** @@ -130,11 +132,15 @@ struct fsverity_operations { #ifdef CONFIG_FS_VERITY +/* + * Returns the address of the verity info pointer within the filesystem-specific + * part of the inode. (To save memory on filesystems that don't support + * fsverity, a field in 'struct inode' itself is no longer used.) 
+ */ static inline struct fsverity_info ** fsverity_info_addr(const struct inode *inode) { - if (inode->i_sb->s_vop->inode_info_offs == 0) - return (struct fsverity_info **)&inode->i_verity_info; + VFS_WARN_ON_ONCE(inode->i_sb->s_vop->inode_info_offs == 0); return (void *)inode + inode->i_sb->s_vop->inode_info_offs; } -- cgit v1.2.3 From 8a3d00dde63a339d31d1fdeead24ddfd4d459c70 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 10 Aug 2025 00:57:06 -0700 Subject: fsverity: check IS_VERITY() in fsverity_cleanup_inode() Since getting the address of the fsverity_info has gotten a bit more expensive, make fsverity_cleanup_inode() check for IS_VERITY() instead. This avoids adding more overhead to non-verity files. This assumes that verity info is never set when !IS_VERITY(), which is currently true, but add a VFS_WARN_ON_ONCE() that asserts that. (This of course defeats the optimization, but only when CONFIG_VFS_DEBUG=y.) Signed-off-by: Eric Biggers Link: https://lore.kernel.org/20250810075706.172910-14-ebiggers@kernel.org Signed-off-by: Christian Brauner --- include/linux/fsverity.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 844f7b8b56bb..5bc7280425a7 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -190,8 +190,15 @@ void __fsverity_cleanup_inode(struct inode *inode); */ static inline void fsverity_cleanup_inode(struct inode *inode) { - if (*fsverity_info_addr(inode)) + /* + * Only IS_VERITY() inodes can have verity info, so start by checking + * for IS_VERITY() (which is faster than retrieving the pointer to the + * verity info). This minimizes overhead for non-verity inodes. 
+ */ if (IS_VERITY(inode)) __fsverity_cleanup_inode(inode); + else + VFS_WARN_ON_ONCE(*fsverity_info_addr(inode) != NULL); } /* read_metadata.c */ -- cgit v1.2.3 From 370ac285f23aecae40600851fb4a1a9e75e50973 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:59 +0530 Subject: block: avoid cpu_hotplug_lock dependency on freeze_lock A recent lockdep[1] splat observed while running blktest block/005 reveals a potential deadlock caused by the cpu_hotplug_lock dependency on ->freeze_lock. This dependency was introduced by commit 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key"). That change added a static key to avoid fetching q->rq_qos when neither blk-wbt nor blk-iolatency is configured. The static key dynamically patches kernel text to a NOP when disabled, eliminating overhead of fetching q->rq_qos in the I/O hot path. However, enabling a static key at runtime requires acquiring both cpu_hotplug_lock and jump_label_mutex. When this happens after the queue has already been frozen (i.e., while holding ->freeze_lock), it creates a locking dependency from cpu_hotplug_lock to ->freeze_lock, which leads to a potential deadlock reported by lockdep [1]. To resolve this, replace the static key mechanism with q->queue_flags: QUEUE_FLAG_QOS_ENABLED. This flag is evaluated in the fast path before accessing q->rq_qos. If the flag is set, we proceed to fetch q->rq_qos; otherwise, the access is skipped. Since q->queue_flags is commonly accessed in IO hotpath and resides in the first cacheline of struct request_queue, checking it imposes minimal overhead while eliminating the deadlock risk. This change avoids the lockdep splat without introducing performance regressions.
[1] https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Reported-by: Shinichiro Kawasaki Closes: https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key") Tested-by: Shin'ichiro Kawasaki Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fe1797bbec42 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,6 +656,7 @@ enum { QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ + QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ QUEUE_FLAG_MAX }; -- cgit v1.2.3 From 07cf71bf25cd4e5735ff13468e7b86f02c3665cb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Aug 2025 19:56:50 -0700 Subject: net: page_pool: add page_pool_get() There is a page_pool_put() function but no get equivalent. Having multiple references to a page pool is quite useful. It avoids branching in create / destroy paths in drivers which support memory providers. Use the new helper in bnxt. 
Acked-by: Jesper Dangaard Brouer Reviewed-by: Dragos Tatulea Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250820025704.166248-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index db180626be06..aa3719f28216 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -489,6 +489,11 @@ page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool, offset, dma_sync_size); } +static inline void page_pool_get(struct page_pool *pool) +{ + refcount_inc(&pool->user_cnt); +} + static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); -- cgit v1.2.3 From 0f07b7919d679050d354d3279faa74bdc7ce17a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:12 +0200 Subject: uprobes: Rename arch_uretprobe_trampoline function We are about to add uprobe trampoline, so cleaning up the namespace. 
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-3-jolsa@kernel.org --- include/linux/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 516217c39094..01112f27cd21 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -224,7 +224,7 @@ extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); extern void uprobe_handle_trampoline(struct pt_regs *regs); -extern void *arch_uprobe_trampoline(unsigned long *psize); +extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); #else /* !CONFIG_UPROBES */ struct uprobes_state { -- cgit v1.2.3 From 82afdd05a16a424409682e06a53d6afcda038d30 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:13 +0200 Subject: uprobes: Make copy_from_page global Making copy_from_page global and adding uprobe prefix. Adding the uprobe prefix to copy_to_page as well for symmetry. 
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-4-jolsa@kernel.org --- include/linux/uprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 01112f27cd21..7447e15559b8 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -226,6 +226,7 @@ extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, extern void uprobe_handle_trampoline(struct pt_regs *regs); extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); +extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From 33d7b2beaf34a3c0f6406bc76f6e1b1755150ad9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:14 +0200 Subject: uprobes: Add uprobe_write function Adding uprobe_write function that does what uprobe_write_opcode did so far, but allows to pass verify callback function that checks the memory location before writing the opcode. It will be used in following changes to implement specific checking logic for instruction update. The uprobe_write_opcode now calls uprobe_write with verify_opcode as the verify callback. 
Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Masami Hiramatsu (Google) Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-5-jolsa@kernel.org --- include/linux/uprobes.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 7447e15559b8..e13382054435 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -187,6 +187,9 @@ struct uprobes_state { struct xol_area *xol_area; }; +typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, + uprobe_opcode_t *opcode); + extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); extern int set_orig_insn(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -195,6 +198,8 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); +extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, + uprobe_opcode_t opcode, uprobe_write_verify_t verify); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From f8b7c528b4fb7018d12b6bb63bb52576cfc73697 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:15 +0200 Subject: uprobes: Add nbytes argument to uprobe_write Adding nbytes argument to uprobe_write and related functions as preparation for writing whole instructions in 
following changes. Also renaming opcode arguments to insn, which seems to fit better. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-6-jolsa@kernel.org --- include/linux/uprobes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e13382054435..147c4a0a1af9 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -188,7 +188,7 @@ struct uprobes_state { }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, - uprobe_opcode_t *opcode); + uprobe_opcode_t *insn, int nbytes); extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -199,7 +199,7 @@ extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t opcode, uprobe_write_verify_t verify); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From ec46350fe1e2338f42ee84974c36b25afe8ba53a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:16 +0200 Subject: uprobes: Add is_register argument to uprobe_write and uprobe_write_opcode The uprobe_write has special path to restore the original 
page when we write original instruction back. This happens when uprobe_write detects that we want to write anything else but breakpoint instruction. Moving the detection away and passing it to uprobe_write as argument, so it's possible to write different instructions (other than just breakpoint and rest). Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-7-jolsa@kernel.org --- include/linux/uprobes.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 147c4a0a1af9..518b26756469 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -197,9 +197,10 @@ extern bool is_swbp_insn(uprobe_opcode_t *insn); extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); -extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); +extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, + bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From 18a111256a0b4fedfe47101f084441a84d7e357a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: 
Sun, 20 Jul 2025 13:21:17 +0200 Subject: uprobes: Add do_ref_ctr argument to uprobe_write function Making update_ref_ctr call in uprobe_write conditional based on do_ref_ctr argument. This way we can use uprobe_write for instruction update without doing ref_ctr_offset update. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250720112133.244369-8-jolsa@kernel.org --- include/linux/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 518b26756469..5080619560d4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -200,7 +200,7 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); -- cgit v1.2.3 From 91440ff4cafad4c86322a612e523f7f021a493e7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:18 +0200 Subject: uprobes/x86: Add mapping for optimized uprobe trampolines Adding support to add special mapping for user space trampoline with following functions: uprobe_trampoline_get - find or add uprobe_trampoline uprobe_trampoline_put - remove or destroy 
uprobe_trampoline The user space trampoline is exported as arch specific user space special mapping through tramp_mapping, which is initialized in following changes with new uprobe syscall. The uprobe trampoline needs to be callable/reachable from the probed address, so while searching for available address we use is_reachable_by_call function to decide if the uprobe trampoline is callable from the probe address. All uprobe_trampoline objects are stored in uprobes_state object and are cleaned up when the process mm_struct goes down. Adding new arch hooks for that, because this change is x86_64 specific. Locking is provided by callers in following changes. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-9-jolsa@kernel.org --- include/linux/uprobes.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 5080619560d4..b40d33aae016 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -17,6 +17,7 @@ #include #include #include +#include struct uprobe; struct vm_area_struct; @@ -185,6 +186,9 @@ struct xol_area; struct uprobes_state { struct xol_area *xol_area; +#ifdef CONFIG_X86_64 + struct hlist_head head_tramps; +#endif }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, @@ -233,6 +237,8 @@ extern void uprobe_handle_trampoline(struct pt_regs *regs); extern void *arch_uretprobe_trampoline(unsigned long *psize); extern unsigned long uprobe_get_trampoline_vaddr(void); extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); +extern void arch_uprobe_clear_state(struct mm_struct *mm); +extern void arch_uprobe_init_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From 56101b69c9190667f473b9f93f8b6d8209aaa816 Mon 
Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:19 +0200 Subject: uprobes/x86: Add uprobe syscall to speed up uprobe Adding new uprobe syscall that calls uprobe handlers for given 'breakpoint' address. The idea is that the 'breakpoint' address calls the user space trampoline which executes the uprobe syscall. The syscall handler reads the return address of the initial call to retrieve the original 'breakpoint' address. With this address we find the related uprobe object and call its consumers. Adding the arch_uprobe_trampoline_mapping function that provides uprobe trampoline mapping. This mapping is backed with one global page initialized at __init time and shared by all the mapping instances. We do not allow executing the uprobe syscall if the caller is not from the uprobe trampoline mapping. The uprobe syscall ensures the consumer (bpf program) sees register values in the state before the trampoline was called. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-10-jolsa@kernel.org --- include/linux/syscalls.h | 2 ++ include/linux/uprobes.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 77f45e5d4413..66c06fcdfe19 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1005,6 +1005,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); asmlinkage long sys_uretprobe(void); +asmlinkage long sys_uprobe(void); + /* pciconfig: alpha, arm, arm64, ia64, sparc */ asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b40d33aae016..b6b077cc7d0f 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -239,6 +239,7 @@ extern unsigned long
uprobe_get_trampoline_vaddr(void); extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len); extern void arch_uprobe_clear_state(struct mm_struct *mm); extern void arch_uprobe_init_state(struct mm_struct *mm); +extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From ba2bfc97b4629b10bd8d02b36e04f3932a04cac4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 20 Jul 2025 13:21:20 +0200 Subject: uprobes/x86: Add support to optimize uprobes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Putting together all the previously added pieces to support optimized uprobes on top of 5-byte nop instruction. The current uprobe execution goes through following: - installs breakpoint instruction over original instruction - exception handler hit and calls related uprobe consumers - and either simulates original instruction or does out of line single step execution of it - returns to user space The optimized uprobe path does following: - checks the original instruction is 5-byte nop (plus other checks) - adds (or uses existing) user space trampoline with uprobe syscall - overwrites original instruction (5-byte nop) with call to user space trampoline - the user space trampoline executes uprobe syscall that calls related uprobe consumers - trampoline returns back to next instruction This approach won't speed up all uprobes as it's limited to using nop5 as original instruction, but we plan to use nop5 as USDT probe instruction (which currently uses single byte nop) and speed up the USDT probes. The arch_uprobe_optimize triggers the uprobe optimization and is called after first uprobe hit. 
I originally had it called on uprobe installation but then it clashed with elf loader, because the user space trampoline was added in a place where loader might need to put elf segments, so I decided to do it after first uprobe hit when loading is done. The uprobe is un-optimized in arch specific set_orig_insn call. The instruction overwrite is x86 arch specific and needs to go through 3 updates: (on top of nop5 instruction) - write int3 into 1st byte - write last 4 bytes of the call instruction - update the call instruction opcode And cleanup goes through similar reverse stages: - overwrite call opcode with breakpoint (int3) - write last 4 bytes of the nop5 instruction - write the nop5 first instruction byte We do not unmap and release uprobe trampoline when it's no longer needed, because there's no easy way to make sure none of the threads is still inside the trampoline. But we do not waste memory, because there's just a single page for all the uprobe trampoline mappings. We do waste frame on page mapping for every 4GB by keeping the uprobe trampoline page mapped, but that seems ok. We take the benefit from the fact that set_swbp and set_orig_insn are called under mmap_write_lock(mm), so we can use the current instruction as the state the uprobe is in - nop5/breakpoint/call trampoline - and decide the needed action (optimize/un-optimize) based on that.
Attaching the speed up from benchs/run_bench_uprobes.sh script: current: usermode-count : 152.604 ± 0.044M/s syscall-count : 13.359 ± 0.042M/s --> uprobe-nop : 3.229 ± 0.002M/s uprobe-push : 3.086 ± 0.004M/s uprobe-ret : 1.114 ± 0.004M/s uprobe-nop5 : 1.121 ± 0.005M/s uretprobe-nop : 2.145 ± 0.002M/s uretprobe-push : 2.070 ± 0.001M/s uretprobe-ret : 0.931 ± 0.001M/s uretprobe-nop5 : 0.957 ± 0.001M/s after the change: usermode-count : 152.448 ± 0.244M/s syscall-count : 14.321 ± 0.059M/s uprobe-nop : 3.148 ± 0.007M/s uprobe-push : 2.976 ± 0.004M/s uprobe-ret : 1.068 ± 0.003M/s --> uprobe-nop5 : 7.038 ± 0.007M/s uretprobe-nop : 2.109 ± 0.004M/s uretprobe-push : 2.035 ± 0.001M/s uretprobe-ret : 0.908 ± 0.001M/s uretprobe-nop5 : 3.377 ± 0.009M/s I see bit more speed up on Intel (above) compared to AMD. The big nop5 speed up is partly due to emulating nop5 and partly due to optimization. The key speed up we do this for is the USDT switch from nop to nop5: uprobe-nop : 3.148 ± 0.007M/s uprobe-nop5 : 7.038 ± 0.007M/s Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250720112133.244369-11-jolsa@kernel.org --- include/linux/uprobes.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b6b077cc7d0f..08ef78439d0d 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -192,7 +192,7 @@ struct uprobes_state { }; typedef int (*uprobe_write_verify_t)(struct page *page, unsigned long vaddr, - uprobe_opcode_t *insn, int nbytes); + uprobe_opcode_t *insn, int nbytes, void *data); extern void __init uprobes_init(void); extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); @@ -204,7 +204,8 @@ extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct 
arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t, bool is_register); extern int uprobe_write(struct arch_uprobe *auprobe, struct vm_area_struct *vma, const unsigned long opcode_vaddr, - uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr); + uprobe_opcode_t *insn, int nbytes, uprobe_write_verify_t verify, bool is_register, bool do_update_ref_ctr, + void *data); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); @@ -240,6 +241,7 @@ extern void uprobe_copy_from_page(struct page *page, unsigned long vaddr, void * extern void arch_uprobe_clear_state(struct mm_struct *mm); extern void arch_uprobe_init_state(struct mm_struct *mm); extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr); +extern void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From b08a784a5d1495c42ff9b0c70887d49211cddfe0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 Aug 2025 19:03:54 +0100 Subject: net: Introduce skb_copy_datagram_from_iter_full() In a similar manner to copy_from_iter()/copy_from_iter_full(), introduce skb_copy_datagram_from_iter_full() which reverts the iterator to its initial state when returning an error. A subsequent fix for a vsock regression will make use of this new function. Cc: Christian Brauner Cc: Alexander Viro Signed-off-by: Will Deacon Acked-by: Michael S. 
Tsirkin Reviewed-by: Stefan Hajnoczi Link: https://patch.msgid.link/20250818180355.29275-2-will@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..fa633657e4c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4172,6 +4172,8 @@ int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); -- cgit v1.2.3 From 11d5674fc2e5e75ccaa13685a909c14e033544b7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 11 Aug 2025 12:44:00 +0800 Subject: crypto: hash - Make HASH_MAX_DESCSIZE a bit more obvious Move S390_SHA_CTX_SIZE into crypto/hash.h so that the derivation of HASH_MAX_DESCSIZE is less cryptic. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index bbaeae705ef0..586700332c73 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -177,14 +177,26 @@ struct shash_desc { #define HASH_MAX_DIGESTSIZE 64 +/* + * The size of a core hash state and a partial block. The final byte + * is the length of the partial block. + */ +#define HASH_STATE_AND_BLOCK(state, block) ((state) + (block) + 1) + + /* Worst case is sha3-224. */ -#define HASH_MAX_STATESIZE 200 + 144 + 1 +#define HASH_MAX_STATESIZE HASH_STATE_AND_BLOCK(200, 144) + +/* This needs to match arch/s390/crypto/sha.h. 
*/ +#define S390_SHA_CTX_SIZE 216 /* * Worst case is hmac(sha3-224-s390). Its context is a nested 'shash_desc' * containing a 'struct s390_sha_ctx'. */ -#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 361) +#define SHA3_224_S390_DESCSIZE HASH_STATE_AND_BLOCK(S390_SHA_CTX_SIZE, 144) +#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + \ + SHA3_224_S390_DESCSIZE) #define MAX_SYNC_HASH_REQSIZE (sizeof(struct ahash_request) + \ HASH_MAX_DESCSIZE) -- cgit v1.2.3 From 3202d6ed9368fc1e842fda73727553ae614633f8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 21 Aug 2025 15:07:50 +0200 Subject: mmc: core: Add infrastructure for undervoltage handling Implement the core infrastructure to allow MMC bus types to handle REGULATOR_EVENT_UNDER_VOLTAGE events from power regulators. This is primarily aimed at allowing devices like eMMC to perform an emergency shutdown to prevent data corruption when a power failure is imminent. This patch introduces: - A new 'handle_undervoltage' function pointer to 'struct mmc_bus_ops'. Bus drivers (e.g., for eMMC) can implement this to define their emergency procedures. - A workqueue ('uv_work') in 'struct mmc_supply' to handle the event asynchronously in a high-priority context. - A new function 'mmc_handle_undervoltage()' which is called from the workqueue. It stops the host queue to prevent races with card removal, checks for the bus op, and invokes the handler. - Functions to register and unregister the regulator notifier, intended to be called by bus drivers like 'mmc_attach_mmc' when a compatible card is detected. The notifier is only registered for the main vmmc supply, as undervoltage handling for vqmmc or vqmmc2 is not required at this time. 
Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20250821130751.2089587-2-o.rempel@pengutronix.de Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 5ed5d203de23..e0d935a4ac1d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -337,11 +337,15 @@ struct mmc_slot { struct regulator; struct mmc_pwrseq; +struct notifier_block; struct mmc_supply { struct regulator *vmmc; /* Card power supply */ struct regulator *vqmmc; /* Optional Vccq supply */ struct regulator *vqmmc2; /* Optional supply for phy */ + + struct notifier_block vmmc_nb; /* Notifier for vmmc */ + struct work_struct uv_work; /* Undervoltage work */ }; struct mmc_ctx { @@ -494,6 +498,13 @@ struct mmc_host { unsigned int can_dma_map_merge:1; /* merging can be used */ unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */ + /* + * Indicates if an undervoltage event has already been handled. + * This prevents repeated regulator notifiers from triggering + * multiple REGULATOR_EVENT_UNDER_VOLTAGE events. + */ + unsigned int undervoltage:1; /* Undervoltage state */ + int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ -- cgit v1.2.3 From f41345f47fb267a9c95ca710c33448f8d0d81d83 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 20 Aug 2025 15:18:06 +0200 Subject: bpf: Use tnums for JEQ/JNE is_branch_taken logic In the following toy program (reg states minimized for readability), R0 and R1 always have different values at instruction 6. This is obvious when reading the program but cannot be guessed from ranges alone as they overlap (R0 in [0; 0xc0000000], R1 in [1024; 0xc0000400]). 
0: call bpf_get_prandom_u32#7 ; R0_w=scalar() 1: w0 = w0 ; R0_w=scalar(var_off=(0x0; 0xffffffff)) 2: r0 >>= 30 ; R0_w=scalar(var_off=(0x0; 0x3)) 3: r0 <<= 30 ; R0_w=scalar(var_off=(0x0; 0xc0000000)) 4: r1 = r0 ; R1_w=scalar(var_off=(0x0; 0xc0000000)) 5: r1 += 1024 ; R1_w=scalar(var_off=(0x400; 0xc0000000)) 6: if r1 != r0 goto pc+1 Looking at tnums however, we can deduce that R1 is always different from R0 because their tnums don't agree on known bits. This patch uses this logic to improve is_scalar_branch_taken in case of BPF_JEQ and BPF_JNE. This change has a tiny impact on complexity, which was measured with the Cilium complexity CI test. That test covers 72 programs with various build and load time configurations for a total of 970 test cases. For 80% of test cases, the patch has no impact. On the other test cases, the patch decreases complexity by only 0.08% on average. In the best case, the verifier needs to walk 3% less instructions and, in the worst case, 1.5% more. Overall, the patch has a small positive impact, especially for our largest programs. 
Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/be3ee70b6e489c49881cb1646114b1d861b5c334.1755694147.git.paul.chaignon@gmail.com --- include/linux/tnum.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 57ed3035cc30..0ffb77ffe0e8 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -51,6 +51,9 @@ struct tnum tnum_xor(struct tnum a, struct tnum b); /* Multiply two tnums, return @a * @b */ struct tnum tnum_mul(struct tnum a, struct tnum b); +/* Return true if the known bits of both tnums have the same value */ +bool tnum_overlap(struct tnum a, struct tnum b); + /* Return a tnum representing numbers satisfying both @a and @b */ struct tnum tnum_intersect(struct tnum a, struct tnum b); -- cgit v1.2.3 From afa3701c0e45ecb9e4d160048ca4e353c7489948 Mon Sep 17 00:00:00 2001 From: Tiffany Yang Date: Thu, 21 Aug 2025 18:37:52 -0700 Subject: cgroup: cgroup.stat.local time accounting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There isn't yet a clear way to identify a set of "lost" time that everyone (or at least a wider group of users) cares about. However, users can perform some delay accounting by iterating over components of interest. This patch allows cgroup v2 freezing time to be one of those components. Track the cumulative time that each v2 cgroup spends freezing and expose it to userland via a new local stat file in cgroupfs. Thank you to Michal, who provided the ASCII art in the updated documentation. To access this value: $ mkdir /sys/fs/cgroup/test $ cat /sys/fs/cgroup/test/cgroup.stat.local freeze_time_total 0 Ensure consistent freeze time reads with freeze_seq, a per-cgroup sequence counter. Writes are serialized using the css_set_lock. 
Signed-off-by: Tiffany Yang Cc: Tejun Heo Cc: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6b93a64115fe..539c64eeef38 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -433,6 +433,23 @@ struct cgroup_freezer_state { * frozen, SIGSTOPped, and PTRACEd. */ int nr_frozen_tasks; + + /* Freeze time data consistency protection */ + seqcount_t freeze_seq; + + /* + * Most recent time the cgroup was requested to freeze. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 freeze_start_nsec; + + /* + * Total duration the cgroup has spent freezing. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 frozen_nsec; }; struct cgroup { -- cgit v1.2.3 From 6bbd0d3f0c23fc53c17409dd7476f38ae0ff0cd9 Mon Sep 17 00:00:00 2001 From: Pavel Shpakovskiy Date: Fri, 22 Aug 2025 12:20:55 +0300 Subject: Bluetooth: hci_sync: fix set_local_name race condition Function set_name_sync() uses hdev->dev_name field to send HCI_OP_WRITE_LOCAL_NAME command, but copying from data to hdev->dev_name is called after mgmt cmd was queued, so it is possible that function set_name_sync() will read old name value. This change adds name as a parameter for function hci_update_name_sync() to avoid race condition. 
Fixes: 6f6ff38a1e14 ("Bluetooth: hci_sync: Convert MGMT_OP_SET_LOCAL_NAME") Signed-off-by: Pavel Shpakovskiy Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 5224f57f6af2..e352a4e0ef8d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -93,7 +93,7 @@ int hci_update_class_sync(struct hci_dev *hdev); int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); -int hci_update_name_sync(struct hci_dev *hdev); +int hci_update_name_sync(struct hci_dev *hdev, const u8 *name); int hci_write_ssp_mode_sync(struct hci_dev *hdev, u8 mode); int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, -- cgit v1.2.3 From d47cc4dea17391c99b943fa8d70a279e906b2843 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 Aug 2025 13:16:15 -0700 Subject: bpf: Use sha1() instead of sha1_transform() in bpf_prog_calc_tag() Now that there's a proper SHA-1 library API, just use that instead of the low-level SHA-1 compression function. This eliminates the need for bpf_prog_calc_tag() to implement the SHA-1 padding itself. No functional change; the computed tags remain the same. 
Signed-off-by: Eric Biggers Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20250811201615.564461-1-ebiggers@kernel.org --- include/linux/filter.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index c0a74fb9fcb1..9092d8ea95c8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -997,12 +997,6 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) return prog->len * sizeof(struct bpf_insn); } -static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) -{ - return round_up(bpf_prog_insn_size(prog) + - sizeof(__be64) + 1, SHA1_BLOCK_SIZE); -} - static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), -- cgit v1.2.3 From ec79003c5f9d2c7f9576fc69b8dbda80305cbe3a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 21 Aug 2025 02:18:24 +0000 Subject: atm: atmtcp: Prevent arbitrary write in atmtcp_recv_control(). syzbot reported the splat below. [0] When atmtcp_v_open() or atmtcp_v_close() is called via connect() or close(), atmtcp_send_control() is called to send an in-kernel special message. The message has ATMTCP_HDR_MAGIC in atmtcp_control.hdr.length. Also, a pointer of struct atm_vcc is set to atmtcp_control.vcc. The notable thing is struct atmtcp_control is uAPI but has a space for an in-kernel pointer. struct atmtcp_control { struct atmtcp_hdr hdr; /* must be first */ ... atm_kptr_t vcc; /* both directions */ ... } __ATM_API_ALIGN; typedef struct { unsigned char _[8]; } __ATM_API_ALIGN atm_kptr_t; The special message is processed in atmtcp_recv_control() called from atmtcp_c_send(). atmtcp_c_send() is vcc->dev->ops->send() and called from 2 paths: 1. .ndo_start_xmit() (vcc->send() == atm_send_aal0()) 2. 
vcc_sendmsg() The problem is sendmsg() does not validate the message length and userspace can abuse atmtcp_recv_control() to overwrite any kptr by atmtcp_control. Let's add a new ->pre_send() hook to validate messages from sendmsg(). [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc00200000ab: 0000 [#1] SMP KASAN PTI KASAN: probably user-memory-access in range [0x0000000100000558-0x000000010000055f] CPU: 0 UID: 0 PID: 5865 Comm: syz-executor331 Not tainted 6.17.0-rc1-syzkaller-00215-gbab3ce404553 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:atmtcp_recv_control drivers/atm/atmtcp.c:93 [inline] RIP: 0010:atmtcp_c_send+0x1da/0x950 drivers/atm/atmtcp.c:297 Code: 4d 8d 75 1a 4c 89 f0 48 c1 e8 03 42 0f b6 04 20 84 c0 0f 85 15 06 00 00 41 0f b7 1e 4d 8d b7 60 05 00 00 4c 89 f0 48 c1 e8 03 <42> 0f b6 04 20 84 c0 0f 85 13 06 00 00 66 41 89 1e 4d 8d 75 1c 4c RSP: 0018:ffffc90003f5f810 EFLAGS: 00010203 RAX: 00000000200000ab RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802a510000 RSI: 00000000ffffffff RDI: ffff888030a6068c RBP: ffff88802699fb40 R08: ffff888030a606eb R09: 1ffff1100614c0dd R10: dffffc0000000000 R11: ffffffff8718fc40 R12: dffffc0000000000 R13: ffff888030a60680 R14: 000000010000055f R15: 00000000ffffffff FS: 00007f8d7e9236c0(0000) GS:ffff888125c1c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 0000000075bde000 CR4: 00000000003526f0 Call Trace: vcc_sendmsg+0xa10/0xc60 net/atm/common.c:645 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:729 ____sys_sendmsg+0x505/0x830 net/socket.c:2614 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668 __sys_sendmsg net/socket.c:2700 [inline] __do_sys_sendmsg net/socket.c:2705 [inline] __se_sys_sendmsg net/socket.c:2703 [inline] __x64_sys_sendmsg+0x19b/0x260 net/socket.c:2703 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] 
do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8d7e96a4a9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f8d7e923198 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f8d7e9f4308 RCX: 00007f8d7e96a4a9 RDX: 0000000000000000 RSI: 0000200000000240 RDI: 0000000000000005 RBP: 00007f8d7e9f4300 R08: 65732f636f72702f R09: 65732f636f72702f R10: 65732f636f72702f R11: 0000000000000246 R12: 00007f8d7e9c10ac R13: 00007f8d7e9231a0 R14: 0000200000000200 R15: 0000200000000250 Modules linked in: Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68a6767c.050a0220.3d78fd.0011.GAE@google.com/ Tested-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250821021901.2814721-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/atmdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 45f2f278b50a..70807c679f1a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -185,6 +185,7 @@ struct atmdev_ops { /* only send is required */ int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, void __user *arg); #endif + int (*pre_send)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); -- cgit v1.2.3 From 3c716487936aa54083c130d46ad5747769695e09 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 14 Aug 2025 18:59:49 +0200 Subject: genirq: Remove GENERIC_IRQ_LEGACY IA64 is gone and with it the last 
GENERIC_IRQ_LEGACY user. Remove GENERIC_IRQ_LEGACY. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250814165949.hvtP03r4@linutronix.de --- include/linux/irq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1d6b606a81ef..c9bcdbf6bc63 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -976,10 +976,6 @@ static inline void irq_free_desc(unsigned int irq) irq_free_descs(irq, 1); } -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq); -#endif - /** * struct irq_chip_regs - register offsets for struct irq_gci * @enable: Enable register offset to reg_base -- cgit v1.2.3 From 7a721a2fee2bce01af26699a87739db8ca8ea3c8 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 14 Aug 2025 07:28:31 +0800 Subject: genirq: Add irq_chip_(startup/shutdown)_parent() As the MSI controller on SG2044 uses PLIC as the underlying interrupt controller, it needs to call irq_enable() and irq_disable() to startup/shutdown interrupts. Otherwise, the MSI interrupt cannot be started up correctly and will not respond to any incoming interrupt. Introduce irq_chip_startup_parent() and irq_chip_shutdown_parent() to allow the interrupt controller to call the irq_startup()/irq_shutdown() callbacks of the parent interrupt chip. In case the irq_startup()/irq_shutdown() callbacks are not implemented for the parent interrupt chip, this will fall back to irq_chip_enable_parent() or irq_chip_disable_parent().
Suggested-by: Thomas Gleixner Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Tested-by: Chen Wang # Pioneerbox Reviewed-by: Chen Wang Link: https://lore.kernel.org/all/20250813232835.43458-2-inochiama@gmail.com Link: https://lore.kernel.org/lkml/20250722224513.22125-1-inochiama@gmail.com/ --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index c9bcdbf6bc63..c67e76fbcc07 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -669,6 +669,8 @@ extern int irq_chip_set_parent_state(struct irq_data *data, extern int irq_chip_get_parent_state(struct irq_data *data, enum irqchip_irq_state which, bool *state); +extern void irq_chip_shutdown_parent(struct irq_data *data); +extern unsigned int irq_chip_startup_parent(struct irq_data *data); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); -- cgit v1.2.3 From 54f45a30c0d0153d2be091ba2d683ab6db6d1d5b Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 14 Aug 2025 07:28:32 +0800 Subject: PCI/MSI: Add startup/shutdown for per device domains As the RISC-V PLIC cannot apply affinity settings without invoking irq_enable(), it will make the interrupt unavailable when used as an underlying interrupt chip for the MSI controller. Implement the irq_startup() and irq_shutdown() callbacks for the PCI MSI and MSI-X templates. For chips that specify MSI_FLAG_PCI_MSI_STARTUP_PARENT, the parent startup and shutdown functions are invoked. That allows the interrupt on the parent chip to be enabled if the interrupt has not been enabled during allocation. This is necessary for MSI controllers which use PLIC as underlying parent interrupt chip.
Suggested-by: Thomas Gleixner Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Tested-by: Chen Wang # Pioneerbox Reviewed-by: Chen Wang Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/all/20250813232835.43458-3-inochiama@gmail.com --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index e5e86a8529fb..3111ba95fbde 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -568,6 +568,8 @@ enum { MSI_FLAG_PARENT_PM_DEV = (1 << 8), /* Support for parent mask/unmask */ MSI_FLAG_PCI_MSI_MASK_PARENT = (1 << 9), + /* Support for parent startup/shutdown */ + MSI_FLAG_PCI_MSI_STARTUP_PARENT = (1 << 10), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), -- cgit v1.2.3 From ab6d91d141a801dadf9eed7860b2ea09c9268149 Mon Sep 17 00:00:00 2001 From: Nickolay Goppen Date: Fri, 15 Aug 2025 19:56:51 +0300 Subject: dt-bindings: clock: gcc-sdm660: Add LPASS/CDSP vote clocks/GDSCs Add defines for the missing clocks, which are required to power up the related remote processors. 
Co-developed-by: Konrad Dybcio Signed-off-by: Konrad Dybcio Signed-off-by: Nickolay Goppen Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250815-gcc-sdm660-vote-clocks-and-gdscs-v1-1-c5a8af040093@yandex.ru Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-sdm660.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-sdm660.h b/include/dt-bindings/clock/qcom,gcc-sdm660.h index 74c22f67da21..f19018b742f5 100644 --- a/include/dt-bindings/clock/qcom,gcc-sdm660.h +++ b/include/dt-bindings/clock/qcom,gcc-sdm660.h @@ -138,10 +138,16 @@ #define GCC_UFS_UNIPRO_CORE_HW_CTL_CLK 128 #define GCC_RX0_USB2_CLKREF_CLK 129 #define GCC_RX1_USB2_CLKREF_CLK 130 +#define GCC_HLOS1_VOTE_LPASS_ADSP_SMMU_CLK 131 +#define GCC_HLOS1_VOTE_TURING_ADSP_SMMU_CLK 132 +#define GCC_HLOS2_VOTE_TURING_ADSP_SMMU_CLK 133 #define PCIE_0_GDSC 0 #define UFS_GDSC 1 #define USB_30_GDSC 2 +#define HLOS1_VOTE_TURING_ADSP_GDSC 3 +#define HLOS2_VOTE_TURING_ADSP_GDSC 4 +#define HLOS1_VOTE_LPASS_ADSP_GDSC 5 #define GCC_QUSB2PHY_PRIM_BCR 0 #define GCC_QUSB2PHY_SEC_BCR 1 -- cgit v1.2.3 From 7d50d9bf1cd00d6bab0abf3b01d5d261aa6a2b04 Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Mon, 11 Aug 2025 21:40:33 +0800 Subject: dt-bindings: clock: spacemit: CLK_SSPA_I2S_BCLK for SSPA In order to use the virtual clock SSPAx_I2S_BCLK in the device tree and register it in the driver, this patch introduces the macro definition. 
Fixes: 1b72c59db0add ("clk: spacemit: Add clock support for SpacemiT K1 SoC") Acked-by: Rob Herring (Arm) Signed-off-by: Troy Mitchell Link: https://lore.kernel.org/r/20250811-k1-clk-i2s-v5-1-ebadd06e1e91@linux.spacemit.com Signed-off-by: Yixun Lan --- include/dt-bindings/clock/spacemit,k1-syscon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/spacemit,k1-syscon.h b/include/dt-bindings/clock/spacemit,k1-syscon.h index 2714c3fe66cd..505205453d7f 100644 --- a/include/dt-bindings/clock/spacemit,k1-syscon.h +++ b/include/dt-bindings/clock/spacemit,k1-syscon.h @@ -182,6 +182,8 @@ #define CLK_SSPA1_BUS 97 #define CLK_TSEN_BUS 98 #define CLK_IPC_AP2AUD_BUS 99 +#define CLK_SSPA0_I2S_BCLK 100 +#define CLK_SSPA1_I2S_BCLK 101 /* APBC resets */ #define RESET_UART0 0 -- cgit v1.2.3 From 92a96b0a227e91dc42475265a1ce766b6cd044fa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 17 Aug 2025 23:09:18 +0100 Subject: io_uring: add request poisoning Poison various request fields on free. __io_req_caches_free() is a slow path, so can be done unconditionally, but gate it on kasan for io_req_add_to_cache(). Note that some fields are logically retained between cache allocations and can't be poisoned in io_req_add_to_cache(). Ideally, it'd be replaced with KASAN'ed caches, but that can't be enabled because of some synchronisation nuances. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7a78e8a7f5be434313c400650b862e36c211b312.1755459452.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/poison.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/poison.h b/include/linux/poison.h index 8ca2235f78d5..299e2dd7da6d 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -90,4 +90,7 @@ /********** lib/stackdepot.c **********/ #define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA)) +/********** io_uring/ **********/ +#define IO_URING_PTR_POISON ((void *)(0x1091UL + POISON_POINTER_DELTA)) + #endif -- cgit v1.2.3 From 5fda51255439addd1c9059098e30847a375a1008 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Aug 2025 20:03:39 -0600 Subject: io_uring/kbuf: switch to storing struct io_buffer_list locally Currently the buffer list is stored in struct io_kiocb. The buffer list can be of two types: 1) Classic/legacy buffer list. These don't need to get referenced after a buffer pick, and hence storing them in struct io_kiocb is perfectly fine. 2) Ring provided buffer lists. These DO need to be referenced after the initial buffer pick, as they need to get consumed later on. This can be either just incrementing the head of the ring, or it can be consuming parts of a buffer if incremental buffer consumption has been configured. For case 2, io_uring needs to be careful not to access the buffer list after the initial pick-and-execute context. The core does recycling of these, but it's easy to make a mistake, because it's stored in the io_kiocb which does persist across multiple execution contexts. Either because it's a multishot request, or simply because it needed some kind of async trigger (eg poll) for retry purposes. Add a struct io_buffer_list to struct io_br_sel, which is always on stack for the various users of it.
This prevents the buffer list from leaking outside of that execution context, and additionally it enables kbuf to not even pass back the struct io_buffer_list if the given context isn't appropriately locked already. This doesn't fix any bugs, it's simply a defensive measure to prevent any issues with reuse of a buffer list. Link: https://lore.kernel.org/r/20250821020750.598432-12-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 80a178f3d896..1d33984611bc 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -674,12 +674,6 @@ struct io_kiocb { /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ struct io_buffer *kbuf; - /* - * stores buffer ID for ring provided buffers, valid IFF - * REQ_F_BUFFER_RING is set. - */ - struct io_buffer_list *buf_list; - struct io_rsrc_node *buf_node; }; -- cgit v1.2.3 From d589bcddaa3f8b1668499c3f0466863df3abe37a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 21 Aug 2025 12:02:06 +0800 Subject: io-uring: move `struct io_br_sel` into io_uring_types.h Move `struct io_br_sel` into io_uring_types.h and prepare for supporting provided buffer on uring_cmd. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250821040210.1152145-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 1d33984611bc..9c6c548f43f5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -85,6 +85,25 @@ struct io_mapped_region { unsigned flags; }; +/* + * Return value from io_buffer_list selection, to avoid stashing it in + * struct io_kiocb. 
For legacy/classic provided buffers, keeping a reference + * across execution contexts are fine. But for ring provided buffers, the + * list may go away as soon as ->uring_lock is dropped. As the io_kiocb + * persists, it's better to just keep the buffer local for those cases. + */ +struct io_br_sel { + struct io_buffer_list *buf_list; + /* + * Some selection parts return the user address, others return an error. + */ + union { + void __user *addr; + ssize_t val; + }; +}; + + /* * Arbitrary limit, can be raised if need be */ -- cgit v1.2.3 From 620a50c927004f5c9420a7ca9b1a55673dbf3941 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 21 Aug 2025 12:02:07 +0800 Subject: io_uring: uring_cmd: add multishot support Add UAPI flag IORING_URING_CMD_MULTISHOT for supporting multishot uring_cmd operations with provided buffer. This enables drivers to post multiple completion events from a single uring_cmd submission, which is useful for: - Notifying userspace of device events (e.g., interrupt handling) - Supporting devices with multiple event sources (e.g., multi-queue devices) - Avoiding the need for device poll() support when events originate from multiple sources device-wide The implementation adds two new APIs: - io_uring_cmd_buffer_select(): selects a buffer from the provided buffer group for multishot uring_cmd - io_uring_mshot_cmd_post_cqe(): posts a CQE after event data is pushed to the provided buffer Multishot uring_cmd must be used with buffer select (IOSQE_BUFFER_SELECT) and is mutually exclusive with IORING_URING_CMD_FIXED for now.
The ublk driver will be the first user of this functionality: https://github.com/ming1/linux/commits/ublk-devel/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250821040210.1152145-3-ming.lei@redhat.com [axboe: fold in fix for !CONFIG_IO_URING] Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 26 ++++++++++++++++++++++++++ include/uapi/linux/io_uring.h | 6 +++++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index cfa6d0c0c322..4bd3a7339243 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -70,6 +70,21 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, /* Execute the request from a blocking context */ void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); +/* + * Select a buffer from the provided buffer group for multishot uring_cmd. + * Returns the selected buffer address and size. + */ +struct io_br_sel io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, + unsigned buf_group, size_t *len, + unsigned int issue_flags); + +/* + * Complete a multishot uring_cmd event. This will post a CQE to the completion + * queue and update the provided buffer. 
+ */ +bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, + struct io_br_sel *sel, unsigned int issue_flags); + #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, @@ -102,6 +117,17 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd) { } +static inline struct io_br_sel +io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group, + size_t *len, unsigned int issue_flags) +{ + return (struct io_br_sel) { .val = -EOPNOTSUPP }; +} +static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, + ssize_t ret, unsigned int issue_flags) +{ + return true; +} #endif /* diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6957dc539d83..1e935f8901c5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -298,9 +298,13 @@ enum io_uring_op { * sqe->uring_cmd_flags top 8bits aren't available for userspace * IORING_URING_CMD_FIXED use registered buffer; pass this flag * along with setting sqe->buf_index. + * IORING_URING_CMD_MULTISHOT must be used with buffer select, like other + * multishot commands. Not compatible with + * IORING_URING_CMD_FIXED, for now. */ #define IORING_URING_CMD_FIXED (1U << 0) -#define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED +#define IORING_URING_CMD_MULTISHOT (1U << 1) +#define IORING_URING_CMD_MASK (IORING_URING_CMD_FIXED | IORING_URING_CMD_MULTISHOT) /* -- cgit v1.2.3 From d0201c4436c53412146d526855c585fa9d54ca13 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Aug 2025 14:01:46 -0600 Subject: io_uring: remove io_ctx_cqe32() helper It's pretty pointless and only used for the tracing helper, get rid of it. 
Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 6 ------ include/trace/events/io_uring.h | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 9c6c548f43f5..d1e25f3fe0b3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -740,10 +740,4 @@ struct io_overflow_cqe { struct list_head list; struct io_uring_cqe cqe; }; - -static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx) -{ - return ctx->flags & IORING_SETUP_CQE32; -} - #endif diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 178ab6f611be..6a970625a3ea 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -340,8 +340,8 @@ TP_PROTO(struct io_ring_ctx *ctx, void *req, struct io_uring_cqe *cqe), __entry->user_data = cqe->user_data; __entry->res = cqe->res; __entry->cflags = cqe->flags; - __entry->extra1 = io_ctx_cqe32(ctx) ? cqe->big_cqe[0] : 0; - __entry->extra2 = io_ctx_cqe32(ctx) ? cqe->big_cqe[1] : 0; + __entry->extra1 = ctx->flags & IORING_SETUP_CQE32 ? cqe->big_cqe[0] : 0; + __entry->extra2 = ctx->flags & IORING_SETUP_CQE32 ? cqe->big_cqe[1] : 0; ), TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x " -- cgit v1.2.3 From b69458735d826f0676585623d028a0fd474f3e4f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Aug 2025 14:08:14 -0600 Subject: io_uring: add UAPI definitions for mixed CQE postings This adds the CQE flags related to supporting a mixed CQ ring mode, where both normal (16b) and big (32b) CQEs may be posted. No functional changes in this patch. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1e935f8901c5..7af8d10b3aba 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -491,12 +491,22 @@ struct io_uring_cqe { * other provided buffer type, all completions with a * buffer passed back is automatically returned to the * application. + * IORING_CQE_F_SKIP If set, then the application/liburing must ignore this + * CQE. Its only purpose is to fill a gap in the ring, + * if posting a large CQE is attempted when the ring has + * just a single small CQE worth of space left before + * wrapping. + * IORING_CQE_F_32 If set, this is a 32b/big-cqe posting. Use with rings + * setup in a mixed CQE mode, where both 16b and 32b + * CQEs may be posted to the CQ ring. */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) #define IORING_CQE_F_NOTIF (1U << 3) #define IORING_CQE_F_BUF_MORE (1U << 4) +#define IORING_CQE_F_SKIP (1U << 5) +#define IORING_CQE_F_32 (1U << 15) #define IORING_CQE_BUFFER_SHIFT 16 -- cgit v1.2.3 From 89a885972140ea68d3f55457d23d0da2350c96ac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Aug 2025 14:13:36 -0600 Subject: io_uring/trace: support completion tracing of mixed 32b CQEs Check for IORING_CQE_F_32 as well, not just if the ring was setup with IORING_SETUP_CQE32 to only support big CQEs.
Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 6a970625a3ea..45d15460b495 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -340,8 +340,8 @@ TP_PROTO(struct io_ring_ctx *ctx, void *req, struct io_uring_cqe *cqe), __entry->user_data = cqe->user_data; __entry->res = cqe->res; __entry->cflags = cqe->flags; - __entry->extra1 = ctx->flags & IORING_SETUP_CQE32 ? cqe->big_cqe[0] : 0; - __entry->extra2 = ctx->flags & IORING_SETUP_CQE32 ? cqe->big_cqe[1] : 0; + __entry->extra1 = ctx->flags & IORING_SETUP_CQE32 || cqe->flags & IORING_CQE_F_32 ? cqe->big_cqe[0] : 0; + __entry->extra2 = ctx->flags & IORING_SETUP_CQE32 || cqe->flags & IORING_CQE_F_32 ? cqe->big_cqe[1] : 0; ), TP_printk("ring %p, req %p, user_data 0x%llx, result %d, cflags 0x%x " -- cgit v1.2.3 From 6e376f245f19feeadddafb2c3fa5fbd6469ecdfe Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 25 Aug 2025 11:48:42 +0200 Subject: gpio: generic: provide to_gpio_generic_chip() Provide a helper allowing to convert a struct gpio_chip address to the struct gpio_generic_chip that wraps it. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250825-gpio-mmio-gpio-conv-v1-1-356b4b1d5110@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/generic.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index f3a8db4598bb..5a85ecbef8d2 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -55,6 +55,12 @@ struct gpio_generic_chip { struct gpio_chip gc; }; +static inline struct gpio_generic_chip * +to_gpio_generic_chip(struct gpio_chip *gc) +{ + return container_of(gc, struct gpio_generic_chip, gc); +} + /** * gpio_generic_chip_init() - Initialize a generic GPIO chip. 
* @chip: Generic GPIO chip to set up. -- cgit v1.2.3 From 16397871b6e35fa46a2bec27b3558f93b050c6fc Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 25 Aug 2025 11:48:43 +0200 Subject: gpio: generic: provide helpers for reading and writing registers Provide helpers wrapping the read_reg() and write_reg() callbacks of the generic GPIO API that are called directly by many users. This is done to hide their implementation ahead of moving them into the separate generic GPIO struct. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250825-gpio-mmio-gpio-conv-v1-2-356b4b1d5110@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/generic.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index 5a85ecbef8d2..4c0626b53ec9 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -100,6 +100,37 @@ gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, return chip->gc.set(&chip->gc, offset, value); } +/** + * gpio_generic_read_reg() - Read a register using the underlying callback. + * @chip: Generic GPIO chip to use. + * @reg: Register to read. + * + * Returns: value read from register. + */ +static inline unsigned long +gpio_generic_read_reg(struct gpio_generic_chip *chip, void __iomem *reg) +{ + if (WARN_ON(!chip->gc.read_reg)) + return 0; + + return chip->gc.read_reg(reg); +} + +/** + * gpio_generic_write_reg() - Write a register using the underlying callback. + * @chip: Generic GPIO chip to use. + * @reg: Register to write to. + * @val: New value to write. 
+ */ +static inline void gpio_generic_write_reg(struct gpio_generic_chip *chip, + void __iomem *reg, unsigned long val) +{ + if (WARN_ON(!chip->gc.write_reg)) + return; + + chip->gc.write_reg(reg, val); +} + #define gpio_generic_chip_lock(gen_gc) \ raw_spin_lock(&(gen_gc)->gc.bgpio_lock) -- cgit v1.2.3 From 60ad9a07319283e6e1094cef3e972e754315c024 Mon Sep 17 00:00:00 2001 From: Junjie Cao Date: Wed, 20 Aug 2025 08:47:55 +0800 Subject: iio: core: switch info_mask fields to unsigned long to match find_bit helpers for_each_set_bit()/find_*_bit() expect arrays of unsigned long (see include/linux/find.h), but industrialio-core passed const long * into iio_device_add_info_mask_type{,_avail}(). These masks are used purely as bit arrays and are populated via BIT() (1UL << n). Switch the info_mask_* fields and the corresponding function parameters to unsigned long so the types match the helpers. This removes sparse warnings about signedness mismatches (seen with 'make C=1' CF='-Wsparse-all') without changing behavior or struct layout. No functional change intended. 
Suggested-by: Jonathan Cameron Signed-off-by: Junjie Cao Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250820004755.69627-1-junjie.cao@intel.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 2f5560646ee4..872ebdf0dd77 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -271,14 +271,14 @@ struct iio_chan_spec { unsigned int num_ext_scan_type; }; }; - long info_mask_separate; - long info_mask_separate_available; - long info_mask_shared_by_type; - long info_mask_shared_by_type_available; - long info_mask_shared_by_dir; - long info_mask_shared_by_dir_available; - long info_mask_shared_by_all; - long info_mask_shared_by_all_available; + unsigned long info_mask_separate; + unsigned long info_mask_separate_available; + unsigned long info_mask_shared_by_type; + unsigned long info_mask_shared_by_type_available; + unsigned long info_mask_shared_by_dir; + unsigned long info_mask_shared_by_dir_available; + unsigned long info_mask_shared_by_all; + unsigned long info_mask_shared_by_all_available; const struct iio_event_spec *event_spec; unsigned int num_event_specs; const struct iio_chan_spec_ext_info *ext_info; -- cgit v1.2.3 From 5195b777552d2e2fa735c6cad75797efa132bd60 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 29 Jul 2025 02:50:10 +0300 Subject: media: v4l2-subdev: Make struct v4l2_subdev_stream_config private The v4l2_subdev_stream_config structure holds configuration data for a stream. It was meant to be used internally only, but already found its way into the ds90ub913 driver. Now that the driver has been fixed, make the structure private to v4l2-subdev.c to avoid using it by accident. 
Signed-off-by: Laurent Pinchart Reviewed-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 5dcf4065708f..8f54fd0d90ad 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -36,6 +36,7 @@ struct v4l2_event_subscription; struct v4l2_fh; struct v4l2_subdev; struct v4l2_subdev_fh; +struct v4l2_subdev_stream_config; struct tuner_setup; struct v4l2_mbus_frame_desc; struct led_classdev; @@ -683,30 +684,6 @@ struct v4l2_subdev_pad_config { struct v4l2_fract interval; }; -/** - * struct v4l2_subdev_stream_config - Used for storing stream configuration. - * - * @pad: pad number - * @stream: stream number - * @enabled: has the stream been enabled with v4l2_subdev_enable_streams() - * @fmt: &struct v4l2_mbus_framefmt - * @crop: &struct v4l2_rect to be used for crop - * @compose: &struct v4l2_rect to be used for compose - * @interval: frame interval - * - * This structure stores configuration for a stream. - */ -struct v4l2_subdev_stream_config { - u32 pad; - u32 stream; - bool enabled; - - struct v4l2_mbus_framefmt fmt; - struct v4l2_rect crop; - struct v4l2_rect compose; - struct v4l2_fract interval; -}; - /** * struct v4l2_subdev_stream_configs - A collection of stream configs. * -- cgit v1.2.3 From f37df9a0eb5e43fcfe02cbaef076123dc0d79c7e Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 8 Aug 2025 11:59:15 +0300 Subject: media: v4l2-subdev: Fix alloc failure check in v4l2_subdev_call_state_try() v4l2_subdev_call_state_try() macro allocates a subdev state with __v4l2_subdev_state_alloc(), but does not check the returned value. If __v4l2_subdev_state_alloc fails, it returns an ERR_PTR, and that would cause v4l2_subdev_call_state_try() to crash. Add proper error handling to v4l2_subdev_call_state_try(). 
Signed-off-by: Tomi Valkeinen Fixes: 982c0487185b ("media: subdev: Add v4l2_subdev_call_state_try() macro") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aJTNtpDUbTz7eyJc%40stanley.mountain/ Cc: stable@vger.kernel.org Reviewed-by: Dan Carpenter Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 8f54fd0d90ad..4b28086808c9 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1939,19 +1939,23 @@ extern const struct v4l2_subdev_ops v4l2_subdev_call_wrappers; * * Note: only legacy non-MC drivers may need this macro. */ -#define v4l2_subdev_call_state_try(sd, o, f, args...) \ - ({ \ - int __result; \ - static struct lock_class_key __key; \ - const char *name = KBUILD_BASENAME \ - ":" __stringify(__LINE__) ":state->lock"; \ - struct v4l2_subdev_state *state = \ - __v4l2_subdev_state_alloc(sd, name, &__key); \ - v4l2_subdev_lock_state(state); \ - __result = v4l2_subdev_call(sd, o, f, state, ##args); \ - v4l2_subdev_unlock_state(state); \ - __v4l2_subdev_state_free(state); \ - __result; \ +#define v4l2_subdev_call_state_try(sd, o, f, args...) 
\ + ({ \ + int __result; \ + static struct lock_class_key __key; \ + const char *name = KBUILD_BASENAME \ + ":" __stringify(__LINE__) ":state->lock"; \ + struct v4l2_subdev_state *state = \ + __v4l2_subdev_state_alloc(sd, name, &__key); \ + if (IS_ERR(state)) { \ + __result = PTR_ERR(state); \ + } else { \ + v4l2_subdev_lock_state(state); \ + __result = v4l2_subdev_call(sd, o, f, state, ##args); \ + v4l2_subdev_unlock_state(state); \ + __v4l2_subdev_state_free(state); \ + } \ + __result; \ }) /** -- cgit v1.2.3 From 683342ce3c0dae068bf0ee157ee12c13088193f7 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 18 Aug 2025 16:49:39 +0300 Subject: media: v4l2-common: Drop the workaround from v4l2_get_link_freq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the workaround that allowed calling v4l2_get_link_freq() on the control handler. Signed-off-by: Sakari Ailus Reviewed-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-common.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index 39dd0c78d70f..ab0ce8e605c3 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -560,15 +560,14 @@ int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, u32 pixelformat, /** * v4l2_get_link_freq - Get link rate from transmitter * - * @pad: The transmitter's media pad (or control handler for non-MC users or - * compatibility reasons, don't use in new code) + * @pad: The transmitter's media pad * @mul: The multiplier between pixel rate and link frequency. Bits per pixel on * D-PHY, samples per clock on parallel. 0 otherwise. * @div: The divisor between pixel rate and link frequency. Number of data lanes * times two on D-PHY, 1 on parallel. 0 otherwise. 
* * This function is intended for obtaining the link frequency from the - * transmitter sub-devices. It returns the link rate, either from the + * transmitter sub-device's pad. It returns the link rate, either from the * V4L2_CID_LINK_FREQ control implemented by the transmitter, or value * calculated based on the V4L2_CID_PIXEL_RATE implemented by the transmitter. * @@ -578,19 +577,9 @@ int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, u32 pixelformat, * * %-EINVAL: Invalid link frequency value */ #ifdef CONFIG_MEDIA_CONTROLLER -#define v4l2_get_link_freq(pad, mul, div) \ - _Generic(pad, \ - struct media_pad *: __v4l2_get_link_freq_pad, \ - struct v4l2_ctrl_handler *: __v4l2_get_link_freq_ctrl) \ - (pad, mul, div) -s64 __v4l2_get_link_freq_pad(struct media_pad *pad, unsigned int mul, - unsigned int div); -#else -#define v4l2_get_link_freq(handler, mul, div) \ - __v4l2_get_link_freq_ctrl(handler, mul, div) +s64 v4l2_get_link_freq(struct media_pad *pad, unsigned int mul, + unsigned int div); #endif -s64 __v4l2_get_link_freq_ctrl(struct v4l2_ctrl_handler *handler, - unsigned int mul, unsigned int div); void v4l2_simplify_fraction(u32 *numerator, u32 *denominator, unsigned int n_terms, unsigned int threshold); -- cgit v1.2.3 From bdc9776dac860cae7f61e2b48929f87597306644 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 21 Aug 2025 14:56:49 +0300 Subject: media: v4l2-common: Update v4l2_get_link_freq() documentation Document that v4l2_get_link_freq() obtains the link frequency primarily by calling the get_mbus_config sub-device pad operation. 
Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/media/v4l2-common.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index ab0ce8e605c3..e31b4434ea5d 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -566,10 +566,12 @@ int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, u32 pixelformat, * @div: The divisor between pixel rate and link frequency. Number of data lanes * times two on D-PHY, 1 on parallel. 0 otherwise. * - * This function is intended for obtaining the link frequency from the - * transmitter sub-device's pad. It returns the link rate, either from the - * V4L2_CID_LINK_FREQ control implemented by the transmitter, or value - * calculated based on the V4L2_CID_PIXEL_RATE implemented by the transmitter. + * This function obtains and returns the link frequency from the transmitter + * sub-device's pad. The link frequency is retrieved using the get_mbus_config + * sub-device pad operation. If this fails, the function falls back to obtaining + * the frequency either directly from the V4L2_CID_LINK_FREQ control if + * implemented by the transmitter, or by calculating it from the pixel rate + * obtained from the V4L2_CID_PIXEL_RATE control. * * Return: * * >0: Link frequency -- cgit v1.2.3 From 7b78fa862296f8931e42ecaec3703e307e4044d2 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Mon, 11 Aug 2025 13:50:17 +0530 Subject: media: cadence: cdns-csi2rx: Support multiple pixels per clock cycle The output pixel interface is a parallel bus (32 bits), which supports sending multiple pixels (1, 2 or 4) per clock cycle for smaller pixel widths like RAW8-RAW16. 
Dual-pixel and Quad-pixel modes can be a requirement if the export rate of the Cadence IP in Single-pixel mode maxes out before the maximum supported DPHY-RX frequency, which is the case with TI's integration of this IP [1]. So, we export a function that lets the downstream hardware block request a higher pixel-per-clock on a particular output pad. We check if we can support the requested pixels per clock given the known maximum for the currently configured format. If not, we set it to the highest feasible value and return this value to the caller. [1] Section 12.6.1.4.8.14 CSI_RX_IF Programming Restrictions of AM62 TRM Link: https://www.ti.com/lit/pdf/spruj16 Tested-by: Yemike Abhilash Chandra (on SK-AM68) Signed-off-by: Jai Luthra Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/cadence/cdns-csi2rx.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/media/cadence/cdns-csi2rx.h (limited to 'include') diff --git a/include/media/cadence/cdns-csi2rx.h b/include/media/cadence/cdns-csi2rx.h new file mode 100644 index 000000000000..782d03fc36d1 --- /dev/null +++ b/include/media/cadence/cdns-csi2rx.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _CDNS_CSI2RX_H +#define _CDNS_CSI2RX_H + +#include + +/** + * cdns_csi2rx_negotiate_ppc - Negotiate pixel-per-clock on output interface + * + * @subdev: point to &struct v4l2_subdev + * @pad: pad number of the source pad + * @ppc: pointer to requested pixel-per-clock value + * + * Returns 0 on success, negative error code otherwise. 
+ */ +int cdns_csi2rx_negotiate_ppc(struct v4l2_subdev *subdev, unsigned int pad, + u8 *ppc); + +#endif -- cgit v1.2.3 From 7a6fc1634cea6f220228a69b1c0210e6b8b1aaf0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:45 -0700 Subject: blk-mq-dma: create blk_map_iter type The req_iterator happens to have similar fields to what the dma iterator needs, but we're not necessarily iterating a request's bi_io_vec. Create a new type that can be amended for additional future use. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-2-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-mq-dma.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index c26a01aeae00..6a7e3828673d 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -5,6 +5,11 @@ #include #include +struct blk_map_iter { + struct bvec_iter iter; + struct bio *bio; +}; + struct blk_dma_iter { /* Output address range for this iteration */ dma_addr_t addr; @@ -14,7 +19,7 @@ struct blk_dma_iter { blk_status_t status; /* Internal to blk_rq_dma_map_iter_* */ - struct req_iterator iter; + struct blk_map_iter iter; struct pci_p2pdma_map_state p2pdma; }; -- cgit v1.2.3 From dae75dead2359edd7c55e1964e0edf7d03535b31 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:46 -0700 Subject: blk-mq-dma: provide the bio_vec array being iterated This will make it easier to add different sources of the bvec array, like for upcoming integrity support, rather than assume to use the bio's bi_io_vec. It also makes iterating "special" payloads more in common with iterating normal payloads. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Martin K.
Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-3-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-mq-dma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index 6a7e3828673d..e5cb5e46fc92 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -8,6 +8,7 @@ struct blk_map_iter { struct bvec_iter iter; struct bio *bio; + struct bio_vec *bvecs; }; struct blk_dma_iter { -- cgit v1.2.3 From 92fb75fd14b041038e30bc725ab4c1e625243573 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:47 -0700 Subject: blk-mq-dma: require unmap caller provide p2p map type In preparing for integrity dma mappings, we can't rely on the request flag because data and metadata may have different mapping types. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-4-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-mq-dma.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index e5cb5e46fc92..881880095e0d 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -47,14 +47,15 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) * @dma_dev: device to unmap from * @state: DMA IOVA state * @mapped_len: number of bytes to unmap + * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR * * Returns %false if the callers need to manually unmap every DMA segment * mapped using @iter or %true if no work is left to be done. 
*/ static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, - struct dma_iova_state *state, size_t mapped_len) + struct dma_iova_state *state, size_t mapped_len, bool is_p2p) { - if (req->cmd_flags & REQ_P2PDMA) + if (is_p2p) return true; if (dma_use_iova(state)) { -- cgit v1.2.3 From 7092639031a1bd5320ab827e8f665350f332b7ce Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:48 -0700 Subject: blk-mq: remove REQ_P2PDMA flag It's not serving any particular purpose. pci_p2pdma_state() already has all the appropriate checks, so the config and flag checks are not guarding anything. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-5-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 09b99d52fd36..930daff207df 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -386,7 +386,6 @@ enum req_flag_bits { __REQ_DRV, /* for driver use */ __REQ_FS_PRIVATE, /* for file system (submitter) use */ __REQ_ATOMIC, /* for atomic write operations */ - __REQ_P2PDMA, /* contains P2P DMA pages */ /* * Command specific flags, keep last: */ @@ -419,7 +418,6 @@ enum req_flag_bits { #define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE) #define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC) -#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA) #define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) -- cgit v1.2.3 From fec9b16dc5550191fd85af118271ea00e8dcc5f8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 13 Aug 2025 08:31:50 -0700 Subject: blk-mq-dma: add scatter-less integrity data DMA mapping Similar to regular data, introduce more efficient integrity mapping 
helpers that do away with the scatterlist structure. This uses the block mapping iterator to add IOVA segments if IOMMU is enabled, or maps directly if not. This also supports P2P segments if integrity data ever wants to allocate that type of memory. Signed-off-by: Keith Busch Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250813153153.3260897-7-kbusch@meta.com Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 17 +++++++++++++++++ include/linux/blk-mq-dma.h | 1 + 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index e67a2b6e8f11..78fe2459e661 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -4,6 +4,7 @@ #include #include +#include struct request; @@ -31,6 +32,11 @@ int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes); int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd, struct logical_block_metadata_cap __user *argp); +bool blk_rq_integrity_dma_map_iter_start(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + struct blk_dma_iter *iter); +bool blk_rq_integrity_dma_map_iter_next(struct request *req, + struct device *dma_dev, struct blk_dma_iter *iter); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -115,6 +121,17 @@ static inline int blk_rq_integrity_map_user(struct request *rq, { return -EINVAL; } +static inline bool blk_rq_integrity_dma_map_iter_start(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + struct blk_dma_iter *iter) +{ + return false; +} +static inline bool blk_rq_integrity_dma_map_iter_next(struct request *req, + struct device *dma_dev, struct blk_dma_iter *iter) +{ + return false; +} static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) { return NULL; diff --git 
a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h index 881880095e0d..0f45ea110ca1 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -9,6 +9,7 @@ struct blk_map_iter { struct bvec_iter iter; struct bio *bio; struct bio_vec *bvecs; + bool is_integrity; }; struct blk_dma_iter { -- cgit v1.2.3 From ac46f5b6c6614668727732e117842c9fa7a42c19 Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Mon, 11 Aug 2025 14:45:05 +0100 Subject: ACPICA: Add SoundWire File Table (SWFT) signature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The File Download (FDL) process of SoundWire Class Audio (SDCA) driver, which provides code/data which may be required by an SDCA device, utilizes SWFT to obtain that code/data. There is a single SWFT for the system, and SWFT can contain multiple files (information about the file as well as its binary contents). The SWFT has a standard ACPI Descriptor Table Header, followed by SoundWire File definitions as described in Discovery and Configuration (DisCo) Specification for SoundWire® Link: https://github.com/acpica/acpica/commit/18c96022 Signed-off-by: Maciej Strozek Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250811134505.1162661-1-mstrozek@opensource.cirrus.com Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actbl2.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 048f5f47f8b8..f726bce3eb84 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -57,6 +57,7 @@ #define ACPI_SIG_SDEI "SDEI" /* Software Delegated Exception Interface Table */ #define ACPI_SIG_SDEV "SDEV" /* Secure Devices table */ #define ACPI_SIG_SVKL "SVKL" /* Storage Volume Key Location Table */ +#define ACPI_SIG_SWFT "SWFT" /* SoundWire File Table */ #define ACPI_SIG_TDEL "TDEL" /* TD Event Log Table */ /* @@ -3478,6 +3479,26 @@ enum acpi_svkl_format { ACPI_SVKL_FORMAT_RESERVED = 1 /* 1 and greater are reserved */ }; +/******************************************************************************* + * SWFT - SoundWire File Table + * + * Conforms to "Discovery and Configuration (DisCo) Specification for SoundWire" + * Version 2.1, 2 October 2023 + * + ******************************************************************************/ +struct acpi_sw_file { + u16 vendor_id; + u32 file_id; + u16 file_version; + u32 file_length; + u8 data[]; +}; + +struct acpi_table_swft { + struct acpi_table_header header; + struct acpi_sw_file files[]; +}; + /******************************************************************************* * * TDEL - TD-Event Log -- cgit v1.2.3 From 2eb22214c132374e11e681c44d7879c91f67f614 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Mon, 18 Aug 2025 19:49:33 +0800 Subject: drm/panel: Allow powering on panel follower after panel is enabled Some touch controllers have to be powered on after the panel's backlight is enabled. To support these controllers, introduce .panel_enabled() and .panel_disabling() to panel_follower_funcs and use them to power on the device after the panel and its backlight are enabled. 
Signed-off-by: Pin-yen Lin Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250818115015.2909525-1-treapking@chromium.org --- include/drm/drm_panel.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 843fb756a295..2407bfa60236 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -160,6 +160,20 @@ struct drm_panel_follower_funcs { * Called before the panel is powered off. */ int (*panel_unpreparing)(struct drm_panel_follower *follower); + + /** + * @panel_enabled: + * + * Called after the panel and the backlight have been enabled. + */ + int (*panel_enabled)(struct drm_panel_follower *follower); + + /** + * @panel_disabling: + * + * Called before the panel and the backlight are disabled. + */ + int (*panel_disabling)(struct drm_panel_follower *follower); }; struct drm_panel_follower { -- cgit v1.2.3 From cbdd16b818eef876dd2de9d503fe7397a0666cbe Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Mon, 18 Aug 2025 19:49:34 +0800 Subject: HID: i2c-hid: Make elan touch controllers power on after panel is enabled Introduce a new HID quirk to indicate that this device has to be enabled after the panel's backlight is enabled, and update the driver data for the elan devices to enable this quirk. This cannot be an I2C HID quirk because the kernel needs to acknowledge this before powering up the device and reading the VID/PID. When this quirk is enabled, register .panel_enabled()/.panel_disabling() instead for the panel follower. Also rename the *panel_prepare* functions into *panel_follower* because they could be called in other situations now. 
Fixes: bd3cba00dcc63 ("HID: i2c-hid: elan: Add support for Elan eKTH6915 i2c-hid touchscreens") Fixes: d06651bebf99e ("HID: i2c-hid: elan: Add elan-ekth6a12nay timing") Reviewed-by: Douglas Anderson Signed-off-by: Pin-yen Lin Acked-by: Jiri Kosina Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250818115015.2909525-2-treapking@chromium.org --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..c32425b5d011 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -364,6 +364,7 @@ struct hid_item { * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: * | @HID_QUIRK_IGNORE_SPECIAL_DRIVER + * | @HID_QUIRK_POWER_ON_AFTER_BACKLIGHT * | @HID_QUIRK_FULLSPEED_INTERVAL: * | @HID_QUIRK_NO_INIT_REPORTS: * | @HID_QUIRK_NO_IGNORE: @@ -391,6 +392,7 @@ struct hid_item { #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) #define HID_QUIRK_NOINVERT BIT(21) #define HID_QUIRK_IGNORE_SPECIAL_DRIVER BIT(22) +#define HID_QUIRK_POWER_ON_AFTER_BACKLIGHT BIT(23) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) -- cgit v1.2.3 From a214365140cc3009f07d4e14a8b481fd3dc41d31 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 10 Jul 2025 15:15:28 +0300 Subject: rculist: move list_for_each_rcu() to where it belongs The list_for_each_rcu() relies on the rcu_dereference() API which is not provided by the list.h. At the same time list.h is a low-level basic header that must not have dependencies like RCU, besides the fact of the potential circular dependencies in some cases. With all that said, move RCU related API to the rculist.h where it belongs. Signed-off-by: Andy Shevchenko Reviewed-by: Simona Vetter Reviewed-by: "Paul E. McKenney" Signed-off-by: Neeraj Upadhyay (AMD) Signed-off-by: "Paul E. 
McKenney" --- include/linux/list.h | 10 ---------- include/linux/rculist.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..e7bdad9b8618 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -686,16 +686,6 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) -/** - * list_for_each_rcu - Iterate over a list in an RCU-safe fashion - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - !list_is_head(pos, (head)); \ - pos = rcu_dereference(pos->next)) - /** * list_for_each_continue - continue iteration over a list * @pos: the &struct list_head to use as a loop cursor. diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 1b11926ddd47..2abba7552605 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -42,6 +42,16 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) */ #define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) +/** + * list_for_each_rcu - Iterate over a list in an RCU-safe fashion + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_rcu(pos, head) \ + for (pos = rcu_dereference((head)->next); \ + !list_is_head(pos, (head)); \ + pos = rcu_dereference(pos->next)) + /** * list_tail_rcu - returns the prev pointer of the head of the list * @head: the head of the list -- cgit v1.2.3 From e228e7d382fa85005ee2ebf303e1bf194aca49a8 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 25 Aug 2025 09:22:09 +0000 Subject: drm/gpuvm: fix various typos in .c and .h gpuvm file After working with this code for a while, I came across several typos. 
This patch fixes them. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250825-gpuvm-typo-fix-v1-1-14e9e78e28e6@google.com Signed-off-by: Danilo Krummrich --- include/drm/drm_gpuvm.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 274532facfd6..2e7088264355 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -103,7 +103,7 @@ struct drm_gpuva { } va; /** - * @gem: structure containing the &drm_gem_object and it's offset + * @gem: structure containing the &drm_gem_object and its offset */ struct { /** @@ -843,7 +843,7 @@ struct drm_gpuva_op_map { } va; /** - * @gem: structure containing the &drm_gem_object and it's offset + * @gem: structure containing the &drm_gem_object and its offset */ struct { /** @@ -1189,11 +1189,11 @@ struct drm_gpuvm_ops { /** * @sm_step_unmap: called from &drm_gpuvm_sm_map and - * &drm_gpuvm_sm_unmap to unmap an existent mapping + * &drm_gpuvm_sm_unmap to unmap an existing mapping * - * This callback is called when existent mapping needs to be unmapped. + * This callback is called when existing mapping needs to be unmapped. * This is the case when either a newly requested mapping encloses an - * existent mapping or an unmap of an existent mapping is requested. + * existing mapping or an unmap of an existing mapping is requested. * * The &priv pointer matches the one the driver passed to * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. -- cgit v1.2.3 From f45fc18b6de04483643e8aa2ab97737abfe03d59 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Aug 2025 09:56:03 +0200 Subject: net: airoha: Add airoha_ppe_dev struct definition Introduce airoha_ppe_dev struct as container for PPE offload callbacks consumed by the MT76 driver during flowtable offload for traffic received by the wlan NIC and forwarded to the wired one. 
Add airoha_ppe_setup_tc_block_cb routine to PPE offload ops for MT76 driver. Rely on airoha_ppe_dev pointer in airoha_ppe_setup_tc_block_cb signature. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250823-airoha-en7581-wlan-rx-offload-v3-2-f78600ec3ed8@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 117c63c2448d..4b4b8b9e426d 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -9,6 +9,41 @@ #include #include +struct airoha_ppe_dev { + struct { + int (*setup_tc_block_cb)(struct airoha_ppe_dev *dev, + void *type_data); + } ops; + + void *priv; +}; + +#if (IS_BUILTIN(CONFIG_NET_AIROHA) || IS_MODULE(CONFIG_NET_AIROHA)) +struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev); +void airoha_ppe_put_dev(struct airoha_ppe_dev *dev); + +static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev, + void *type_data) +{ + return dev->ops.setup_tc_block_cb(dev, type_data); +} +#else +static inline struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) +{ + return NULL; +} + +static inline void airoha_ppe_put_dev(struct airoha_ppe_dev *dev) +{ +} + +static inline int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, + void *type_data) +{ + return -EOPNOTSUPP; +} +#endif + #define NPU_NUM_CORES 8 #define NPU_NUM_IRQ 6 #define NPU_RX0_DESC_NUM 512 -- cgit v1.2.3 From a7cc1aa151e3a9c0314b995f06102f7763d3bd71 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 23 Aug 2025 09:56:04 +0200 Subject: net: airoha: Introduce check_skb callback in ppe_dev ops Export airoha_ppe_check_skb routine in ppe_dev ops. 
check_skb callback will be used by the MT76 driver in order to offload the traffic received by the wlan NIC and forwarded to the ethernet one. Add rx_wlan parameter to airoha_ppe_check_skb routine signature. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250823-airoha-en7581-wlan-rx-offload-v3-3-f78600ec3ed8@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 4b4b8b9e426d..1dc5b4e35ef9 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -9,10 +9,17 @@ #include #include +enum { + PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, +}; + struct airoha_ppe_dev { struct { int (*setup_tc_block_cb)(struct airoha_ppe_dev *dev, void *type_data); + void (*check_skb)(struct airoha_ppe_dev *dev, + struct sk_buff *skb, u16 hash, + bool rx_wlan); } ops; void *priv; @@ -27,6 +34,13 @@ static inline int airoha_ppe_dev_setup_tc_block_cb(struct airoha_ppe_dev *dev, { return dev->ops.setup_tc_block_cb(dev, type_data); } + +static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, + struct sk_buff *skb, + u16 hash, bool rx_wlan) +{ + dev->ops.check_skb(dev, skb, hash, rx_wlan); +} #else static inline struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) { @@ -42,6 +56,12 @@ static inline int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, { return -EOPNOTSUPP; } + +static inline void airoha_ppe_dev_check_skb(struct airoha_ppe_dev *dev, + struct sk_buff *skb, u16 hash, + bool rx_wlan) +{ +} #endif #define NPU_NUM_CORES 8 -- cgit v1.2.3 From 2d842b6c670b9bffee7c16cda284eb49644d8169 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Aug 2025 19:06:57 +0000 Subject: tcp: Remove timewait_sock_ops.twsk_destructor(). 
Since DCCP has been removed, sk->sk_prot->twsk_prot->twsk_destructor is always tcp_twsk_destructor(). Let's call tcp_twsk_destructor() directly in inet_twsk_free() and remove ->twsk_destructor(). While at it, tcp_twsk_destructor() is un-exported. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250822190803.540788-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/timewait_sock.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 62b3e9f2aed4..0a85ac64a66d 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -15,13 +15,6 @@ struct timewait_sock_ops { struct kmem_cache *twsk_slab; char *twsk_slab_name; unsigned int twsk_obj_size; - void (*twsk_destructor)(struct sock *sk); }; -static inline void twsk_destructor(struct sock *sk) -{ - if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) - sk->sk_prot->twsk_prot->twsk_destructor(sk); -} - #endif /* _TIMEWAIT_SOCK_H */ -- cgit v1.2.3 From cb16f4b6c73df4be16b74099f826fea30ef72426 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Aug 2025 19:06:59 +0000 Subject: tcp: Don't pass hashinfo to socket lookup helpers. These socket lookup functions required struct inet_hashinfo because they are shared by TCP and DCCP. * __inet_lookup_established() * __inet_lookup_listener() * __inet6_lookup_established() * inet6_lookup_listener() DCCP has gone, and we don't need to pass hashinfo down to them. Let's fetch net->ipv4.tcp_death_row.hashinfo directly in the above 4 functions. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250822190803.540788-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet6_hashtables.h | 18 ++++++------------ include/net/inet_hashtables.h | 37 +++++++++++++++---------------------- 2 files changed, 21 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index ab3929a2a956..1f985d2012ce 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -41,7 +41,6 @@ static inline unsigned int __inet6_ehashfn(const u32 lhash, * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(const struct net *net, - struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, @@ -65,7 +64,6 @@ struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk, inet6_ehashfn_t *ehashfn); struct sock *inet6_lookup_listener(const struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, @@ -83,7 +81,6 @@ struct sock *inet6_lookup_run_sk_lookup(const struct net *net, inet6_ehashfn_t *ehashfn); static inline struct sock *__inet6_lookup(const struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, @@ -92,14 +89,14 @@ static inline struct sock *__inet6_lookup(const struct net *net, const int dif, const int sdif, bool *refcounted) { - struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, - sport, daddr, hnum, + struct sock *sk = __inet6_lookup_established(net, saddr, sport, + daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; - return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, + return inet6_lookup_listener(net, skb, doff, saddr, sport, daddr, hnum, dif, sdif); } @@ -143,8 
+140,7 @@ struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff, return reuse_sk; } -static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, - struct sk_buff *skb, int doff, +static inline struct sock *__inet6_lookup_skb(struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, int iif, int sdif, @@ -161,14 +157,12 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, if (sk) return sk; - return __inet6_lookup(net, hashinfo, skb, - doff, &ip6h->saddr, sport, + return __inet6_lookup(net, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, ntohs(dport), iif, sdif, refcounted); } -struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, - struct sk_buff *skb, int doff, +struct sock *inet6_lookup(const struct net *net, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 19dbd9081d5a..a3b32241c2f2 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -294,7 +294,6 @@ int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); struct sock *__inet_lookup_listener(const struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, @@ -302,12 +301,12 @@ struct sock *__inet_lookup_listener(const struct net *net, const int dif, const int sdif); static inline struct sock *inet_lookup_listener(struct net *net, - struct inet_hashinfo *hashinfo, - struct sk_buff *skb, int doff, - __be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif, int sdif) + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, + int dif, int sdif) { - return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, + return __inet_lookup_listener(net, skb, doff, saddr, 
sport, daddr, ntohs(dport), dif, sdif); } @@ -358,7 +357,6 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, * not check it for lookups anymore, thanks Alexey. -DaveM */ struct sock *__inet_lookup_established(const struct net *net, - struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, const int dif, const int sdif); @@ -384,18 +382,16 @@ struct sock *inet_lookup_run_sk_lookup(const struct net *net, __be32 daddr, u16 hnum, const int dif, inet_ehashfn_t *ehashfn); -static inline struct sock * - inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, - const __be32 saddr, const __be16 sport, - const __be32 daddr, const __be16 dport, - const int dif) +static inline struct sock *inet_lookup_established(struct net *net, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, + const int dif) { - return __inet_lookup_established(net, hashinfo, saddr, sport, daddr, + return __inet_lookup_established(net, saddr, sport, daddr, ntohs(dport), dif, 0); } static inline struct sock *__inet_lookup(struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, @@ -405,18 +401,17 @@ static inline struct sock *__inet_lookup(struct net *net, u16 hnum = ntohs(dport); struct sock *sk; - sk = __inet_lookup_established(net, hashinfo, saddr, sport, + sk = __inet_lookup_established(net, saddr, sport, daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; - return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, + return __inet_lookup_listener(net, skb, doff, saddr, sport, daddr, hnum, dif, sdif); } static inline struct sock *inet_lookup(struct net *net, - struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, @@ -425,7 +420,7 @@ static inline 
struct sock *inet_lookup(struct net *net, struct sock *sk; bool refcounted; - sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, + sk = __inet_lookup(net, skb, doff, saddr, sport, daddr, dport, dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) @@ -473,8 +468,7 @@ struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff, return reuse_sk; } -static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, - struct sk_buff *skb, +static inline struct sock *__inet_lookup_skb(struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, @@ -492,8 +486,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, if (sk) return sk; - return __inet_lookup(net, hashinfo, skb, - doff, iph->saddr, sport, + return __inet_lookup(net, skb, doff, iph->saddr, sport, iph->daddr, dport, inet_iif(skb), sdif, refcounted); } -- cgit v1.2.3 From f1241200cd66b3e25fd2a44dd961d9720e965aa1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Aug 2025 19:07:00 +0000 Subject: tcp: Don't pass hashinfo to inet_diag helpers. These inet_diag functions required struct inet_hashinfo because they were shared by TCP and DCCP: * inet_diag_dump_icsk() * inet_diag_dump_one_icsk() * inet_diag_find_one_icsk() DCCP has gone, and we don't need to pass hashinfo down to them. Let's fetch net->ipv4.tcp_death_row.hashinfo directly in the first 2 functions. Note that inet_diag_find_one_icsk() no longer needs hashinfo as of the previous patch. We will move TCP-specific functions to tcp_diag.c in the next patch. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250822190803.540788-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/inet_diag.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index a9033696b0aa..34de992b5bd9 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -48,15 +48,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin); -void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, +void inet_diag_dump_icsk(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r); -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct netlink_callback *cb, +int inet_diag_dump_one_icsk(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); struct sock *inet_diag_find_one_icsk(struct net *net, - struct inet_hashinfo *hashinfo, const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); -- cgit v1.2.3 From 382a4d9cb6dc07643345e15c49738088a727d29b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Aug 2025 19:07:01 +0000 Subject: tcp: Move TCP-specific diag functions to tcp_diag.c. tcp_diag_dump() / tcp_diag_dump_one() is just a wrapper of inet_diag_dump_icsk() / inet_diag_dump_one_icsk(), respectively. Let's inline them in tcp_diag.c and move static callees as well. Note that inet_sk_attr_size() is merged into tcp_diag_get_aux_size(), and we remove inet_diag_handler.idiag_get_aux_size() accordingly. While at it, BUG_ON() is replaced with DEBUG_NET_WARN_ON_ONCE(). 
Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250822190803.540788-7-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/inet_diag.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 34de992b5bd9..30bf8f7ea62b 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,9 +24,6 @@ struct inet_diag_handler { bool net_admin, struct sk_buff *skb); - size_t (*idiag_get_aux_size)(struct sock *sk, - bool net_admin); - int (*destroy)(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req); @@ -48,14 +45,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin); -void inet_diag_dump_icsk(struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r); -int inet_diag_dump_one_icsk(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req); - -struct sock *inet_diag_find_one_icsk(struct net *net, - const struct inet_diag_req_v2 *req); int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); -- cgit v1.2.3 From df534e757321ae6efe848a6a787098c22a390ac6 Mon Sep 17 00:00:00 2001 From: David Yang Date: Sun, 24 Aug 2025 09:30:03 +0800 Subject: net: phylink: remove stale an_enabled from doc state->an_enabled was removed by commit 4ee9b0dcf09f ("net: phylink: remove an_enabled") but is left in mac_config() doc, so clean it. 
Signed-off-by: David Yang Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250824013009.2443580-1-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 30659b615fca..9af0411761d7 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -320,9 +320,8 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * If in 802.3z mode, the link speed is fixed, dependent on the * @state->interface. Duplex and pause modes are negotiated via * the in-band configuration word. Advertised pause modes are set - * according to the @state->an_enabled and @state->advertising - * flags. Beware of MACs which only support full duplex at gigabit - * and higher speeds. + * according to @state->advertising. Beware of MACs which only + * support full duplex at gigabit and higher speeds. * * If in Cisco SGMII mode, the link speed and duplex mode are passed * in the serial bitstream 16-bit configuration word, and the MAC @@ -331,7 +330,7 @@ int mac_prepare(struct phylink_config *config, unsigned int mode, * responsible for reading the configuration word and configuring * itself accordingly. * - * Valid state members: interface, an_enabled, pause, advertising. + * Valid state members: interface, pause, advertising. * * Implementations are expected to update the MAC to reflect the * requested settings - i.o.w., if nothing has changed between two -- cgit v1.2.3 From 1b93c03fb319d72a1f5f4723abd5df15ce40f4e2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 21 Aug 2025 17:06:03 +0800 Subject: rcu: add rcu_read_lock_dont_migrate() migrate_disable() is called to disable migration in the kernel, and it is often used together with rcu_read_lock(). 
However, with PREEMPT_RCU disabled, it's unnecessary, as rcu_read_lock() will always disable preemption, which will also disable migration. Introduce rcu_read_lock_dont_migrate() and rcu_read_unlock_migrate(), which will do the migration enable and disable only when PREEMPT_RCU. Signed-off-by: Menglong Dong Reviewed-by: Paul E. McKenney Link: https://lore.kernel.org/r/20250821090609.42508-2-dongml2@chinatelecom.cn Signed-off-by: Alexei Starovoitov --- include/linux/rcupdate.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..9691ca380a4f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -962,6 +962,20 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } +static __always_inline void rcu_read_lock_dont_migrate(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) + migrate_disable(); + rcu_read_lock(); +} + +static inline void rcu_read_unlock_migrate(void) +{ + rcu_read_unlock(); + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) + migrate_enable(); +} + /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * @p: The pointer to be initialized. -- cgit v1.2.3 From 05db35963eef7a55f1782190185cb8ddb9d923b7 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Wed, 20 Aug 2025 13:58:47 +0530 Subject: OPP: Add support to find OPP for a set of keys Some clients, such as PCIe, may operate at the same clock frequency across different data rates by varying link width. In such cases, frequency alone is not sufficient to uniquely identify an OPP. To support these scenarios, introduce a new API dev_pm_opp_find_key_exact() that allows OPP lookup with different set of keys like freq, level & bandwidth. 
Signed-off-by: Krishna Chaitanya Chundru [ Viresh: Minor cleanups ] Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cf477beae4bb..789406d95e69 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -98,6 +98,25 @@ struct dev_pm_opp_data { unsigned long u_volt; }; +/** + * struct dev_pm_opp_key - Key used to identify OPP entries + * @freq: Frequency in Hz. Use 0 if frequency is not to be matched. + * @level: Performance level associated with the OPP entry. + * Use OPP_LEVEL_UNSET if level is not to be matched. + * @bw: Bandwidth associated with the OPP entry. + * Use 0 if bandwidth is not to be matched. + * + * This structure is used to uniquely identify an OPP entry based on + * frequency, performance level, and bandwidth. Each field can be + * selectively ignored during matching by setting it to its respective + * NOP value. 
+ */ +struct dev_pm_opp_key { + unsigned long freq; + unsigned int level; + u32 bw; +}; + #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); @@ -131,6 +150,10 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available); +struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available); + struct dev_pm_opp * dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, u32 index, bool available); @@ -289,6 +312,13 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_key_exact(struct device *dev, + struct dev_pm_opp_key *key, + bool available) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp * dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq, u32 index, bool available) -- cgit v1.2.3 From 231bb0ee7aa5d1f0d077d3a30663f8ffd6860fa3 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 21 Aug 2025 23:00:45 +0530 Subject: drm/xe/uapi: Add madvise interface This commit introduces a new madvise interface to support driver-specific ioctl operations. The madvise interface allows for more efficient memory management by providing hints to the driver about the expected memory usage and pte update policy for gpuvma. 
v2 (Matthew/Thomas) - Drop num_ops support - Drop purgeable support - Add kernel-docs - IOWR/IOW v3 (Matthew/Thomas) - Reorder attributes - use __u16 for migration_policy - use __u64 for reserved in unions - Avoid usage of vma Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250821173104.3030148-2-himal.prasad.ghimiray@intel.com Signed-off-by: Himal Prasad Ghimiray --- include/uapi/drm/xe_drm.h | 130 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c721e130c1d2..4e6e9a9164ee 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -81,6 +81,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE * - &DRM_IOCTL_XE_OBSERVATION + * - &DRM_IOCTL_XE_MADVISE */ /* @@ -102,6 +103,7 @@ extern "C" { #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a #define DRM_XE_OBSERVATION 0x0b +#define DRM_XE_MADVISE 0x0c /* Must be kept compact -- no holes */ @@ -117,6 +119,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) #define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) +#define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise) /** * DOC: Xe IOCTL Extensions @@ -1978,6 +1981,133 @@ struct drm_xe_query_eu_stall { __u64 sampling_rates[]; }; +/** + * struct drm_xe_madvise - Input of &DRM_IOCTL_XE_MADVISE + * + * This structure is used to set memory attributes for a virtual address range + * in a VM. The type of attribute is specified by @type, and the corresponding + * union member is used to provide additional parameters for @type. 
+ * + * Supported attribute types: + * - DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: Set preferred memory location. + * - DRM_XE_MEM_RANGE_ATTR_ATOMIC: Set atomic access policy. + * - DRM_XE_MEM_RANGE_ATTR_PAT: Set page attribute table index. + * + * Example: + * + * .. code-block:: C + * + * struct drm_xe_madvise madvise = { + *          .vm_id = vm_id, + *          .start = 0x100000, + *          .range = 0x2000, + *          .type = DRM_XE_MEM_RANGE_ATTR_ATOMIC, + *         .atomic_val = DRM_XE_ATOMIC_DEVICE, + * }; + * + * ioctl(fd, DRM_IOCTL_XE_MADVISE, &madvise); + * + */ +struct drm_xe_madvise { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @start: start of the virtual address range */ + __u64 start; + + /** @range: size of the virtual address range */ + __u64 range; + + /** @vm_id: vm_id of the virtual range */ + __u32 vm_id; + +#define DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC 0 +#define DRM_XE_MEM_RANGE_ATTR_ATOMIC 1 +#define DRM_XE_MEM_RANGE_ATTR_PAT 2 + /** @type: type of attribute */ + __u32 type; + + union { + /** + * @preferred_mem_loc: preferred memory location + * + * Used when @type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC + * + * Supported values for @preferred_mem_loc.devmem_fd: + * - DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE: set vram of faulting tile as preferred loc + * - DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM: set smem as preferred loc + * + * Supported values for @preferred_mem_loc.migration_policy: + * - DRM_XE_MIGRATE_ALL_PAGES + * - DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES + */ + struct { +#define DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE 0 +#define DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM -1 + /** @preferred_mem_loc.devmem_fd: fd for preferred loc */ + __u32 devmem_fd; + +#define DRM_XE_MIGRATE_ALL_PAGES 0 +#define DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES 1 + /** @preferred_mem_loc.migration_policy: Page migration policy */ + __u16 migration_policy; + + /** @preferred_mem_loc.pad : MBZ */ + __u16 pad; + + /** @preferred_mem_loc.reserved 
: Reserved */ + __u64 reserved; + } preferred_mem_loc; + + /** + * @atomic: Atomic access policy + * + * Used when @type == DRM_XE_MEM_RANGE_ATTR_ATOMIC. + * + * Supported values for @atomic.val: + * - DRM_XE_ATOMIC_UNDEFINED: Undefined or default behaviour + * Support both GPU and CPU atomic operations for system allocator + * Support GPU atomic operations for normal(bo) allocator + * - DRM_XE_ATOMIC_DEVICE: Support GPU atomic operations + * - DRM_XE_ATOMIC_GLOBAL: Support both GPU and CPU atomic operations + * - DRM_XE_ATOMIC_CPU: Support CPU atomic + */ + struct { +#define DRM_XE_ATOMIC_UNDEFINED 0 +#define DRM_XE_ATOMIC_DEVICE 1 +#define DRM_XE_ATOMIC_GLOBAL 2 +#define DRM_XE_ATOMIC_CPU 3 + /** @atomic.val: value of atomic operation */ + __u32 val; + + /** @atomic.pad: MBZ */ + __u32 pad; + + /** @atomic.reserved: Reserved */ + __u64 reserved; + } atomic; + + /** + * @pat_index: Page attribute table index + * + * Used when @type == DRM_XE_MEM_RANGE_ATTR_PAT. + */ + struct { + /** @pat_index.val: PAT index value */ + __u32 val; + + /** @pat_index.pad: MBZ */ + __u32 pad; + + /** @pat_index.reserved: Reserved */ + __u64 reserved; + } pat_index; + }; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From fa1a82c985dba642de66f0a1918fc531007bf90f Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 21 Aug 2025 23:00:56 +0530 Subject: drm/xe/uapi: Add flag for consulting madvise hints on svm prefetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce flag DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC to ensure prefetching in madvise-advised memory regions v2 (Matthew Brost) - Add kernel-doc v3 (Matthew Brost) - Fix kernel-doc Cc: Matthew Brost Reviewed-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20250821173104.3030148-13-himal.prasad.ghimiray@intel.com Signed-off-by: Himal Prasad Ghimiray --- include/uapi/drm/xe_drm.h | 5 +++++ 
1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 4e6e9a9164ee..115b9bca2a25 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1010,6 +1010,10 @@ struct drm_xe_vm_destroy { * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. + * + * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be: + * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in + * the memory region advised by madvise. */ struct drm_xe_vm_bind_op { /** @extensions: Pointer to the first extension struct, if any */ @@ -1115,6 +1119,7 @@ struct drm_xe_vm_bind_op { /** @flags: Bind flags */ __u32 flags; +#define DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC -1 /** * @prefetch_mem_region_instance: Memory region to prefetch VMA to. * It is a region instance, not a mask. -- cgit v1.2.3 From 418807860e94eb9c2fe07a6f5bf67de4c59a97e4 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 21 Aug 2025 23:01:04 +0530 Subject: drm/xe/uapi: Add UAPI for querying VMA count and memory attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the DRM_IOCTL_XE_VM_QUERY_MEMORY_RANGE_ATTRS ioctl to allow userspace to query memory attributes of VMAs within a user specified virtual address range. Userspace first calls the ioctl with num_mem_ranges = 0, sizeof_mem_ranges_attr = 0 and vector_of_vma_mem_attr = NULL to retrieve the number of memory ranges (vmas) and size of each memory range attribute. Then, it allocates a buffer of that size and calls the ioctl again to fill the buffer with memory range attributes. This two-step interface allows userspace to first query the required buffer size, then retrieve detailed attributes efficiently. 
v2 (Matthew Brost) - Use same ioctl to overload functionality v3 - Add kernel-doc v4 - Make uapi future proof by passing struct size (Matthew Brost) - make lock interruptible (Matthew Brost) - set reserved bits to zero (Matthew Brost) - s/__copy_to_user/copy_to_user (Matthew Brost) - Avoid using VMA term in uapi (Thomas) - xe_vm_put(vm) is missing (Shuicheng) v5 - Nits - Fix kernel-doc Cc: Matthew Brost Cc: Shuicheng Lin Cc: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250821173104.3030148-21-himal.prasad.ghimiray@intel.com Signed-off-by: Himal Prasad Ghimiray --- include/uapi/drm/xe_drm.h | 140 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 115b9bca2a25..7dedd45ab995 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -82,6 +82,7 @@ extern "C" { * - &DRM_IOCTL_XE_WAIT_USER_FENCE * - &DRM_IOCTL_XE_OBSERVATION * - &DRM_IOCTL_XE_MADVISE + * - &DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS */ /* @@ -104,6 +105,7 @@ extern "C" { #define DRM_XE_WAIT_USER_FENCE 0x0a #define DRM_XE_OBSERVATION 0x0b #define DRM_XE_MADVISE 0x0c +#define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d /* Must be kept compact -- no holes */ @@ -120,6 +122,7 @@ extern "C" { #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) #define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) #define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise) +#define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr) /** * DOC: Xe IOCTL Extensions @@ -2113,6 +2116,143 @@ struct drm_xe_madvise { __u64 reserved[2]; }; +/** + * struct drm_xe_mem_range_attr - Output of &DRM_IOCTL_XE_VM_QUERY_MEM_RANGES_ATTRS + *
+ * This structure is provided by userspace and filled by KMD in response to the + * DRM_IOCTL_XE_VM_QUERY_MEM_RANGES_ATTRS ioctl. It describes memory attributes of + * a memory ranges within a user specified address range in a VM. + * + * The structure includes information such as atomic access policy, + * page attribute table (PAT) index, and preferred memory location. + * Userspace allocates an array of these structures and passes a pointer to the + * ioctl to retrieve attributes for each memory ranges + * + * @extensions: Pointer to the first extension struct, if any + * @start: Start address of the memory range + * @end: End address of the virtual memory range + * + */ +struct drm_xe_mem_range_attr { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @start: start of the memory range */ + __u64 start; + + /** @end: end of the memory range */ + __u64 end; + + /** @preferred_mem_loc: preferred memory location */ + struct { + /** @preferred_mem_loc.devmem_fd: fd for preferred loc */ + __u32 devmem_fd; + + /** @preferred_mem_loc.migration_policy: Page migration policy */ + __u32 migration_policy; + } preferred_mem_loc; + + /** @atomic: Atomic access policy */ + struct { + /** @atomic.val: atomic attribute */ + __u32 val; + + /** @atomic.reserved: Reserved */ + __u32 reserved; + } atomic; + + /** @pat_index: Page attribute table index */ + struct { + /** @pat_index.val: PAT index */ + __u32 val; + + /** @pat_index.reserved: Reserved */ + __u32 reserved; + } pat_index; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_vm_query_mem_range_attr - Input of &DRM_IOCTL_XE_VM_QUERY_MEM_ATTRIBUTES + * + * This structure is used to query memory attributes of memory regions + * within a user specified address range in a VM. It provides detailed + * information about each memory range, including atomic access policy, + * page attribute table (PAT) index, and preferred memory location. 
+ * + * Userspace first calls the ioctl with @num_mem_ranges = 0, + * @sizeof_mem_ranges_attr = 0 and @vector_of_vma_mem_attr = NULL to retrieve + * the number of memory regions and size of each memory range attribute. + * Then, it allocates a buffer of that size and calls the ioctl again to fill + * the buffer with memory range attributes. + * + * If second call fails with -ENOSPC, it means memory ranges changed between + * first call and now, retry IOCTL again with @num_mem_ranges = 0, + * @sizeof_mem_ranges_attr = 0 and @vector_of_vma_mem_attr = NULL followed by + * Second ioctl call. + * + * Example: + * + * .. code-block:: C + * struct drm_xe_vm_query_mem_range_attr query = { + * .vm_id = vm_id, + * .start = 0x100000, + * .range = 0x2000, + * }; + * + * // First ioctl call to get num of mem regions and sizeof each attribute + * ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &query); + * + * // Allocate buffer for the memory region attributes + * void *ptr = malloc(query.num_mem_ranges * query.sizeof_mem_range_attr); + * void *ptr_start = ptr; + * + * query.vector_of_mem_attr = (uintptr_t)ptr; + * + * // Second ioctl call to actually fill the memory attributes + * ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS, &query); + * + * // Iterate over the returned memory region attributes + * for (unsigned int i = 0; i < query.num_mem_ranges; ++i) { + * struct drm_xe_mem_range_attr *attr = (struct drm_xe_mem_range_attr *)ptr; + * + * // Do something with attr + * + * // Move pointer by one entry + * ptr += query.sizeof_mem_range_attr; + * } + * + * free(ptr_start); + */ +struct drm_xe_vm_query_mem_range_attr { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: vm_id of the virtual range */ + __u32 vm_id; + + /** @num_mem_ranges: number of mem_ranges in range */ + __u32 num_mem_ranges; + + /** @start: start of the virtual address range */ + __u64 start; + + /** @range: size of the virtual address range */ + __u64 
range; + + /** @sizeof_mem_range_attr: size of struct drm_xe_mem_range_attr */ + __u64 sizeof_mem_range_attr; + + /** @vector_of_mem_attr: userptr to array of struct drm_xe_mem_range_attr */ + __u64 vector_of_mem_attr; + + /** @reserved: Reserved */ + __u64 reserved[2]; + +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From ced17ee32a9988b8a260628e7c31a100d7dc082e Mon Sep 17 00:00:00 2001 From: Igor Torrente Date: Thu, 7 Aug 2025 09:41:45 -0300 Subject: Revert "virtio: reject shm region if length is zero" The commit 206cc44588f7 ("virtio: reject shm region if length is zero") breaks the Virtio-gpu `host_visible` feature. As you can see in the snippet below, host_visible_region is zero because of the `kzalloc`. It's using the `vm_get_shm_region` (drivers/virtio/virtio_mmio.c:536) to read the `addr` and `len` from qemu/crosvm. ``` drivers/gpu/drm/virtio/virtgpu_kms.c 132 vgdev = drmm_kzalloc(dev, sizeof(struct virtio_gpu_device), GFP_KERNEL); [...] 177 if (virtio_get_shm_region(vgdev->vdev, &vgdev->host_visible_region, 178 VIRTIO_GPU_SHM_ID_HOST_VISIBLE)) { ``` Now it always fails. To fix, revert the offending commit. Fixes: 206cc44588f7 ("virtio: reject shm region if length is zero") Signed-off-by: Igor Torrente Message-Id: <20250807124145.81816-1-igor.torrente@collabora.com> Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 918cf25cd3c6..8bf156dde554 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -328,8 +328,6 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { - if (!region->len) - return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); -- cgit v1.2.3 From 24fc631539cc78225f5c61f99c7666fcff48024d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 23:39:57 -0700 Subject: vhost: Fix ioctl # for VHOST_[GS]ET_FORK_FROM_OWNER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VHOST_[GS]ET_FEATURES_ARRAY ioctl already took 0x83 and it would result in a build error when the vhost uapi header is used for perf tool build like below. In file included from trace/beauty/ioctl.c:93: tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c: In function ‘ioctl__scnprintf_vhost_virtio_cmd’: tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c:36:18: error: initialized field overwritten [-Werror=override-init] 36 | [0x83] = "SET_FORK_FROM_OWNER", | ^~~~~~~~~~~~~~~~~~~~~ tools/perf/trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c:36:18: note: (near initialization for ‘vhost_virtio_ioctl_cmds[131]’) Fixes: 7d9896e9f6d02d8a ("vhost: Reintroduce kthread API and add mode selection") Signed-off-by: Namhyung Kim Message-Id: <20250819063958.833770-1-namhyung@kernel.org> Signed-off-by: Michael S. 
Tsirkin Tested-by: Lei Yang --- include/uapi/linux/vhost.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 283348b64af9..c57674a6aa0d 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -260,7 +260,7 @@ * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: * - Vhost will create vhost workers as kernel threads. */ -#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x83, __u8) +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8) /** * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. @@ -268,6 +268,6 @@ * * @return: An 8-bit value indicating the current thread mode. */ -#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x84, __u8) +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8) #endif -- cgit v1.2.3 From b3dcc9d1d806fb1e175f85978713eef868531da4 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 26 Aug 2025 10:19:46 +0300 Subject: memblock: fix kernel-doc for MEMBLOCK_RSRV_NOINIT The kernel-doc description of MEMBLOCK_RSRV_NOINIT and memblock_reserved_mark_noinit() do not accurately describe their functionality. Expand their kernel doc to make it clear that the user of MEMBLOCK_RSRV_NOINIT is responsible to properly initialize the struct pages for such regions and add more details about effects of using this flag. 
Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/f8140a17-c4ec-489b-b314-d45abe48bf36@redhat.com Link: https://lore.kernel.org/r/20250826071947.1949725-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b96746376e17..fcda8481de9a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,8 +40,9 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. - * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are - * not initialized (only for reserved regions). + * @MEMBLOCK_RSRV_NOINIT: reserved memory region for which struct pages are not + * fully initialized. Users of this flag are responsible to properly initialize + * struct pages of this region * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, * either explictitly with memblock_reserve_kern() or via memblock * allocation APIs. All memblock allocations set this flag. -- cgit v1.2.3 From e649bcda25b5ae1a30a182cc450f928a0b282c93 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 20 Aug 2025 14:03:39 -0400 Subject: perf: Remove get_perf_callchain() init_nr argument The 'init_nr' argument has double duty: it's used to initialize both the number of contexts and the number of stack entries. That's confusing and the callers always pass zero anyway. Hard code the zero. 
Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Namhyung Kim Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20250820180428.259565081@kernel.org --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bfbf9ea53f25..fd1d91017b99 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1719,7 +1719,7 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); extern struct perf_callchain_entry * -get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, +get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); -- cgit v1.2.3 From cff5fb82733c4f1acda458ffd2bb5c948fb59bd6 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Jul 2025 17:13:01 +0200 Subject: video: pixel_format: Add compare helpers Add helpers that compare two pixel-format descriptions against each other. 
Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250714151513.309475-2-tzimmermann@suse.de --- include/video/pixel_format.h | 58 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include') diff --git a/include/video/pixel_format.h b/include/video/pixel_format.h index b5104b2a3a13..c57019cd6ea8 100644 --- a/include/video/pixel_format.h +++ b/include/video/pixel_format.h @@ -38,4 +38,62 @@ struct pixel_format { #define PIXEL_FORMAT_XRGB2101010 \ { 32, false, { .alpha = {0, 0}, .red = {20, 10}, .green = {10, 10}, .blue = {0, 10} } } +#define __pixel_format_cmp_field(lhs, rhs, name) \ + { \ + int ret = ((lhs)->name) - ((rhs)->name); \ + if (ret) \ + return ret; \ + } + +#define __pixel_format_cmp_bitfield(lhs, rhs, name) \ + { \ + __pixel_format_cmp_field(lhs, rhs, name.offset); \ + __pixel_format_cmp_field(lhs, rhs, name.length); \ + } + +/** + * pixel_format_cmp - Compares two pixel-format descriptions + * + * @lhs: a pixel-format description + * @rhs: a pixel-format description + * + * Compares two pixel-format descriptions for their order. The semantics + * are equivalent to memcmp(). + * + * Returns: + * 0 if both arguments describe the same pixel format, less-than-zero if lhs < rhs, + * or greater-than-zero if lhs > rhs. 
+ */ +static inline int pixel_format_cmp(const struct pixel_format *lhs, const struct pixel_format *rhs) +{ + __pixel_format_cmp_field(lhs, rhs, bits_per_pixel); + __pixel_format_cmp_field(lhs, rhs, indexed); + + if (lhs->indexed) { + __pixel_format_cmp_bitfield(lhs, rhs, index); + } else { + __pixel_format_cmp_bitfield(lhs, rhs, alpha); + __pixel_format_cmp_bitfield(lhs, rhs, red); + __pixel_format_cmp_bitfield(lhs, rhs, green); + __pixel_format_cmp_bitfield(lhs, rhs, blue); + } + + return 0; +} + +/** + * pixel_format_equal - Compares two pixel-format descriptions for equality + * + * @lhs: a pixel-format description + * @rhs: a pixel-format description + * + * Returns: + * True if both arguments describe the same pixel format, or false otherwise. + */ +static inline bool pixel_format_equal(const struct pixel_format *lhs, + const struct pixel_format *rhs) +{ + return !pixel_format_cmp(lhs, rhs); +} + #endif -- cgit v1.2.3 From d6d05e2af796ca25094f80a73d8841505d54368b Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Jul 2025 17:13:02 +0200 Subject: video: screen_info: Add pixel-format helper for linear framebuffers Add screen_info_pixel_format(), which converts a screen_info's information about the color format to struct pixel_format. The encoding within the screen_info structure is complex and therefore prone to errors. Later patches will convert callers to use the pixel format. 
Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250714151513.309475-3-tzimmermann@suse.de --- include/linux/screen_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 923d68e07679..1690706206e8 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -12,6 +12,7 @@ #define SCREEN_INFO_MAX_RESOURCES 3 struct pci_dev; +struct pixel_format; struct resource; static inline bool __screen_info_has_lfb(unsigned int type) @@ -136,6 +137,7 @@ static inline u32 __screen_info_vesapm_info_base(const struct screen_info *si) ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); u32 __screen_info_lfb_bits_per_pixel(const struct screen_info *si); +int screen_info_pixel_format(const struct screen_info *si, struct pixel_format *f); #if defined(CONFIG_PCI) void screen_info_apply_fixups(void); -- cgit v1.2.3 From 7ff61177b7116825085587f007dcdfd042c7b33b Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Jul 2025 17:13:05 +0200 Subject: drm/color-mgmt: Prepare for RGB332 palettes Add helper drm_crtc_fill_palette_332(), which fills palettes with RGB332 color data. Each color in RGB332 format serves as an index into an 8-bit palette that stores the corresponding component-based colors. Vesadrm will use the new helper to emulate RGB formats on top of framebuffers in C8 format. 
v2: - add comments on bit operations (Javier) Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250714151513.309475-6-tzimmermann@suse.de --- include/drm/drm_color_mgmt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 6cb577f6dba6..eccb71ab335a 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -143,6 +143,7 @@ void drm_crtc_fill_gamma_555(struct drm_crtc *crtc, drm_crtc_set_lut_func set_ga void drm_crtc_load_palette_8(struct drm_crtc *crtc, const struct drm_color_lut *lut, drm_crtc_set_lut_func set_palette); +void drm_crtc_fill_palette_332(struct drm_crtc *crtc, drm_crtc_set_lut_func set_palette); void drm_crtc_fill_palette_8(struct drm_crtc *crtc, drm_crtc_set_lut_func set_palette); #endif -- cgit v1.2.3 From 7399c13f619f33dc8bdce838f3c83e88a18765ee Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 14 Jul 2025 17:13:09 +0200 Subject: drm/vesadrm: Support DRM_FORMAT_C8 Add support for DRM_FORMAT_C8 to vesadrm. The new pixel-format description PIXEL_FORMAT_C8 describes the layout. Vesadrm's helpers vesadrm_fill_palette_lut() and vesadrm_load_palette_lut() set the hardware palette according to the CRTC's output format. The driver emulates XRGB8888 by converting the source buffer to RGB332 and using the resulting 256 colors as index into the hardware palette. The hardware palette converts back to RGB during scanout. This has no overhead compared to other format conversion, but allows common userspace, such as Wayland compositors, to operate on the display. 
Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250714151513.309475-10-tzimmermann@suse.de --- include/video/pixel_format.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/video/pixel_format.h b/include/video/pixel_format.h index c57019cd6ea8..6874754b0474 100644 --- a/include/video/pixel_format.h +++ b/include/video/pixel_format.h @@ -20,6 +20,9 @@ struct pixel_format { }; }; +#define PIXEL_FORMAT_C8 \ + { 8, true, { .index = {0, 8}, } } + #define PIXEL_FORMAT_XRGB1555 \ { 16, false, { .alpha = {0, 0}, .red = {10, 5}, .green = {5, 5}, .blue = {0, 5} } } -- cgit v1.2.3 From 9c857a9d84e01332d031b55c4e38a66daecbae73 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Tue, 26 Aug 2025 12:04:09 +0530 Subject: drm: Add a vendor-specific recovery method to drm device wedged uevent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address the need for a recovery method (firmware flash on Firmware errors) introduced in the later patches of Xe KMD. Whenever XE KMD detects a firmware error, a firmware flash is required to recover the device to normal operation. The initial proposal to use 'firmware-flash' as a recovery method was not applicable to other drivers and could cause multiple recovery methods specific to vendors to be added. To address this a more generic 'vendor-specific' method is introduced, guiding users to refer to vendor specific documentation and system logs for detailed vendor specific recovery procedure. Add a recovery method 'WEDGED=vendor-specific' for such errors. Vendors must provide additional recovery documentation if this method is used. It is the responsibility of the consumer to refer to the correct vendor specific documentation and usecase before attempting a recovery. For example: If driver is XE KMD, the consumer must refer to the documentation of 'Device Wedging' under 'Documentation/gpu/xe/'. 
v2: fix documentation (Raag) v3: add more details to commit message (Sima, Rodrigo, Raag) add an example script to the documentation (Raag) v4: use consistent naming (Raag) v5: fix commit message v6: add more documentation Cc: André Almeida Cc: Christian König Cc: David Airlie Cc: Simona Vetter Cc: Maxime Ripard Signed-off-by: Riana Tauro Reviewed-by: Rodrigo Vivi Acked-by: Maxime Ripard Link: https://lore.kernel.org/r/20250826063419.3022216-3-riana.tauro@intel.com Signed-off-by: Rodrigo Vivi --- include/drm/drm_device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index a33aedd5e9ec..59fd3f4d5995 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -26,10 +26,14 @@ struct pci_controller; * Recovery methods for wedged device in order of less to more side-effects. * To be used with drm_dev_wedged_event() as recovery @method. Callers can * use any one, multiple (or'd) or none depending on their needs. + * + * Refer to "Device Wedging" chapter in Documentation/gpu/drm-uapi.rst for more + * details. */ #define DRM_WEDGE_RECOVERY_NONE BIT(0) /* optional telemetry collection */ #define DRM_WEDGE_RECOVERY_REBIND BIT(1) /* unbind + bind driver */ #define DRM_WEDGE_RECOVERY_BUS_RESET BIT(2) /* unbind + reset bus device + bind */ +#define DRM_WEDGE_RECOVERY_VENDOR BIT(3) /* vendor specific recovery method */ /** * struct drm_wedge_task_info - information about the guilty task of a wedge dev -- cgit v1.2.3 From e164461349444ad27873e4ab2f492eb4465dbbb0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:49 -0700 Subject: lib/crypto: md5: Add MD5 and HMAC-MD5 library functions Add library functions for MD5, including HMAC support. The MD5 implementation is derived from crypto/md5.c. This closely mirrors the corresponding SHA-1 and SHA-2 changes. Like SHA-1 and SHA-2, support for architecture-optimized MD5 implementations is included. 
I originally proposed dropping those, but unfortunately there is an AF_ALG user of the PowerPC MD5 code (https://lore.kernel.org/r/c4191597-341d-4fd7-bc3d-13daf7666c41@csgroup.eu/), and dropping that code would be viewed as a performance regression. We don't add new software algorithm implementations purely for AF_ALG, as escalating to kernel mode merely to do calculations that could be done in userspace is inefficient and is completely the wrong design. But since this one already existed, it gets grandfathered in for now. An objection was also raised to dropping the SPARC64 MD5 code because it utilizes the CPU's direct support for MD5, although it remains unclear that anyone is using that. Regardless, we'll keep these around for now. Note that while MD5 is a legacy algorithm that is vulnerable to practical collision attacks, it still has various in-kernel users that implement legacy protocols. Switching to a simple library API, which is the way the code should have been organized originally, will greatly simplify their code. For example: MD5: drivers/md/dm-crypt.c (for lmk IV generation) fs/nfsd/nfs4recover.c fs/ecryptfs/ fs/smb/client/ net/{ipv4,ipv6}/ (for TCP-MD5 signatures) HMAC-MD5: fs/smb/client/ fs/smb/server/ (Also net/sctp/ if it continues using HMAC-MD5 for cookie generation. However, that use case has the flexibility to upgrade to a more modern algorithm, which I'll be proposing instead.) As usual, the "md5" and "hmac(md5)" crypto_shash algorithms will also be reimplemented on top of these library functions. For "hmac(md5)" this will provide a faster, more streamlined implementation. 
Link: https://lore.kernel.org/r/20250805222855.10362-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/md5.h | 181 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 180 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/md5.h b/include/crypto/md5.h index 28ee533a0507..c9aa5c3abc53 100644 --- a/include/crypto/md5.h +++ b/include/crypto/md5.h @@ -7,6 +7,7 @@ #define MD5_DIGEST_SIZE 16 #define MD5_HMAC_BLOCK_SIZE 64 +#define MD5_BLOCK_SIZE 64 #define MD5_BLOCK_WORDS 16 #define MD5_HASH_WORDS 4 #define MD5_STATE_SIZE 24 @@ -27,4 +28,182 @@ struct md5_state { u32 block[MD5_BLOCK_WORDS]; }; -#endif +/* State for the MD5 compression function */ +struct md5_block_state { + u32 h[MD5_HASH_WORDS]; +}; + +/** + * struct md5_ctx - Context for hashing a message with MD5 + * @state: the compression function state + * @bytecount: number of bytes processed so far + * @buf: partial block buffer; bytecount % MD5_BLOCK_SIZE bytes are valid + */ +struct md5_ctx { + struct md5_block_state state; + u64 bytecount; + u8 buf[MD5_BLOCK_SIZE] __aligned(__alignof__(__le64)); +}; + +/** + * md5_init() - Initialize an MD5 context for a new message + * @ctx: the context to initialize + * + * If you don't need incremental computation, consider md5() instead. + * + * Context: Any context. + */ +void md5_init(struct md5_ctx *ctx); + +/** + * md5_update() - Update an MD5 context with message data + * @ctx: the context to update; must have been initialized + * @data: the message data + * @len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +void md5_update(struct md5_ctx *ctx, const u8 *data, size_t len); + +/** + * md5_final() - Finish computing an MD5 message digest + * @ctx: the context to finalize; must have been initialized + * @out: (output) the resulting MD5 message digest + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. 
+ * + * Context: Any context. + */ +void md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]); + +/** + * md5() - Compute MD5 message digest in one shot + * @data: the message data + * @len: the data length in bytes + * @out: (output) the resulting MD5 message digest + * + * Context: Any context. + */ +void md5(const u8 *data, size_t len, u8 out[MD5_DIGEST_SIZE]); + +/** + * struct hmac_md5_key - Prepared key for HMAC-MD5 + * @istate: private + * @ostate: private + */ +struct hmac_md5_key { + struct md5_block_state istate; + struct md5_block_state ostate; +}; + +/** + * struct hmac_md5_ctx - Context for computing HMAC-MD5 of a message + * @hash_ctx: private + * @ostate: private + */ +struct hmac_md5_ctx { + struct md5_ctx hash_ctx; + struct md5_block_state ostate; +}; + +/** + * hmac_md5_preparekey() - Prepare a key for HMAC-MD5 + * @key: (output) the key structure to initialize + * @raw_key: the raw HMAC-MD5 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * Note: the caller is responsible for zeroizing both the struct hmac_md5_key + * and the raw key once they are no longer needed. + * + * Context: Any context. + */ +void hmac_md5_preparekey(struct hmac_md5_key *key, + const u8 *raw_key, size_t raw_key_len); + +/** + * hmac_md5_init() - Initialize an HMAC-MD5 context for a new message + * @ctx: (output) the HMAC context to initialize + * @key: the prepared HMAC key + * + * If you don't need incremental computation, consider hmac_md5() instead. + * + * Context: Any context. + */ +void hmac_md5_init(struct hmac_md5_ctx *ctx, const struct hmac_md5_key *key); + +/** + * hmac_md5_init_usingrawkey() - Initialize an HMAC-MD5 context for a new + * message, using a raw key + * @ctx: (output) the HMAC context to initialize + * @raw_key: the raw HMAC-MD5 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * + * If you don't need incremental computation, consider hmac_md5_usingrawkey() + * instead. 
+ * + * Context: Any context. + */ +void hmac_md5_init_usingrawkey(struct hmac_md5_ctx *ctx, + const u8 *raw_key, size_t raw_key_len); + +/** + * hmac_md5_update() - Update an HMAC-MD5 context with message data + * @ctx: the HMAC context to update; must have been initialized + * @data: the message data + * @data_len: the data length in bytes + * + * This can be called any number of times. + * + * Context: Any context. + */ +static inline void hmac_md5_update(struct hmac_md5_ctx *ctx, + const u8 *data, size_t data_len) +{ + md5_update(&ctx->hash_ctx, data, data_len); +} + +/** + * hmac_md5_final() - Finish computing an HMAC-MD5 value + * @ctx: the HMAC context to finalize; must have been initialized + * @out: (output) the resulting HMAC-MD5 value + * + * After finishing, this zeroizes @ctx. So the caller does not need to do it. + * + * Context: Any context. + */ +void hmac_md5_final(struct hmac_md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]); + +/** + * hmac_md5() - Compute HMAC-MD5 in one shot, using a prepared key + * @key: the prepared HMAC key + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-MD5 value + * + * If you're using the key only once, consider using hmac_md5_usingrawkey(). + * + * Context: Any context. + */ +void hmac_md5(const struct hmac_md5_key *key, + const u8 *data, size_t data_len, u8 out[MD5_DIGEST_SIZE]); + +/** + * hmac_md5_usingrawkey() - Compute HMAC-MD5 in one shot, using a raw key + * @raw_key: the raw HMAC-MD5 key + * @raw_key_len: the key length in bytes. All key lengths are supported. + * @data: the message data + * @data_len: the data length in bytes + * @out: (output) the resulting HMAC-MD5 value + * + * If you're using the key multiple times, prefer to use hmac_md5_preparekey() + * followed by multiple calls to hmac_md5() instead. + * + * Context: Any context. 
+ */ +void hmac_md5_usingrawkey(const u8 *raw_key, size_t raw_key_len, + const u8 *data, size_t data_len, + u8 out[MD5_DIGEST_SIZE]); + +#endif /* _CRYPTO_MD5_H */ -- cgit v1.2.3 From 1abe21ef1adf0c5b6dbb5878c2fa4573df8d29fc Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 23 Aug 2025 15:44:28 +0200 Subject: net: phy: introduce phy_id_compare_vendor() PHY ID helper Introduce phy_id_compare_vendor() PHY ID helper to compare a PHY ID with the PHY ID Vendor using the generic PHY ID Vendor mask. While at it also rework the PHY_ID_MATCH macro and move the mask to dedicated define so that PHY driver can make use of the mask if needed. Signed-off-by: Christian Marangi Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250823134431.4854-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4c2b8b6e7187..b67079796402 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1268,9 +1268,13 @@ struct phy_driver { #define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0) -#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4) -#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10) +#define PHY_ID_MATCH_EXTACT_MASK GENMASK(31, 0) +#define PHY_ID_MATCH_MODEL_MASK GENMASK(31, 4) +#define PHY_ID_MATCH_VENDOR_MASK GENMASK(31, 10) + +#define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_EXTACT_MASK +#define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_MODEL_MASK +#define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = PHY_ID_MATCH_VENDOR_MASK /** * phy_id_compare - compare @id1 with @id2 taking account of @mask @@ -1286,6 +1290,19 @@ static inline bool 
phy_id_compare(u32 id1, u32 id2, u32 mask) return !((id1 ^ id2) & mask); } +/** + * phy_id_compare_vendor - compare @id with @vendor mask + * @id: PHY ID + * @vendor_mask: PHY Vendor mask + * + * Return: true if the bits from @id match @vendor using the + * generic PHY Vendor mask. + */ +static inline bool phy_id_compare_vendor(u32 id, u32 vendor_mask) +{ + return phy_id_compare(id, vendor_mask, PHY_ID_MATCH_VENDOR_MASK); +} + /** * phydev_id_compare - compare @id with the PHY's Clause 22 ID * @phydev: the PHY device -- cgit v1.2.3 From 39e94fdce45fa611abf48472873e4ba2f67228a3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Aug 2025 22:36:11 +0200 Subject: net: phy: fixed: let fixed_phy_add always use addr 0 and remove return value We have only two users of fixed_phy_add(), both use address 0 and ignore the return value. So simplify fixed_phy_add() accordingly. Whilst at it, constify the fixed_phy_status configs. Note: fixed_phy_add() is a legacy function which shouldn't be used in new code, as its use may be problematic: - No check whether a fixed phy exists already at the given address - If fixed_phy_register() is called afterwards by any other driver, then it will also use phy_addr 0, because fixed_phy_add() ignores the ida which manages address assignment Drivers using a fixed phy created by fixed_phy_add() in platform code, should dynamically create a fixed phy with fixed_phy_register() instead. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/762700e5-a0b1-41af-aa03-929822a39475@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy_fixed.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 5399b9e41e35..6227a1bdefec 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,7 +17,7 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -int fixed_phy_add(int phy_id, const struct fixed_phy_status *status); +void fixed_phy_add(const struct fixed_phy_status *status); struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np); @@ -26,11 +26,7 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); #else -static inline int fixed_phy_add(int phy_id, - const struct fixed_phy_status *status) -{ - return -ENODEV; -} +static inline void fixed_phy_add(const struct fixed_phy_status *status) {} static inline struct phy_device * fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np) -- cgit v1.2.3 From d2b007374551ac09db16badde575cdd698f6fc92 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 24 Aug 2025 11:43:50 +0300 Subject: devlink: Move graceful period parameter to reporter ops Move the default graceful period from a parameter to devlink_health_reporter_create() to a field in the devlink_health_reporter_ops structure. This change improves consistency, as the graceful period is inherently tied to the reporter's behavior and recovery policy. It simplifies the signature of devlink_health_reporter_create() and its internal helper functions. 
It also centralizes the reporter configuration at the ops structure, preparing the groundwork for a downstream patch that will introduce a devlink health reporter burst period attribute whose default value will similarly be provided by the driver via the ops structure. Signed-off-by: Shahar Shitrit Reviewed-by: Jiri Pirko Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250824084354.533182-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 3119d053bc4d..c7ad7a981b39 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -746,6 +746,8 @@ enum devlink_health_reporter_state { * if priv_ctx is NULL, run a full dump * @diagnose: callback to diagnose the current status * @test: callback to trigger a test event + * @default_graceful_period: default min time (in msec) + * between recovery attempts */ struct devlink_health_reporter_ops { @@ -760,6 +762,7 @@ struct devlink_health_reporter_ops { struct netlink_ext_ack *extack); int (*test)(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack); + u64 default_graceful_period; }; /** @@ -1928,22 +1931,22 @@ void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, struct devlink_health_reporter * devl_port_health_reporter_create(struct devlink_port *port, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); + void *priv); struct devlink_health_reporter * devlink_port_health_reporter_create(struct devlink_port *port, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); + void *priv); struct devlink_health_reporter * devl_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); + void *priv); struct devlink_health_reporter * devlink_health_reporter_create(struct 
devlink *devlink, const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv); + void *priv); void devl_health_reporter_destroy(struct devlink_health_reporter *reporter); -- cgit v1.2.3 From 6a06d8c40510ba1ecf27977f528b1eb74f290a60 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 24 Aug 2025 11:43:52 +0300 Subject: devlink: Introduce burst period for health reporter Currently, the devlink health reporter starts the grace period immediately after handling an error, blocking any further recoveries until it finishes. However, when a single root cause triggers multiple errors in a short time frame, it is desirable to treat them as a bulk of errors and to allow their recoveries, avoiding premature blocking of subsequent related errors, and reducing the risk of inconsistent or incomplete error handling. To address this, introduce a configurable burst period for the devlink health reporter. Start this period when the first error is handled, and allow recovery attempts for reported errors during this window. Once the burst period expires, begin the grace period to block further recoveries until it concludes. Timeline summary: ----|--------|------------------------------/----------------------/-- error is error is burst period grace period reported recovered (recoveries allowed) (recoveries blocked) For calculating the burst period duration, use the same last_recovery_ts as the grace period. Update it on recovery only when the burst period is inactive (either disabled or at the first error). This patch implements the framework for the burst period and effectively sets its value to 0 at reporter creation, so the current behavior remains unchanged, which ensures backward compatibility. A downstream patch will make the burst period configurable. 
Signed-off-by: Shahar Shitrit Reviewed-by: Jiri Pirko Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250824084354.533182-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index c7ad7a981b39..5f44e702c25c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -748,6 +748,8 @@ enum devlink_health_reporter_state { * @test: callback to trigger a test event * @default_graceful_period: default min time (in msec) * between recovery attempts + * @default_burst_period: default time (in msec) for + * error recoveries before starting the grace period */ struct devlink_health_reporter_ops { @@ -763,6 +765,7 @@ struct devlink_health_reporter_ops { int (*test)(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack); u64 default_graceful_period; + u64 default_burst_period; }; /** -- cgit v1.2.3 From da0e2197645c8e01bb6080c7a2b86d9a56cc64a9 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 24 Aug 2025 11:43:53 +0300 Subject: devlink: Make health reporter burst period configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable configuration of the burst period — a time window starting from the first error recovery, during which the reporter allows recovery attempts for each reported error. This feature is helpful when a single underlying issue causes multiple errors, as it delays the start of the grace period to allow sufficient time for recovering all related errors. For example, if multiple TX queues time out simultaneously, a sufficient burst period could allow all affected TX queues to be recovered within that window. Without this period, only the first TX queue that reports a timeout will undergo recovery, while the remaining TX queues will be blocked once the grace period begins. 
Configuration example: $ devlink health set pci/0000:00:09.0 reporter tx burst_period 500 Configuration example with ynl: ./tools/net/ynl/pyynl/cli.py \ --spec Documentation/netlink/specs/devlink.yaml \ --do health-reporter-set --json '{ "bus-name": "auxiliary", "dev-name": "mlx5_core.eth.0", "port-index": 65535, "health-reporter-name": "tx", "health-reporter-burst-period": 500 }' Signed-off-by: Shahar Shitrit Reviewed-by: Jiri Pirko Reviewed-by: Dragos Tatulea Reviewed-by: Carolina Jubran Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250824084354.533182-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 9fcb25a0f447..bcad11a787a5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -636,6 +636,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TC_BWS, /* nested */ + DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, /* u64 */ + /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate * net/devlink/netlink_gen.c. -- cgit v1.2.3 From 1bec9d0c0046fe4e2bfb6a1c5aadcb5d56cdb0fb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 25 Aug 2025 15:37:43 +0200 Subject: ipv4: Convert ->flowi4_tos to dscp_t. Convert the ->flowic_tos field of struct flowi_common from __u8 to dscp_t, rename it ->flowic_dscp and propagate these changes to struct flowi and struct flowi4. We've had several bugs in the past where ECN bits could interfere with IPv4 routing, because these bits were not properly cleared when setting ->flowi4_tos. These bugs should be fixed now and the dscp_t type has been introduced to ensure that variables carrying DSCP values don't accidentally have any ECN bits set. 
Several variables and structure fields have been converted to dscp_t already, but the main IPv4 routing structure, struct flowi4, is still using a __u8. To avoid any future regression, this patch converts it to dscp_t. There are many users to convert at once. Fortunately, around half of ->flowi4_tos users already have a dscp_t value at hand, which they currently convert to __u8 using inet_dscp_to_dsfield(). For all of these users, we just need to drop that conversion. But, although we try to do the __u8 <-> dscp_t conversions at the boundaries of the network or of user space, some places still store TOS/DSCP variables as __u8 in core networking code. Those can hardly be converted either because the data structure is part of UAPI or because the same variable or field is also used for handling ECN in other parts of the code. In all of these cases where we don't have a dscp_t variable at hand, we need to use inet_dsfield_to_dscp() when interacting with ->flowi4_dscp. Changes since v1: * Fix space alignment in __bpf_redirect_neigh_v4() (Ido). 
Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/29acecb45e911d17446b9a3dbdb1ab7b821ea371.1756128932.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/flow.h | 11 ++++++----- include/net/inet_dscp.h | 6 ++++++ include/net/ip_fib.h | 2 +- include/net/ip_tunnels.h | 4 +++- include/net/route.h | 2 +- include/trace/events/fib.h | 4 +++- 6 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index a1839c278d87..ae9481c40063 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -12,6 +12,7 @@ #include #include #include +#include struct flow_keys; @@ -32,7 +33,7 @@ struct flowi_common { int flowic_iif; int flowic_l3mdev; __u32 flowic_mark; - __u8 flowic_tos; + dscp_t flowic_dscp; __u8 flowic_scope; __u8 flowic_proto; __u8 flowic_flags; @@ -70,7 +71,7 @@ struct flowi4 { #define flowi4_iif __fl_common.flowic_iif #define flowi4_l3mdev __fl_common.flowic_l3mdev #define flowi4_mark __fl_common.flowic_mark -#define flowi4_tos __fl_common.flowic_tos +#define flowi4_dscp __fl_common.flowic_dscp #define flowi4_scope __fl_common.flowic_scope #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags @@ -103,7 +104,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_l3mdev = 0; fl4->flowi4_mark = mark; - fl4->flowi4_tos = tos; + fl4->flowi4_dscp = inet_dsfield_to_dscp(tos); fl4->flowi4_scope = scope; fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; @@ -141,7 +142,7 @@ struct flowi6 { #define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; - /* Note: flowi6_tos is encoded in flowlabel, too. */ + /* Note: flowi6_dscp is encoded in flowlabel, too. 
*/ __be32 flowlabel; union flowi_uli uli; #define fl6_sport uli.ports.sport @@ -163,7 +164,7 @@ struct flowi { #define flowi_iif u.__fl_common.flowic_iif #define flowi_l3mdev u.__fl_common.flowic_l3mdev #define flowi_mark u.__fl_common.flowic_mark -#define flowi_tos u.__fl_common.flowic_tos +#define flowi_dscp u.__fl_common.flowic_dscp #define flowi_scope u.__fl_common.flowic_scope #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h index 72f250dffada..1aa9f04ed1ab 100644 --- a/include/net/inet_dscp.h +++ b/include/net/inet_dscp.h @@ -39,6 +39,12 @@ typedef u8 __bitwise dscp_t; #define INET_DSCP_MASK 0xfc +/* A few places in the IPv4 code need to ignore the three high order bits of + * DSCP because of backward compatibility (as these bits used to represent the + * IPv4 Precedence in RFC 791's TOS field and were ignored). + */ +#define INET_DSCP_LEGACY_TOS_MASK ((__force dscp_t)0x1c) + static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield) { return (__force dscp_t)(dsfield & INET_DSCP_MASK); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 48bb3cf41469..b4495c38e0a0 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -440,7 +440,7 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net, static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4) { - return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos)); + return dscp == (fl4->flowi4_dscp & INET_DSCP_LEGACY_TOS_MASK); } /* Exported by fib_frontend.c */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 8cf1380f3656..4314a97702ea 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -11,7 +11,9 @@ #include #include +#include #include +#include #include #include #include @@ -362,7 +364,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->daddr = daddr; fl4->saddr = saddr; - fl4->flowi4_tos = tos; + 
fl4->flowi4_dscp = inet_dsfield_to_dscp(tos); fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; diff --git a/include/net/route.h b/include/net/route.h index 7ea840daa775..c71998f464f8 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -189,7 +189,7 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, { struct flowi4 fl4 = { .flowi4_oif = oif, - .flowi4_tos = inet_dscp_to_dsfield(dscp), + .flowi4_dscp = dscp, .flowi4_scope = scope, .daddr = daddr, .saddr = saddr, diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 20b914250ce9..feb28b359eff 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -7,6 +7,8 @@ #include #include +#include +#include #include #include @@ -44,7 +46,7 @@ TRACE_EVENT(fib_table_lookup, __entry->err = err; __entry->oif = flp->flowi4_oif; __entry->iif = flp->flowi4_iif; - __entry->tos = flp->flowi4_tos; + __entry->tos = inet_dscp_to_dsfield(flp->flowi4_dscp); __entry->scope = flp->flowi4_scope; __entry->flags = flp->flowi4_flags; -- cgit v1.2.3 From 095928e7d80186c524013a5b5d54889fa2ec1eaa Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 23 Aug 2025 21:36:43 -0400 Subject: ipv6: sr: Use HMAC-SHA1 and HMAC-SHA256 library functions Use the HMAC-SHA1 and HMAC-SHA256 library functions instead of crypto_shash. This is simpler and faster. Pre-allocating per-CPU hash transformation objects and descriptors is no longer needed, and a microbenchmark on x86_64 shows seg6_hmac_compute() (with HMAC-SHA256) dropping from ~2494 cycles to ~1978 cycles, a 20% improvement. 
Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20250824013644.71928-2-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/net/seg6_hmac.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 24f733b3e3fe..3fe4123dbbf0 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -19,7 +19,6 @@ #include #include -#define SEG6_HMAC_MAX_DIGESTSIZE 160 #define SEG6_HMAC_RING_SIZE 256 struct seg6_hmac_info { @@ -32,13 +31,6 @@ struct seg6_hmac_info { u8 alg_id; }; -struct seg6_hmac_algo { - u8 alg_id; - char name[64]; - struct crypto_shash * __percpu *tfms; - struct shash_desc * __percpu *shashs; -}; - extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, u8 *output); @@ -50,13 +42,9 @@ extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, struct ipv6_sr_hdr *srh); extern bool seg6_hmac_validate_skb(struct sk_buff *skb); #ifdef CONFIG_IPV6_SEG6_HMAC -extern int seg6_hmac_init(void); -extern void seg6_hmac_exit(void); extern int seg6_hmac_net_init(struct net *net); extern void seg6_hmac_net_exit(struct net *net); #else -static inline int seg6_hmac_init(void) { return 0; } -static inline void seg6_hmac_exit(void) {} static inline int seg6_hmac_net_init(struct net *net) { return 0; } static inline void seg6_hmac_net_exit(struct net *net) {} #endif -- cgit v1.2.3 From fe60065689048edf4df99fffdb180a2166f9a54d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 23 Aug 2025 21:36:44 -0400 Subject: ipv6: sr: Prepare HMAC key ahead of time Prepare the HMAC key when it is added to the kernel, instead of preparing it implicitly for every packet. This significantly improves the performance of seg6_hmac_compute(). A microbenchmark on x86_64 shows seg6_hmac_compute() (with HMAC-SHA256) dropping from ~1978 cycles to ~1419 cycles, a 28% improvement. 
The size of 'struct seg6_hmac_info' increases by 128 bytes, but that should be fine, since there should not be a massive number of keys. Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20250824013644.71928-3-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/net/seg6_hmac.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 3fe4123dbbf0..e9f41725933e 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -9,6 +9,8 @@ #ifndef _NET_SEG6_HMAC_H #define _NET_SEG6_HMAC_H +#include +#include #include #include #include @@ -26,9 +28,15 @@ struct seg6_hmac_info { struct rcu_head rcu; u32 hmackeyid; + /* The raw key, kept only so it can be returned back to userspace */ char secret[SEG6_HMAC_SECRET_LEN]; u8 slen; u8 alg_id; + /* The prepared key, which the calculations actually use */ + union { + struct hmac_sha1_key sha1; + struct hmac_sha256_key sha256; + } key; }; extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, -- cgit v1.2.3 From 4b59300ba4d2362b02c2a9077047bbabceea67d7 Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:31 +0800 Subject: kunit: Add parent kunit for parameterized test context Currently, KUnit parameterized tests lack a mechanism to share resources across parameter runs because the same `struct kunit` instance is cleaned up and reused for each run. This patch introduces parameterized test context, enabling test users to share resources between parameter runs. It also allows setting up resources that need to be available for all parameter runs only once, which is helpful in cases where setup is expensive. To establish a parameterized test context, this patch adds a parent pointer field to `struct kunit`. This allows resources added to the parent `struct kunit` to be shared and accessible across all parameter runs. 
In kunit_run_tests(), the default `struct kunit` created is now designated to act as the parameterized test context whenever a test is parameterized. Subsequently, a new `struct kunit` is made for each parameter run, and its parent pointer is set to the `struct kunit` that holds the parameterized test context. Link: https://lore.kernel.org/r/20250826091341.1427123-2-davidgow@google.com Reviewed-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Marie Zhussupova Signed-off-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index d958ee53050e..9766403afd56 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -268,14 +268,18 @@ struct kunit_suite_set { * * @priv: for user to store arbitrary data. Commonly used to pass data * created in the init function (see &struct kunit_suite). + * @parent: reference to the parent context of type struct kunit that can + * be used for storing shared resources. * * Used to store information about the current context under which the test * is running. Most of this data is private and should only be accessed - * indirectly via public functions; the one exception is @priv which can be - * used by the test writer to store arbitrary data. + * indirectly via public functions; the two exceptions are @priv and @parent + * which can be used by the test writer to store arbitrary data and access the + * parent context, respectively. */ struct kunit { void *priv; + struct kunit *parent; /* private: internal use only. */ const char *name; /* Read only after initialization! */ -- cgit v1.2.3 From 241423580e5e8d8b10b14b382379f4928b87be17 Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:32 +0800 Subject: kunit: Introduce param_init/exit for parameterized test context management Add (*param_init) and (*param_exit) function pointers to `struct kunit_case`. 
Users will be able to set them via the new KUNIT_CASE_PARAM_WITH_INIT() macro. param_init/exit will be invoked by kunit_run_tests() once before and once after the parameterized test, respectively. They will receive the `struct kunit` that holds the parameterized test context; facilitating init and exit for shared state. This patch also sets param_init/exit to None in rust/kernel/kunit.rs. Link: https://lore.kernel.org/r/20250826091341.1427123-3-davidgow@google.com Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Marie Zhussupova Signed-off-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 9766403afd56..fc8fd55b2dfb 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -92,6 +92,8 @@ struct kunit_attributes { * @name: the name of the test case. * @generate_params: the generator function for parameterized tests. * @attr: the attributes associated with the test + * @param_init: The init function to run before a parameterized test. + * @param_exit: The exit function to run after a parameterized test. * * A test case is a function with the signature, * ``void (*)(struct kunit *)`` @@ -128,6 +130,8 @@ struct kunit_case { const char *name; const void* (*generate_params)(const void *prev, char *desc); struct kunit_attributes attr; + int (*param_init)(struct kunit *test); + void (*param_exit)(struct kunit *test); /* private: internal use only. */ enum kunit_status status; @@ -218,6 +222,27 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status) .generate_params = gen_params, \ .attr = attributes, .module_name = KBUILD_MODNAME} +/** + * KUNIT_CASE_PARAM_WITH_INIT - Define a parameterized KUnit test case with custom + * param_init() and param_exit() functions. + * @test_name: The function implementing the test case. 
+ * @gen_params: The function to generate parameters for the test case. + * @init: A reference to the param_init() function to run before a parameterized test. + * @exit: A reference to the param_exit() function to run after a parameterized test. + * + * Provides the option to register param_init() and param_exit() functions. + * param_init/exit will be passed the parameterized test context and run once + * before and once after the parameterized test. The init function can be used + * to add resources to share between parameter runs, and any other setup logic. + * The exit function can be used to clean up resources that were not managed by + * the parameterized test, and any other teardown logic. + */ +#define KUNIT_CASE_PARAM_WITH_INIT(test_name, gen_params, init, exit) \ + { .run_case = test_name, .name = #test_name, \ + .generate_params = gen_params, \ + .param_init = init, .param_exit = exit, \ + .module_name = KBUILD_MODNAME} + /** * struct kunit_suite - describes a related collection of &struct kunit_case * -- cgit v1.2.3 From b9a214b5f6aa55870b5678f31084f85c0c11ffdc Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:33 +0800 Subject: kunit: Pass parameterized test context to generate_params() To enable more complex parameterized testing scenarios, the generate_params() function needs additional context beyond just the previously generated parameter. This patch modifies the generate_params() function signature to include an extra `struct kunit *test` argument, giving test users access to the parameterized test context when generating parameters. The `struct kunit *test` argument was added as the first parameter to the function signature as it aligns with the convention of other KUnit functions that accept `struct kunit *test` first. This also mirrors the "this" or "self" reference found in object-oriented programming languages. 
This patch also modifies xe_pci_live_device_gen_param() in xe_pci.c and nthreads_gen_params() in kcsan_test.c to reflect this signature change. Link: https://lore.kernel.org/r/20250826091341.1427123-4-davidgow@google.com Reviewed-by: David Gow Reviewed-by: Rae Moar Acked-by: Marco Elver Acked-by: Rodrigo Vivi Signed-off-by: Marie Zhussupova [Catch some additional gen_params signatures in drm/xe/tests --David] Signed-off-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index fc8fd55b2dfb..8eba1b03c3e3 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -128,7 +128,8 @@ struct kunit_attributes { struct kunit_case { void (*run_case)(struct kunit *test); const char *name; - const void* (*generate_params)(const void *prev, char *desc); + const void* (*generate_params)(struct kunit *test, + const void *prev, char *desc); struct kunit_attributes attr; int (*param_init)(struct kunit *test); void (*param_exit)(struct kunit *test); @@ -1703,7 +1704,8 @@ do { \ * Define function @name_gen_params which uses @array to generate parameters. */ #define KUNIT_ARRAY_PARAM(name, array, get_desc) \ - static const void *name##_gen_params(const void *prev, char *desc) \ + static const void *name##_gen_params(struct kunit *test, \ + const void *prev, char *desc) \ { \ typeof((array)[0]) *__next = prev ? ((typeof(__next)) prev) + 1 : (array); \ if (__next - (array) < ARRAY_SIZE((array))) { \ @@ -1724,7 +1726,8 @@ do { \ * Define function @name_gen_params which uses @array to generate parameters. */ #define KUNIT_ARRAY_PARAM_DESC(name, array, desc_member) \ - static const void *name##_gen_params(const void *prev, char *desc) \ + static const void *name##_gen_params(struct kunit *test, \ + const void *prev, char *desc) \ { \ typeof((array)[0]) *__next = prev ? 
((typeof(__next)) prev) + 1 : (array); \ if (__next - (array) < ARRAY_SIZE((array))) { \ -- cgit v1.2.3 From b820b9077b7f4008cc44a40261aefa681c63c7d3 Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:34 +0800 Subject: kunit: Enable direct registration of parameter arrays to a KUnit test KUnit parameterized tests currently support two primary methods for getting parameters: 1. Defining custom logic within a generate_params() function. 2. Using the KUNIT_ARRAY_PARAM() and KUNIT_ARRAY_PARAM_DESC() macros with a pre-defined static array and passing the created *_gen_params() to KUNIT_CASE_PARAM(). These methods present limitations when dealing with dynamically generated parameter arrays, or in scenarios where populating parameters sequentially via generate_params() is inefficient or overly complex. This patch addresses these limitations by adding a new `params_array` field to `struct kunit`, of the type `kunit_params`. The `struct kunit_params` is designed to store the parameter array itself, along with essential metadata including the parameter count, parameter size, and a get_description() function for providing custom descriptions for individual parameters. The `params_array` field can be populated by calling the new kunit_register_params_array() macro from within a param_init() function. This will register the array as part of the parameterized test context. The user will then need to pass kunit_array_gen_params() to the KUNIT_CASE_PARAM_WITH_INIT() macro as the generator function, if not providing their own. kunit_array_gen_params() is a KUnit helper that will use the registered array to generate parameters. The arrays passed to KUNIT_ARRAY_PARAM(,DESC) will also be registered to the parameterized test context for consistency as well as for higher availability of the parameter count that will be used for outputting a KTAP test plan for a parameterized test. 
This modification provides greater flexibility to the KUnit framework, allowing testers to easily register and utilize both dynamic and static parameter arrays. Link: https://lore.kernel.org/r/20250826091341.1427123-5-davidgow@google.com Reviewed-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Marie Zhussupova [Only output the test plan if using kunit_array_gen_params --David] Signed-off-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 8eba1b03c3e3..5ec5182b5e57 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -234,9 +234,13 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status) * Provides the option to register param_init() and param_exit() functions. * param_init/exit will be passed the parameterized test context and run once * before and once after the parameterized test. The init function can be used - * to add resources to share between parameter runs, and any other setup logic. - * The exit function can be used to clean up resources that were not managed by - * the parameterized test, and any other teardown logic. + * to add resources to share between parameter runs, pass parameter arrays, + * and any other setup logic. The exit function can be used to clean up resources + * that were not managed by the parameterized test, and any other teardown logic. + * + * Note: If you are registering a parameter array in param_init() with + * kunit_register_params_array() then you need to pass kunit_array_gen_params() + * to this as the generator function. */ #define KUNIT_CASE_PARAM_WITH_INIT(test_name, gen_params, init, exit) \ { .run_case = test_name, .name = #test_name, \ @@ -289,6 +293,20 @@ struct kunit_suite_set { struct kunit_suite * const *end; }; +/* Stores the pointer to the parameter array and its metadata. 
*/ +struct kunit_params { + /* + * Reference to the parameter array for a parameterized test. This + * is NULL if a parameter array wasn't directly passed to the + * parameterized test context struct kunit via kunit_register_params_array(). + */ + const void *params; + /* Reference to a function that gets the description of a parameter. */ + void (*get_description)(struct kunit *test, const void *param, char *desc); + size_t num_params; + size_t elem_size; +}; + /** * struct kunit - represents a running instance of a test. * @@ -296,16 +314,18 @@ struct kunit_suite_set { * created in the init function (see &struct kunit_suite). * @parent: reference to the parent context of type struct kunit that can * be used for storing shared resources. + * @params_array: for storing the parameter array. * * Used to store information about the current context under which the test * is running. Most of this data is private and should only be accessed - * indirectly via public functions; the two exceptions are @priv and @parent - * which can be used by the test writer to store arbitrary data and access the - * parent context, respectively. + * indirectly via public functions; the exceptions are @priv, @parent and + * @params_array which can be used by the test writer to store arbitrary data, + * access the parent context, and to store the parameter array, respectively. */ struct kunit { void *priv; struct kunit *parent; + struct kunit_params params_array; /* private: internal use only. */ const char *name; /* Read only after initialization! 
*/ @@ -376,6 +396,8 @@ void kunit_exec_list_tests(struct kunit_suite_set *suite_set, bool include_attr) struct kunit_suite_set kunit_merge_suite_sets(struct kunit_suite_set init_suite_set, struct kunit_suite_set suite_set); +const void *kunit_array_gen_params(struct kunit *test, const void *prev, char *desc); + #if IS_BUILTIN(CONFIG_KUNIT) int kunit_run_all_tests(void); #else @@ -1708,6 +1730,8 @@ do { \ const void *prev, char *desc) \ { \ typeof((array)[0]) *__next = prev ? ((typeof(__next)) prev) + 1 : (array); \ + if (!prev) \ + kunit_register_params_array(test, array, ARRAY_SIZE(array), NULL); \ if (__next - (array) < ARRAY_SIZE((array))) { \ void (*__get_desc)(typeof(__next), char *) = get_desc; \ if (__get_desc) \ @@ -1730,6 +1754,8 @@ do { \ const void *prev, char *desc) \ { \ typeof((array)[0]) *__next = prev ? ((typeof(__next)) prev) + 1 : (array); \ + if (!prev) \ + kunit_register_params_array(test, array, ARRAY_SIZE(array), NULL); \ if (__next - (array) < ARRAY_SIZE((array))) { \ strscpy(desc, __next->desc_member, KUNIT_PARAM_DESC_SIZE); \ return __next; \ @@ -1737,6 +1763,33 @@ do { \ return NULL; \ } +/** + * kunit_register_params_array() - Register parameter array for a KUnit test. + * @test: The KUnit test structure to which parameters will be added. + * @array: An array of test parameters. + * @param_count: Number of parameters. + * @get_desc: Function that generates a string description for a given parameter + * element. + * + * This macro initializes the @test's parameter array data, storing information + * including the parameter array, its count, the element size, and the parameter + * description function within `test->params_array`. + * + * Note: If using this macro in param_init(), kunit_array_gen_params() + * will then need to be manually provided as the parameter generator function to + * KUNIT_CASE_PARAM_WITH_INIT(). 
kunit_array_gen_params() is a KUnit + * function that uses the registered array to generate parameters + */ +#define kunit_register_params_array(test, array, param_count, get_desc) \ + do { \ + struct kunit *_test = (test); \ + const typeof((array)[0]) * _params_ptr = &(array)[0]; \ + _test->params_array.params = _params_ptr; \ + _test->params_array.num_params = (param_count); \ + _test->params_array.elem_size = sizeof(*_params_ptr); \ + _test->params_array.get_description = (get_desc); \ + } while (0) + // TODO(dlatypov@google.com): consider eventually migrating users to explicitly // include resource.h themselves if they need it. #include -- cgit v1.2.3 From 19a9a1ab5c3dce65fff4ac50700117039c23d525 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:32 -0700 Subject: KVM: Rename CONFIG_KVM_PRIVATE_MEM to CONFIG_KVM_GUEST_MEMFD Rename the Kconfig option CONFIG_KVM_PRIVATE_MEM to CONFIG_KVM_GUEST_MEMFD. The original name implied that the feature only supported "private" memory. However, CONFIG_KVM_PRIVATE_MEM enables guest_memfd in general, which is not exclusively for private memory. Subsequent patches in this series will add guest_memfd support for non-CoCo VMs, whose memory is not private. Renaming the Kconfig option to CONFIG_KVM_GUEST_MEMFD more accurately reflects its broader scope as the main Kconfig option for all guest_memfd-backed memory. This provides clearer semantics for the option and avoids confusion as new features are introduced. 
Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..8cdc0b3cc1b1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -602,7 +602,7 @@ struct kvm_memory_slot { short id; u16 as_id; -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD struct { /* * Writes protected by kvm->slots_lock. Acquiring a @@ -720,10 +720,10 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) #endif /* - * Arch code must define kvm_arch_has_private_mem if support for private memory - * is enabled. + * Arch code must define kvm_arch_has_private_mem if support for guest_memfd is + * enabled. 
*/ -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) +#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; @@ -2505,7 +2505,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { - return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && + return IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) && kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else @@ -2515,7 +2515,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) } #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ -#ifdef CONFIG_KVM_PRIVATE_MEM +#ifdef CONFIG_KVM_GUEST_MEMFD int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, struct page **page, int *max_order); @@ -2528,7 +2528,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, KVM_BUG_ON(1, kvm); return -EIO; } -#endif /* CONFIG_KVM_PRIVATE_MEM */ +#endif /* CONFIG_KVM_GUEST_MEMFD */ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); -- cgit v1.2.3 From 36cf63bb5df68836e55e2839f8174b404d47670b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:36 -0700 Subject: KVM: Rename CONFIG_KVM_GENERIC_PRIVATE_MEM to CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE The original name was vague regarding its functionality. This Kconfig option specifically enables and gates the kvm_gmem_populate() function, which is responsible for populating a GPA range with guest data. The new name, HAVE_KVM_ARCH_GMEM_POPULATE, describes the purpose of the option: to enable arch-specific guest_memfd population mechanisms. It also follows the same pattern as the other HAVE_KVM_ARCH_* configuration options. 
This improves clarity for developers and ensures the name accurately reflects the functionality it controls, especially as guest_memfd support expands beyond purely "private" memory scenarios. Temporarily keep KVM_GENERIC_PRIVATE_MEM as an x86-only config so as to minimize churn, and to hopefully make it easier to see what features require HAVE_KVM_ARCH_GMEM_POPULATE. On that note, omit GMEM_POPULATE for KVM_X86_SW_PROTECTED_VM, as regular ol' memset() suffices for software-protected VMs. As for KVM_GENERIC_PRIVATE_MEM, a future change will select KVM_GUEST_MEMFD for all 64-bit KVM builds, at which point the intermediate config will become obsolete and can/will be dropped. Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8cdc0b3cc1b1..ddfb6cfe20a6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2534,7 +2534,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); #endif -#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * -- cgit v1.2.3 From 923310be23b275f730e8869abc783db6296fc043 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:37 -0700 Subject: KVM: Rename kvm_slot_can_be_private() to kvm_slot_has_gmem() Rename kvm_slot_can_be_private() to kvm_slot_has_gmem() to improve clarity and accurately reflect its purpose. 
The function kvm_slot_can_be_private() was previously used to check if a given kvm_memory_slot is backed by guest_memfd. However, its name implied that the memory in such a slot was exclusively "private". As guest_memfd support expands to include non-private memory (e.g., shared host mappings), it's important to remove this association. The new name, kvm_slot_has_gmem(), states that the slot is backed by guest_memfd without making assumptions about the memory's privacy attributes. Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ddfb6cfe20a6..4c5e0a898652 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -615,7 +615,7 @@ struct kvm_memory_slot { #endif }; -static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) +static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot) { return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); } -- cgit v1.2.3 From 69116e01f6fee030db45d269f28f9c300b8dc9d6 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:38 -0700 Subject: KVM: Fix comments that refer to slots_lock Fix comments so that they refer to slots_lock instead of slots_locks (remove trailing s). 
Reviewed-by: David Hildenbrand Reviewed-by: Ira Weiny Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4c5e0a898652..5c25b03d3d50 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -860,7 +860,7 @@ struct kvm { struct notifier_block pm_notifier; #endif #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES - /* Protected by slots_locks (for writes) and RCU (for reads) */ + /* Protected by slots_lock (for writes) and RCU (for reads) */ struct xarray mem_attr_array; #endif char stats_id[KVM_STATS_NAME_SIZE]; -- cgit v1.2.3 From 68d189938709a5918d7308eb922c30bcbf16ebb9 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:39 -0700 Subject: KVM: Fix comment that refers to kvm uapi header path The comment that points to the path where the user-visible memslot flags are refers to an outdated path and has a typo. Update the comment to refer to the correct path. 
Reviewed-by: David Hildenbrand Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Vlastimil Babka Reviewed-by: Xiaoyao Li Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c25b03d3d50..56ea8c862cfd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -52,7 +52,7 @@ /* * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally * used in kvm, other bits are visible for userspace which are defined in - * include/linux/kvm_h. + * include/uapi/linux/kvm.h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) -- cgit v1.2.3 From d1e54dd08f163a9021433020d16a8f8f70ddc41c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:40 -0700 Subject: KVM: x86: Enable KVM_GUEST_MEMFD for all 64-bit builds Enable KVM_GUEST_MEMFD for all KVM x86 64-bit builds, i.e. for "default" VM types when running on 64-bit KVM. This will allow using guest_memfd to back non-private memory for all VM shapes, by supporting mmap() on guest_memfd. Opportunistically clean up various conditionals that become tautologies once x86 selects KVM_GUEST_MEMFD more broadly. Specifically, because SW protected VMs, SEV, and TDX are all 64-bit only, private memory no longer needs to take explicit dependencies on KVM_GUEST_MEMFD, because it is effectively a prerequisite. 
Suggested-by: Sean Christopherson Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Reviewed-by: David Hildenbrand Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 56ea8c862cfd..4d1c44622056 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -719,11 +719,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) } #endif -/* - * Arch code must define kvm_arch_has_private_mem if support for guest_memfd is - * enabled. - */ -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) +#ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; @@ -2505,8 +2501,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { - return IS_ENABLED(CONFIG_KVM_GUEST_MEMFD) && - kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; + return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) -- cgit v1.2.3 From a12578e1477cbfb547256ed8dee6d5142a59cdcd Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:41 -0700 Subject: KVM: guest_memfd: Add plumbing to host to map guest_memfd pages Introduce the core infrastructure to enable host userspace to mmap() guest_memfd-backed memory. This is needed for several evolving KVM use cases: * Non-CoCo VM backing: Allows VMMs like Firecracker to run guests entirely backed by guest_memfd, even for non-CoCo VMs [1]. This provides a unified memory management model and simplifies guest memory handling. 
* Direct map removal for enhanced security: This is an important step for direct map removal of guest memory [2]. By allowing host userspace to fault in guest_memfd pages directly, we can avoid maintaining host kernel direct maps of guest memory. This provides additional hardening against Spectre-like transient execution attacks by removing a potential attack surface within the kernel. * Future guest_memfd features: This also lays the groundwork for future enhancements to guest_memfd, such as supporting huge pages and enabling in-place sharing of guest memory with the host for CoCo platforms that permit it [3]. Enable the basic mmap and fault handling logic within guest_memfd, but hold off on allowing userspace to actually do mmap() until the architecture support is also in place. [1] https://github.com/firecracker-microvm/firecracker/tree/feature/secret-hiding [2] https://lore.kernel.org/linux-mm/cc1bb8e9bc3e1ab637700a4d3defeec95b55060a.camel@amazon.com [3] https://lore.kernel.org/all/c1c9591d-218a-495c-957b-ba356c8f8e09@redhat.com/T/#u Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Acked-by: David Hildenbrand Co-developed-by: Ackerley Tng Signed-off-by: Ackerley Tng Signed-off-by: Fuad Tabba Reviewed-by: Xiaoyao Li Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-11-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4d1c44622056..26bad600f9fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -726,6 +726,10 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifdef CONFIG_KVM_GUEST_MEMFD +bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +#endif + #ifndef kvm_arch_has_readonly_mem static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) { -- cgit v1.2.3 From 576d035e2aef52f8d8d3ce29af556d4c6bd2e0fe Mon Sep 17 00:00:00 2001 From: Fuad
Tabba Date: Tue, 29 Jul 2025 15:54:42 -0700 Subject: KVM: guest_memfd: Track guest_memfd mmap support in memslot Add a new internal flag, KVM_MEMSLOT_GMEM_ONLY, to the top half of memslot->flags (which makes it strictly for KVM's internal use). This flag tracks when a guest_memfd-backed memory slot supports host userspace mmap operations, which implies that all memory, not just private memory for CoCo VMs, is consumed through guest_memfd: "gmem only". This optimization avoids repeatedly checking the underlying guest_memfd file for mmap support, which would otherwise require taking and releasing a reference on the file for each check. By caching this information directly in the memslot, we reduce overhead and simplify the logic involved in handling guest_memfd-backed pages for host mappings. Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Xiaoyao Li Acked-by: David Hildenbrand Suggested-by: David Hildenbrand Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-12-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 26bad600f9fa..8b47891adca1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -54,7 +54,8 @@ * used in kvm, other bits are visible for userspace which are defined in * include/uapi/linux/kvm.h. 
*/ -#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_INVALID (1UL << 16) +#define KVM_MEMSLOT_GMEM_ONLY (1UL << 17) /* * Bit 63 of the memslot generation number is an "update in-progress flag", @@ -2490,6 +2491,14 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; } +static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot) +{ + if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD)) + return false; + + return slot->flags & KVM_MEMSLOT_GMEM_ONLY; +} + #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) { -- cgit v1.2.3 From 3d3a04fad25a6621828518a2abe536142d2c1a7d Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 29 Jul 2025 15:54:52 -0700 Subject: KVM: Allow and advertise support for host mmap() on guest_memfd files Now that all the x86 and arm64 plumbing for mmap() on guest_memfd is in place, allow userspace to set GUEST_MEMFD_FLAG_MMAP and advertise support via a new capability, KVM_CAP_GUEST_MEMFD_MMAP. The availability of this capability is determined per architecture, and its enablement for a specific guest_memfd instance is controlled by the GUEST_MEMFD_FLAG_MMAP flag at creation time. Update the KVM API documentation to detail the KVM_CAP_GUEST_MEMFD_MMAP capability, the associated GUEST_MEMFD_FLAG_MMAP, and provide essential information regarding support for mmap in guest_memfd. 
Reviewed-by: David Hildenbrand Reviewed-by: Gavin Shan Reviewed-by: Shivank Garg Reviewed-by: Xiaoyao Li Signed-off-by: Fuad Tabba Signed-off-by: Sean Christopherson Message-ID: <20250729225455.670324-22-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f0f0d49d2544..6efa98a57ec1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -962,6 +962,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 +#define KVM_CAP_GUEST_MEMFD_MMAP 244 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1598,6 +1599,7 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) struct kvm_create_guest_memfd { __u64 size; -- cgit v1.2.3 From 7a37f55af7af868119b4fb69285f5fa03ba8cf35 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Aug 2025 17:10:56 +0200 Subject: fuse: add COPY_FILE_RANGE_64 that allows large copies The FUSE protocol uses struct fuse_write_out to convey the return value of copy_file_range, which is restricted to uint32_t. But the COPY_FILE_RANGE interface supports 64-bit copy sizes and there's no reason why copies should be limited to 32-bit. Introduce a new op COPY_FILE_RANGE_64, which is identical, except the number of bytes copied is returned in a 64-bit value. If the fuse server does not support COPY_FILE_RANGE_64, fall back to COPY_FILE_RANGE. 
Reported-by: Florian Weimer Closes: https://lore.kernel.org/all/lhuh5ynl8z5.fsf@oldenburg.str.redhat.com/ Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 122d6586e8d4..94621f68a5cc 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -235,6 +235,10 @@ * * 7.44 * - add FUSE_NOTIFY_INC_EPOCH + * + * 7.45 + * - add FUSE_COPY_FILE_RANGE_64 + * - add struct fuse_copy_file_range_out */ #ifndef _LINUX_FUSE_H @@ -270,7 +274,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 44 +#define FUSE_KERNEL_MINOR_VERSION 45 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -657,6 +661,7 @@ enum fuse_opcode { FUSE_SYNCFS = 50, FUSE_TMPFILE = 51, FUSE_STATX = 52, + FUSE_COPY_FILE_RANGE_64 = 53, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -1148,6 +1153,11 @@ struct fuse_copy_file_range_in { uint64_t flags; }; +/* For FUSE_COPY_FILE_RANGE_64 */ +struct fuse_copy_file_range_out { + uint64_t bytes_copied; +}; + #define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0) #define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1) struct fuse_setupmapping_in { -- cgit v1.2.3 From dd6a5a71c811289eec234e78cb9ca34d055d2ad5 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 10 Jun 2025 13:52:28 +0900 Subject: sched/wait: Add wait_event_state_exclusive() Allows exclusive waits with a custom @state. 
Signed-off-by: Sergey Senozhatsky Acked-by: Peter Zijlstra (Intel) Signed-off-by: Miklos Szeredi --- include/linux/wait.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 09855d819418..f648044466d5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -965,6 +965,18 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); __ret; \ }) +#define __wait_event_state_exclusive(wq, condition, state) \ + ___wait_event(wq, condition, state, 1, 0, schedule()) + +#define wait_event_state_exclusive(wq, condition, state) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_state_exclusive(wq, condition, state); \ + __ret; \ +}) + #define __wait_event_killable_timeout(wq_head, condition, timeout) \ ___wait_event(wq_head, ___wait_cond_timeout(condition), \ TASK_KILLABLE, 0, timeout, \ -- cgit v1.2.3 From 494d2f508883a6e5c4530e5c6b3c8b2bbfb7318d Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 7 Jul 2025 16:46:05 -0700 Subject: fuse: use default writeback accounting commit 0c58a97f919c ("fuse: remove tmp folio for writebacks and internal rb tree") removed temp folios for dirty page writeback. Consequently, fuse can now use the default writeback accounting. With switching fuse to use default writeback accounting, there are some added benefits. This updates wb->writeback_inodes tracking as well now and updates writeback throughput estimates after writeback completion. This commit also removes inc_wb_stat() and dec_wb_stat(). These have no callers anymore now that fuse does not call them. 
Signed-off-by: Joanne Koong Reviewed-by: David Hildenbrand Reviewed-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- include/linux/backing-dev.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e721148c95d0..9a1e895dd5df 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -66,16 +66,6 @@ static inline void wb_stat_mod(struct bdi_writeback *wb, percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } -static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) -{ - wb_stat_mod(wb, item, 1); -} - -static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) -{ - wb_stat_mod(wb, item, -1); -} - static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { return percpu_counter_read_positive(&wb->stat[item]); -- cgit v1.2.3 From 2841808f35eebfd07150333f3af3007cb2904a09 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 7 Jul 2025 16:46:06 -0700 Subject: mm: remove BDI_CAP_WRITEBACK_ACCT There are no users of BDI_CAP_WRITEBACK_ACCT now that fuse doesn't do its own writeback accounting. This commit removes BDI_CAP_WRITEBACK_ACCT. 
Signed-off-by: Joanne Koong Acked-by: David Hildenbrand Signed-off-by: Miklos Szeredi --- include/linux/backing-dev.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 9a1e895dd5df..3e64f14739dd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -108,12 +108,10 @@ int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit * * BDI_CAP_WRITEBACK: Supports dirty page writeback, and dirty pages * should contribute to accounting - * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold */ #define BDI_CAP_WRITEBACK (1 << 0) -#define BDI_CAP_WRITEBACK_ACCT (1 << 1) -#define BDI_CAP_STRICTLIMIT (1 << 2) +#define BDI_CAP_STRICTLIMIT (1 << 1) extern struct backing_dev_info noop_backing_dev_info; -- cgit v1.2.3 From 08383cd479f8212fafee2f557b58cfd48818bee0 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Wed, 20 Aug 2025 09:54:02 +0200 Subject: pinctrl: Add pinctrl_pm_select_init_state helper function If a platform requires an initial pinctrl state during probing, this helper function provides the client with access to the same initial state. e.g.: xxx_suspend_noirq ... pinctrl_pm_select_sleep_state xxx_resume_noirq pinctrl_pm_select_init_state ... 
pinctrl_pm_select_default_state Signed-off-by: Christian Bruel Signed-off-by: Manivannan Sadhasivam Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20250820075411.1178729-3-christian.bruel@foss.st.com --- include/linux/pinctrl/consumer.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 73de70362b98..63ce16191eb9 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -48,6 +48,7 @@ int pinctrl_select_default_state(struct device *dev); #ifdef CONFIG_PM int pinctrl_pm_select_default_state(struct device *dev); +int pinctrl_pm_select_init_state(struct device *dev); int pinctrl_pm_select_sleep_state(struct device *dev); int pinctrl_pm_select_idle_state(struct device *dev); #else @@ -55,6 +56,10 @@ static inline int pinctrl_pm_select_default_state(struct device *dev) { return 0; } +static inline int pinctrl_pm_select_init_state(struct device *dev) +{ + return 0; +} static inline int pinctrl_pm_select_sleep_state(struct device *dev) { return 0; @@ -143,6 +148,11 @@ static inline int pinctrl_pm_select_default_state(struct device *dev) return 0; } +static inline int pinctrl_pm_select_init_state(struct device *dev) +{ + return 0; +} + static inline int pinctrl_pm_select_sleep_state(struct device *dev) { return 0; -- cgit v1.2.3 From dcb34659028f856c423a29ef9b4e2571d203444d Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Sat, 23 Aug 2025 17:58:55 +0900 Subject: net: rose: split remove and free operations in rose_remove_neigh() The current rose_remove_neigh() performs two distinct operations: 1. Removes rose_neigh from rose_neigh_list 2. Frees the rose_neigh structure Split these operations into separate functions to improve maintainability and prepare for upcoming refcount_t conversion. The timer cleanup remains in rose_remove_neigh() because free operations can be called from timer itself. 
This patch introduces rose_neigh_put() to handle the freeing of rose_neigh structures and modifies rose_remove_neigh() to handle removal only. Signed-off-by: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250823085857.47674-2-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- include/net/rose.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/rose.h b/include/net/rose.h index 23267b4efcfa..174b4f605d84 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -151,6 +151,14 @@ struct rose_sock { #define rose_sk(sk) ((struct rose_sock *)(sk)) +static inline void rose_neigh_put(struct rose_neigh *rose_neigh) +{ + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); + kfree(rose_neigh->digipeat); + kfree(rose_neigh); +} + /* af_rose.c */ extern ax25_address rose_callsign; extern int sysctl_rose_restart_request_timeout; -- cgit v1.2.3 From d860d1faa6b2ce3becfdb8b0c2b048ad31800061 Mon Sep 17 00:00:00 2001 From: Takamitsu Iwai Date: Sat, 23 Aug 2025 17:58:56 +0900 Subject: net: rose: convert 'use' field to refcount_t The 'use' field in struct rose_neigh is used as a reference counter but lacks atomicity. This can lead to race conditions where a rose_neigh structure is freed while still being referenced by other code paths. For example, when rose_neigh->use becomes zero during an ioctl operation via rose_rt_ioctl(), the structure may be removed while its timer is still active, potentially causing use-after-free issues. This patch changes the type of 'use' from unsigned short to refcount_t and updates all code paths to use rose_neigh_hold() and rose_neigh_put() which operate on reference counts atomically. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Takamitsu Iwai Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250823085857.47674-3-takamitz@amazon.co.jp Signed-off-by: Jakub Kicinski --- include/net/rose.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/rose.h b/include/net/rose.h index 174b4f605d84..2b5491bbf39a 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -8,6 +8,7 @@ #ifndef _ROSE_H #define _ROSE_H +#include #include #include #include @@ -96,7 +97,7 @@ struct rose_neigh { ax25_cb *ax25; struct net_device *dev; unsigned short count; - unsigned short use; + refcount_t use; unsigned int number; char restarted; char dce_mode; @@ -151,12 +152,19 @@ struct rose_sock { #define rose_sk(sk) ((struct rose_sock *)(sk)) +static inline void rose_neigh_hold(struct rose_neigh *rose_neigh) +{ + refcount_inc(&rose_neigh->use); +} + static inline void rose_neigh_put(struct rose_neigh *rose_neigh) { - if (rose_neigh->ax25) - ax25_cb_put(rose_neigh->ax25); - kfree(rose_neigh->digipeat); - kfree(rose_neigh); + if (refcount_dec_and_test(&rose_neigh->use)) { + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); + kfree(rose_neigh->digipeat); + kfree(rose_neigh); + } } /* af_rose.c */ -- cgit v1.2.3 From e26dca67fde194340582cfbb0c0bf661825e9e46 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Aug 2025 14:14:41 -0600 Subject: io_uring: add support for IORING_SETUP_CQE_MIXED Normal rings support 16b CQEs for posting completions, while certain features require the ring to be configured with IORING_SETUP_CQE32, as they need to convey more information per completion. This, in turn, makes ALL the CQEs be 32b in size. This is somewhat wasteful and inefficient, particularly when only certain CQEs need to be of the bigger variant. This adds support for setting up a ring with mixed CQE sizes, using IORING_SETUP_CQE_MIXED. 
When set up in this mode, CQEs posted to the ring may be either 16b or 32b in size. If a CQE is 32b in size, then IORING_CQE_F_32 is set in the CQE flags to indicate that this is the case. If this flag isn't set, the CQE is the normal 16b variant. CQEs on these types of mixed rings may also have IORING_CQE_F_SKIP set. This can happen if the ring is one (small) CQE entry away from wrapping, and an attempt is made to post a 32b CQE. As CQEs must be contiguous in the CQ ring, a 32b CQE cannot wrap the ring. For this case, a single dummy CQE is posted with the SKIP flag set. The application should simply ignore those. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 7af8d10b3aba..5135e1be0390 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -225,6 +225,12 @@ enum io_uring_sqe_flags_bit { /* Use hybrid poll in iopoll process */ #define IORING_SETUP_HYBRID_IOPOLL (1U << 17) +/* + * Allow both 16b and 32b CQEs. If a 32b CQE is posted, it will have + * IORING_CQE_F_32 set in cqe->flags. + */ +#define IORING_SETUP_CQE_MIXED (1U << 18) + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, -- cgit v1.2.3 From 806ecb209aa86fcc1d92bc9f10323cf773f64d6d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Aug 2025 14:22:16 -0600 Subject: io_uring/nop: add support for IORING_SETUP_CQE_MIXED This adds support for setting IORING_NOP_CQE32 as a flag for a NOP command, in which case a 32b CQE will be posted rather than a regular one. This is the default if the ring has been set up with IORING_SETUP_CQE32. If the ring has been set up with IORING_SETUP_CQE_MIXED, then 16b CQEs will be posted without this flag set, and 32b CQEs if this flag is set. For the latter case, sqe->off is what will be posted as cqe->big_cqe[0] and sqe->addr is what will be posted as cqe->big_cqe[1]. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5135e1be0390..04ebff33d0e6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -464,6 +464,7 @@ enum io_uring_msg_ring_flags { #define IORING_NOP_FIXED_FILE (1U << 2) #define IORING_NOP_FIXED_BUFFER (1U << 3) #define IORING_NOP_TW (1U << 4) +#define IORING_NOP_CQE32 (1U << 5) /* * IO completion data structure (Completion Queue Entry) -- cgit v1.2.3 From 1df7dad4d5c49335b72e26d833def960b2de76e3 Mon Sep 17 00:00:00 2001 From: Nandakumar Edamana Date: Tue, 26 Aug 2025 09:15:23 +0530 Subject: bpf: Improve the general precision of tnum_mul Drop the value-mask decomposition technique and adopt straightforward long-multiplication with a twist: when LSB(a) is uncertain, find the two partial products (for LSB(a) = known 0 and LSB(a) = known 1) and take a union. Experiment shows that applying this technique in long multiplication improves the precision in a significant number of cases (at the cost of losing precision in a relatively lower number of cases). 
Signed-off-by: Nandakumar Edamana Signed-off-by: Andrii Nakryiko Tested-by: Harishankar Vishwanathan Reviewed-by: Harishankar Vishwanathan Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20250826034524.2159515-1-nandakumar@nandakumar.co.in --- include/linux/tnum.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 0ffb77ffe0e8..c52b862dad45 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -57,6 +57,9 @@ bool tnum_overlap(struct tnum a, struct tnum b); /* Return a tnum representing numbers satisfying both @a and @b */ struct tnum tnum_intersect(struct tnum a, struct tnum b); +/* Returns a tnum representing numbers satisfying either @a or @b */ +struct tnum tnum_union(struct tnum t1, struct tnum t2); + /* Return @a with all but the lowest @size bytes cleared */ struct tnum tnum_cast(struct tnum a, u8 size); -- cgit v1.2.3 From 97bcc5b6f45425ac56fb04b0893cdaa607ec7e45 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Mon, 25 Aug 2025 08:40:04 +0530 Subject: net: Prevent RPS table overwrite of active flows This patch fixes an issue where two different flows on the same RXq produce the same hash resulting in continuous flow overwrites. Flow #1: A packet for Flow #1 comes in, kernel calls the steering function. The driver gives back a filter id. The kernel saves this filter id in the selected slot. Later, the driver's service task checks if any filters have expired and then installs the rule for Flow #1. Flow #2: A packet for Flow #2 comes in. It goes through the same steps. But this time, the chosen slot is being used by Flow #1. The driver gives a new filter id and the kernel saves it in the same slot. When the driver's service task runs, it runs through all the flows, checks if Flow #1 should be expired, the kernel returns True as the slot has a different filter id, and then the driver installs the rule for Flow #2. Flow #1: Another packet for Flow #1 comes in. 
The same thing repeats. The slot is overwritten with a new filter id for Flow #1. This causes a repeated cycle of flow programming for missed packets, wasting CPU cycles while not improving performance. This problem happens at higher rates when the RPS table is small, but tests show it still happens even with 12,000 connections and an RPS size of 16K per queue (global table size = 144x16K = 64K). This patch prevents overwriting an rps_dev_flow entry if it is active. The intention is that it is better to do aRFS for the first flow instead of hurting all flows on the same hash. Without this, two (or more) flows on one RX queue with the same hash can keep overwriting each other. This causes the driver to reprogram the flow repeatedly. Changes: 1. Add a new 'hash' field to struct rps_dev_flow. 2. Add rps_flow_is_active(): a helper function to check if a flow is active or not, extracted from rps_may_expire_flow(). It is further simplified as per reviewer feedback. 3. In set_rps_cpu(): - Avoid overwriting by programming a new filter if: - The slot is not in use, or - The slot is in use but the flow is not active, or - The slot has an active flow with the same hash, but target CPU differs. - Save the hash in the rps_dev_flow entry. 4. rps_may_expire_flow(): Use earlier extracted rps_flow_is_active(). Testing & results: - Driver: ice (E810 NIC), Kernel: net-next - #CPUs = #RXq = 144 (1:1) - Number of flows: 12K - Eight RPS settings from 256 to 32768. Though RPS=256 is not ideal, it is still sufficient to cover 12K flows (256*144 rx-queues = 64K global table slots) - Global Table Size = 144 * RPS (effectively equal to 256 * RPS) - Each RPS test duration = 8 mins (org code) + 8 mins (new code). 
- Metrics captured on client Legend for following tables: Steer-C: #times ndo_rx_flow_steer() was Called by set_rps_cpu() Steer-L: #times ice_arfs_flow_steer() Looped over aRFS entries Add: #times driver actually programmed aRFS (ice_arfs_build_entry()) Del: #times driver deleted the flow (ice_arfs_del_flow_rules()) Units: K = 1,000 times, M = 1 million times |-------|---------|------| Org Code |---------|---------| | RPS | Latency | CPU | Add | Del | Steer-C | Steer-L | |-------|---------|------|--------|--------|---------|---------| | 256 | 227.0 | 93.2 | 1.6M | 1.6M | 121.7M | 267.6M | | 512 | 225.9 | 94.1 | 11.5M | 11.2M | 65.7M | 199.6M | | 1024 | 223.5 | 95.6 | 16.5M | 16.5M | 27.1M | 187.3M | | 2048 | 222.2 | 96.3 | 10.5M | 10.5M | 12.5M | 115.2M | | 4096 | 223.9 | 94.1 | 5.5M | 5.5M | 7.2M | 65.9M | | 8192 | 224.7 | 92.5 | 2.7M | 2.7M | 3.0M | 29.9M | | 16384 | 223.5 | 92.5 | 1.3M | 1.3M | 1.4M | 13.9M | | 32768 | 219.6 | 93.2 | 838.1K | 838.1K | 965.1K | 8.9M | |-------|---------|------| New Code |---------|---------| | 256 | 201.5 | 99.1 | 13.4K | 5.0K | 13.7K | 75.2K | | 512 | 202.5 | 98.2 | 11.2K | 5.9K | 11.2K | 55.5K | | 1024 | 207.3 | 93.9 | 11.5K | 9.7K | 11.5K | 59.6K | | 2048 | 207.5 | 96.7 | 11.8K | 11.1K | 15.5K | 79.3K | | 4096 | 206.9 | 96.6 | 11.8K | 11.7K | 11.8K | 63.2K | | 8192 | 205.8 | 96.7 | 11.9K | 11.8K | 11.9K | 63.9K | | 16384 | 200.9 | 98.2 | 11.9K | 11.9K | 11.9K | 64.2K | | 32768 | 202.5 | 98.0 | 11.9K | 11.9K | 11.9K | 64.2K | |-------|---------|------|--------|--------|---------|---------| Some observations: 1. Overall Latency improved: (1790.19-1634.94)/1790.19*100 = 8.67% 2. Overall CPU increased: (777.32-751.49)/751.45*100 = 3.44% 3. Flow Management (add/delete) remained almost constant at ~11K compared to values in millions. 
Signed-off-by: Krishna Kumar Link: https://patch.msgid.link/20250825031005.3674864-2-krikku@gmail.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/rps.h b/include/net/rps.h index d8ab3a08bcc4..9917dce42ca4 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -25,13 +25,16 @@ struct rps_map { /* * The rps_dev_flow structure contains the mapping of a flow to a CPU, the - * tail pointer for that CPU's input queue at the time of last enqueue, and - * a hardware filter index. + * tail pointer for that CPU's input queue at the time of last enqueue, a + * hardware filter index, and the hash of the flow if aRFS is enabled. */ struct rps_dev_flow { u16 cpu; u16 filter; unsigned int last_qtail; +#ifdef CONFIG_RFS_ACCEL + u32 hash; +#endif }; #define RPS_NO_FILTER 0xffff -- cgit v1.2.3 From 6310c149e5dede74bb47110e0d7a38c78772c152 Mon Sep 17 00:00:00 2001 From: Brian Mak Date: Tue, 5 Aug 2025 14:15:26 -0700 Subject: kexec: add KEXEC_FILE_NO_CMA as a legal flag Commit 07d24902977e ("kexec: enable CMA based contiguous allocation") introduces logic to use CMA-based allocation in kexec by default. As part of the changes, it introduces a kexec_file_load flag to disable the use of CMA allocations from userspace. However, this flag is broken since it is missing from the list of legal flags for kexec_file_load. kexec_file_load returns EINVAL when attempting to use the flag. Fix this by adding the KEXEC_FILE_NO_CMA flag to the list of legal flags for kexec_file_load. Without this fix, the kexec_file_load syscall will fail and return '-EINVAL' when KEXEC_FILE_NO_CMA is specified. Link: https://lkml.kernel.org/r/20250805211527.122367-2-makb@juniper.net Fixes: 07d24902977e ("kexec: enable CMA based contiguous allocation") Signed-off-by: Brian Mak Acked-by: Baoquan He Cc: Alexander Graf Cc: Borislav Petkov Cc: Dave Young Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Rob Herring Cc: Saravana Kannan Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/kexec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1b10a5d84b68..39fe3e6cd282 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -460,7 +460,8 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ - KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG) + KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ + KEXEC_FILE_NO_CMA) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -- cgit v1.2.3 From 7cc183f2e67d19b03ee5c13a6664b8c6cc37ff9d Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 18 Aug 2025 11:02:04 +0900 Subject: mm: move page table sync declarations to linux/pgtable.h During our internal testing, we started observing intermittent boot failures when the machine uses 4-level paging and has a large amount of persistent memory: BUG: unable to handle page fault for address: ffffe70000000034 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP NOPTI RIP: 0010:__init_single_page+0x9/0x6d Call Trace: __init_zone_device_page+0x17/0x5d memmap_init_zone_device+0x154/0x1bb pagemap_range+0x2e0/0x40f memremap_pages+0x10b/0x2f0 devm_memremap_pages+0x1e/0x60 dev_dax_probe+0xce/0x2ec [device_dax] dax_bus_probe+0x6d/0xc9 [... snip ...] It turns out that the kernel panics while initializing vmemmap (struct page array) when the vmemmap region spans two PGD entries, because the new PGD entry is only installed in init_mm.pgd, but not in the page tables of other tasks. 
And looking at __populate_section_memmap(): if (vmemmap_can_optimize(altmap, pgmap)) // does not sync top level page tables r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap); else // sync top level page tables in x86 r = vmemmap_populate(start, end, nid, altmap); In the normal path, vmemmap_populate() in arch/x86/mm/init_64.c synchronizes the top level page table (See commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes")) so that all tasks in the system can see the new vmemmap area. However, when vmemmap_can_optimize() returns true, the optimized path skips synchronization of top-level page tables. This is because vmemmap_populate_compound_pages() is implemented in core MM code, which does not handle synchronization of the top-level page tables. Instead, the core MM has historically relied on each architecture to perform this synchronization manually. We're not the first party to encounter a crash caused by not-sync'd top level page tables: earlier this year, Gwan-gyeong Mun attempted to address the issue [1] [2] after hitting a kernel panic when x86 code accessed the vmemmap area before the corresponding top-level entries were synced. At that time, the issue was believed to be triggered only when struct page was enlarged for debugging purposes, and the patch did not get further updates. It turns out that the current approach of relying on each arch to handle the page table sync manually is fragile because 1) it's easy to forget to sync the top level page table, and 2) it's also easy to overlook that the kernel should not access the vmemmap and direct mapping areas before the sync. # The solution: Make page table sync more robust and harder to miss To address this, Dave Hansen suggested [3] [4] introducing {pgd,p4d}_populate_kernel() for updating the kernel portion of the page tables and allowing each architecture to explicitly perform synchronization when installing top-level entries. 
With this approach, we no longer need to worry about missing the sync step, reducing the risk of future regressions. The new interface reuses the existing ARCH_PAGE_TABLE_SYNC_MASK, PGTBL_P*D_MODIFIED and arch_sync_kernel_mappings() facilities used by vmalloc and ioremap to synchronize page tables. pgd_populate_kernel() looks like this: static inline void pgd_populate_kernel(unsigned long addr, pgd_t *pgd, p4d_t *p4d) { pgd_populate(&init_mm, pgd, p4d); if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED) arch_sync_kernel_mappings(addr, addr); } It is worth noting that vmalloc() and apply_to_range() carefully synchronize page tables by calling p*d_alloc_track() and arch_sync_kernel_mappings(), and thus they are not affected by this patch series. This series was hugely inspired by Dave Hansen's suggestion and hence added Suggested-by: Dave Hansen. Cc stable because lack of this series opens the door to intermittent boot failures. This patch (of 3): Move ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to linux/pgtable.h so that they can be used outside of vmalloc and ioremap. 
Link: https://lkml.kernel.org/r/20250818020206.4517-1-harry.yoo@oracle.com Link: https://lkml.kernel.org/r/20250818020206.4517-2-harry.yoo@oracle.com Link: https://lore.kernel.org/linux-mm/20250220064105.808339-1-gwan-gyeong.mun@intel.com [1] Link: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@intel.com [2] Link: https://lore.kernel.org/linux-mm/d1da214c-53d3-45ac-a8b6-51821c5416e4@intel.com [3] Link: https://lore.kernel.org/linux-mm/4d800744-7b88-41aa-9979-b245e8bf794b@intel.com [4] Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") Signed-off-by: Harry Yoo Acked-by: Kiryl Shutsemau Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: "Uladzislau Rezki (Sony)" Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Alexander Potapenko Cc: Alistair Popple Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: bibo mao Cc: Borislav Betkov Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Dev Jain Cc: Dmitriy Vyukov Cc: Gwan-gyeong Mun Cc: Ingo Molnar Cc: Jane Chu Cc: Joao Martins Cc: Joerg Roedel Cc: John Hubbard Cc: Kevin Brodsky Cc: Liam Howlett Cc: Michal Hocko Cc: Oscar Salvador Cc: Peter Xu Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vincenzo Frascino Cc: Vlastimil Babka Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 16 ++++++++++++++++ include/linux/vmalloc.h | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 4c035637eeb7..ba699df6ef69 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1467,6 +1467,22 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned } #endif +/* + * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values 
+ * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() + * needs to be called. + */ +#ifndef ARCH_PAGE_TABLE_SYNC_MASK +#define ARCH_PAGE_TABLE_SYNC_MASK 0 +#endif + +/* + * There is no default implementation for arch_sync_kernel_mappings(). It is + * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK + * is 0. + */ +void arch_sync_kernel_mappings(unsigned long start, unsigned long end); + #endif /* CONFIG_MMU */ /* diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index fdc9aeb74a44..2759dac6be44 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -219,22 +219,6 @@ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); -/* - * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values - * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() - * needs to be called. - */ -#ifndef ARCH_PAGE_TABLE_SYNC_MASK -#define ARCH_PAGE_TABLE_SYNC_MASK 0 -#endif - -/* - * There is no default implementation for arch_sync_kernel_mappings(). It is - * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK - * is 0. - */ -void arch_sync_kernel_mappings(unsigned long start, unsigned long end); - /* * Lowlevel-APIs (not for driver use!) */ -- cgit v1.2.3 From f2d2f9598ebb0158a3fe17cda0106d7752e654a2 Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Mon, 18 Aug 2025 11:02:05 +0900 Subject: mm: introduce and use {pgd,p4d}_populate_kernel() Introduce and use {pgd,p4d}_populate_kernel() in core MM code when populating PGD and P4D entries for the kernel address space. These helpers ensure proper synchronization of page tables when updating the kernel portion of top-level page tables. Until now, the kernel has relied on each architecture to handle synchronization of top-level page tables in an ad-hoc manner. 
For example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes"). However, this approach has proven fragile for the following reasons: 1) It is easy to forget to perform the necessary page table synchronization when introducing new changes. For instance, commit 4917f55b4ef9 ("mm/sparse-vmemmap: improve memory savings for compound devmaps") overlooked the need to synchronize page tables for the vmemmap area. 2) It is also easy to overlook that the vmemmap and direct mapping areas must not be accessed before explicit page table synchronization. For example, commit 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") caused crashes by accessing the vmemmap area before calling sync_global_pgds(). To address this, as suggested by Dave Hansen, introduce _kernel() variants of the page table population helpers, which invoke architecture-specific hooks to properly synchronize page tables. These are introduced in a new header file, include/linux/pgalloc.h, so they can be called from common code. They reuse existing infrastructure for vmalloc and ioremap. Synchronization requirements are determined by ARCH_PAGE_TABLE_SYNC_MASK, and the actual synchronization is performed by arch_sync_kernel_mappings(). This change currently targets only x86_64, so only PGD and P4D level helpers are introduced. Currently, these helpers are no-ops since no architecture sets PGTBL_{PGD,P4D}_MODIFIED in ARCH_PAGE_TABLE_SYNC_MASK. In theory, PUD and PMD level helpers can be added later if needed by other architectures. For now, 32-bit architectures (x86-32 and arm) only handle PGTBL_PMD_MODIFIED, so p*d_populate_kernel() will never affect them unless we introduce a PMD level helper. 
[harry.yoo@oracle.com: fix KASAN build error due to p*d_populate_kernel()] Link: https://lkml.kernel.org/r/20250822020727.202749-1-harry.yoo@oracle.com Link: https://lkml.kernel.org/r/20250818020206.4517-3-harry.yoo@oracle.com Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges") Signed-off-by: Harry Yoo Suggested-by: Dave Hansen Acked-by: Kiryl Shutsemau Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Alexander Potapenko Cc: Alistair Popple Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: bibo mao Cc: Borislav Betkov Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Dev Jain Cc: Dmitriy Vyukov Cc: Gwan-gyeong Mun Cc: Ingo Molnar Cc: Jane Chu Cc: Joao Martins Cc: Joerg Roedel Cc: John Hubbard Cc: Kevin Brodsky Cc: Liam Howlett Cc: Michal Hocko Cc: Oscar Salvador Cc: Peter Xu Cc: Peter Zijlstra Cc: Qi Zheng Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Huth Cc: "Uladzislau Rezki (Sony)" Cc: Vincenzo Frascino Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/pgalloc.h | 29 +++++++++++++++++++++++++++++ include/linux/pgtable.h | 13 +++++++------ 2 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 include/linux/pgalloc.h (limited to 'include') diff --git a/include/linux/pgalloc.h b/include/linux/pgalloc.h new file mode 100644 index 000000000000..9174fa59bbc5 --- /dev/null +++ b/include/linux/pgalloc.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PGALLOC_H +#define _LINUX_PGALLOC_H + +#include +#include + +/* + * {pgd,p4d}_populate_kernel() are defined as macros to allow + * compile-time optimization based on the configured page table levels. 
+ * Without this, linking may fail because callers (e.g., KASAN) may rely + * on calls to these functions being optimized away when passing symbols + * that exist only for certain page table levels. + */ +#define pgd_populate_kernel(addr, pgd, p4d) \ + do { \ + pgd_populate(&init_mm, pgd, p4d); \ + if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED) \ + arch_sync_kernel_mappings(addr, addr); \ + } while (0) + +#define p4d_populate_kernel(addr, p4d, pud) \ + do { \ + p4d_populate(&init_mm, p4d, pud); \ + if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED) \ + arch_sync_kernel_mappings(addr, addr); \ + } while (0) + +#endif /* _LINUX_PGALLOC_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ba699df6ef69..2b80fd456c8b 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1469,8 +1469,8 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned /* * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values - * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() - * needs to be called. + * and let generic vmalloc, ioremap and page table update code know when + * arch_sync_kernel_mappings() needs to be called. */ #ifndef ARCH_PAGE_TABLE_SYNC_MASK #define ARCH_PAGE_TABLE_SYNC_MASK 0 @@ -1954,10 +1954,11 @@ static inline bool arch_has_pfn_modify_check(void) /* * Page Table Modification bits for pgtbl_mod_mask. * - * These are used by the p?d_alloc_track*() set of functions an in the generic - * vmalloc/ioremap code to track at which page-table levels entries have been - * modified. Based on that the code can better decide when vmalloc and ioremap - * mapping changes need to be synchronized to other page-tables in the system. + * These are used by the p?d_alloc_track*() and p*d_populate_kernel() + * functions in the generic vmalloc, ioremap and page table update code + * to track at which page-table levels entries have been modified. 
+ * Based on that the code can better decide when page table changes need + * to be synchronized to other page-tables in the system. */ #define __PGTBL_PGD_MODIFIED 0 #define __PGTBL_P4D_MODIFIED 1 -- cgit v1.2.3 From 9df8043a546d2eb3adfaba920c027c0d701c73a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Aug 2025 15:18:57 +0300 Subject: iopoll: Generalize read_poll_timeout() into poll_timeout_us() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While read_poll_timeout() & co. were originally introduced just for simple I/O usage scenarios they have since been generalized to be useful in more cases. However the interface is very cumbersome to use in the general case. Attempt to make it more flexible by combining the 'op', 'val' and 'args' parameters into just a single 'op' that the caller can fully specify. For example i915 has one case where one might currently have to write something like: ret = read_poll_timeout(drm_dp_dpcd_read_byte, err, err || (status & mask), 0 * 1000, 200 * 1000, false, aux, DP_FEC_STATUS, &status); which is practically illegible, but with the adjusted macro we do: ret = poll_timeout_us(err = drm_dp_dpcd_read_byte(aux, DP_FEC_STATUS, &status), err || (status & mask), 0 * 1000, 200 * 1000, false); which is much easier to understand. One could even combine the 'op' and 'cond' parameters into one, but that might make the caller a bit too unwieldy with assignments and checks being done on the same statement. This makes poll_timeout_us() closer to the i915 __wait_for() macro, with the main difference being that __wait_for() uses exponential backoff as opposed to the fixed polling interval used by poll_timeout_us(). Eventually we might be able to switch (at least most of) i915 to use poll_timeout_us(). 
v2: Fix typos (Jani) Fix delay_us docs for poll_timeout_us_atomic() (Jani) Cc: Lucas De Marchi Cc: Dibin Moolakadan Subrahmanian Cc: Imre Deak Cc: David Laight Cc: Geert Uytterhoeven Cc: Matt Wagantall Cc: Dejin Zheng Cc: intel-gfx@lists.freedesktop.org Cc: intel-xe@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Jani Nikula Acked-by: Simona Vetter Signed-off-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250826121859.15497-1-ville.syrjala@linux.intel.com Signed-off-by: Jani Nikula --- include/linux/iopoll.h | 110 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 91324c331a4b..440aca5b4b59 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -14,41 +14,38 @@ #include /** - * read_poll_timeout - Periodically poll an address until a condition is - * met or a timeout occurs - * @op: accessor function (takes @args as its arguments) - * @val: Variable to read the value into - * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please - * read usleep_range() function description for details and + * poll_timeout_us - Periodically poll and perform an operation until + * a condition is met or a timeout occurs + * + * @op: Operation + * @cond: Break condition + * @sleep_us: Maximum time to sleep between operations in us (0 tight-loops). + * Please read usleep_range() function description for details and * limitations. * @timeout_us: Timeout in us, 0 means never timeout - * @sleep_before_read: if it is true, sleep @sleep_us before read. - * @args: arguments for @op poll + * @sleep_before_op: if it is true, sleep @sleep_us before operation. * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. * - * Returns: 0 on success and -ETIMEDOUT upon a timeout. 
In either - * case, the last read value at @args is stored in @val. Must not + * Returns: 0 on success and -ETIMEDOUT upon a timeout. Must not * be called from atomic context if sleep_us or timeout_us are used. */ -#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ - sleep_before_read, args...) \ +#define poll_timeout_us(op, cond, sleep_us, timeout_us, sleep_before_op) \ ({ \ u64 __timeout_us = (timeout_us); \ unsigned long __sleep_us = (sleep_us); \ ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ might_sleep_if((__sleep_us) != 0); \ - if (sleep_before_read && __sleep_us) \ + if ((sleep_before_op) && __sleep_us) \ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ for (;;) { \ - (val) = op(args); \ + op; \ if (cond) \ break; \ if (__timeout_us && \ ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = op(args); \ + op; \ break; \ } \ if (__sleep_us) \ @@ -59,17 +56,16 @@ }) /** - * read_poll_timeout_atomic - Periodically poll an address until a condition is - * met or a timeout occurs - * @op: accessor function (takes @args as its arguments) - * @val: Variable to read the value into - * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). Please - * read udelay() function description for details and + * poll_timeout_us_atomic - Periodically poll and perform an operation until + * a condition is met or a timeout occurs + * + * @op: Operation + * @cond: Break condition + * @delay_us: Time to udelay between operations in us (0 tight-loops). + * Please read udelay() function description for details and * limitations. * @timeout_us: Timeout in us, 0 means never timeout - * @delay_before_read: if it is true, delay @delay_us before read. - * @args: arguments for @op poll + * @delay_before_op: if it is true, delay @delay_us before operation. * * This macro does not rely on timekeeping. 
Hence it is safe to call even when * timekeeping is suspended, at the expense of an underestimation of wall clock @@ -78,27 +74,26 @@ * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. * - * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. + * Returns: 0 on success and -ETIMEDOUT upon a timeout. */ -#define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \ - delay_before_read, args...) \ +#define poll_timeout_us_atomic(op, cond, delay_us, timeout_us, \ + delay_before_op) \ ({ \ u64 __timeout_us = (timeout_us); \ s64 __left_ns = __timeout_us * NSEC_PER_USEC; \ unsigned long __delay_us = (delay_us); \ u64 __delay_ns = __delay_us * NSEC_PER_USEC; \ - if (delay_before_read && __delay_us) { \ + if ((delay_before_op) && __delay_us) { \ udelay(__delay_us); \ if (__timeout_us) \ __left_ns -= __delay_ns; \ } \ for (;;) { \ - (val) = op(args); \ + op; \ if (cond) \ break; \ if (__timeout_us && __left_ns < 0) { \ - (val) = op(args); \ + op; \ break; \ } \ if (__delay_us) { \ @@ -113,6 +108,57 @@ (cond) ? 0 : -ETIMEDOUT; \ }) +/** + * read_poll_timeout - Periodically poll an address until a condition is + * met or a timeout occurs + * @op: accessor function (takes @args as its arguments) + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. + * @timeout_us: Timeout in us, 0 means never timeout + * @sleep_before_read: if it is true, sleep @sleep_us before read. + * @args: arguments for @op poll + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. 
In either + * case, the last read value at @args is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + */ +#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ + sleep_before_read, args...) \ + poll_timeout_us((val) = op(args), cond, sleep_us, timeout_us, sleep_before_read) + +/** + * read_poll_timeout_atomic - Periodically poll an address until a condition is + * met or a timeout occurs + * @op: accessor function (takes @args as its arguments) + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. + * @timeout_us: Timeout in us, 0 means never timeout + * @delay_before_read: if it is true, delay @delay_us before read. + * @args: arguments for @op poll + * + * This macro does not rely on timekeeping. Hence it is safe to call even when + * timekeeping is suspended, at the expense of an underestimation of wall clock + * time, which is rather minimal with a non-zero delay_us. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. + */ +#define read_poll_timeout_atomic(op, val, cond, sleep_us, timeout_us, \ + sleep_before_read, args...) 
\ + poll_timeout_us_atomic((val) = op(args), cond, sleep_us, timeout_us, sleep_before_read) + /** * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs * @op: accessor function (takes @addr as its only argument) -- cgit v1.2.3 From 563e5eca4ea3b6e1901cbc7cd6dc42731a8d2999 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Aug 2025 15:18:58 +0300 Subject: iopoll: Avoid evaluating 'cond' twice in poll_timeout_us() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently poll_timeout_us() evaluates 'cond' twice at the end of the success case. This is not desirable in case 'cond' itself is expensive. Avoid the double evaluation by tracking the return value in a variable. Need to use a triple underscore '___ret' name to avoid a conflict with an existing double underscore '__ret' variable in the regmap code. Cc: Lucas De Marchi Cc: Dibin Moolakadan Subrahmanian Cc: Imre Deak Cc: David Laight Cc: Geert Uytterhoeven Cc: Matt Wagantall Cc: Dejin Zheng Cc: intel-gfx@lists.freedesktop.org Cc: intel-xe@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Jani Nikula Acked-by: Simona Vetter Signed-off-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250826121859.15497-2-ville.syrjala@linux.intel.com Signed-off-by: Jani Nikula --- include/linux/iopoll.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 440aca5b4b59..d8c801ad68fa 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -36,23 +36,30 @@ u64 __timeout_us = (timeout_us); \ unsigned long __sleep_us = (sleep_us); \ ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ + int ___ret; \ might_sleep_if((__sleep_us) != 0); \ if ((sleep_before_op) && __sleep_us) \ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ for (;;) { \ op; \ - if (cond) \ + if (cond) { 
\ + ___ret = 0; \ break; \ + } \ if (__timeout_us && \ ktime_compare(ktime_get(), __timeout) > 0) { \ op; \ + if (cond) \ + ___ret = 0; \ + else \ + ___ret = -ETIMEDOUT; \ break; \ } \ if (__sleep_us) \ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ cpu_relax(); \ } \ - (cond) ? 0 : -ETIMEDOUT; \ + ___ret; \ }) /** @@ -83,6 +90,7 @@ s64 __left_ns = __timeout_us * NSEC_PER_USEC; \ unsigned long __delay_us = (delay_us); \ u64 __delay_ns = __delay_us * NSEC_PER_USEC; \ + int ___ret; \ if ((delay_before_op) && __delay_us) { \ udelay(__delay_us); \ if (__timeout_us) \ @@ -90,10 +98,16 @@ } \ for (;;) { \ op; \ - if (cond) \ + if (cond) { \ + ___ret = 0; \ break; \ + } \ if (__timeout_us && __left_ns < 0) { \ op; \ + if (cond) \ + ___ret = 0; \ + else \ + ___ret = -ETIMEDOUT; \ break; \ } \ if (__delay_us) { \ @@ -105,7 +119,7 @@ if (__timeout_us) \ __left_ns--; \ } \ - (cond) ? 0 : -ETIMEDOUT; \ + ___ret; \ }) /** -- cgit v1.2.3 From 3b6f62b6b5777f56e3fedc45041c2f61645b5204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 26 Aug 2025 15:18:59 +0300 Subject: iopoll: Reorder the timeout handling in poll_timeout_us() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently poll_timeout_us() evaluates 'op' and 'cond' twice within the loop, once at the start, and a second time after the timeout check. While it's probably not a big deal to do it twice almost back to back, it does make the macro a bit messy. Simplify the implementation to evaluate the timeout at the very start, then follow up with 'op'/'cond', and finally check if the timeout did in fact happen or not. For good measure throw in a compiler barrier between the timeout and 'op'/'cond' evaluations to make sure the compiler can't reorder the operations (which could cause false positive timeouts). 
The similar i915 __wait_for() macro already has the barrier, though there it is between the 'op' and 'cond' evaluations, which seems like it could still allow 'op' and the timeout evaluations to get reordered incorrectly. I suppose the ktime_get() might itself act as a sufficient barrier here, but better safe than sorry I guess. Cc: Lucas De Marchi Cc: Dibin Moolakadan Subrahmanian Cc: Imre Deak Cc: David Laight Cc: Geert Uytterhoeven Cc: Matt Wagantall Cc: Dejin Zheng Cc: intel-gfx@lists.freedesktop.org Cc: intel-xe@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Jani Nikula Acked-by: Simona Vetter Signed-off-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250826121859.15497-3-ville.syrjala@linux.intel.com Signed-off-by: Jani Nikula --- include/linux/iopoll.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index d8c801ad68fa..bdd2e0652bc3 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -41,18 +41,17 @@ if ((sleep_before_op) && __sleep_us) \ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ for (;;) { \ + bool __expired = __timeout_us && \ + ktime_compare(ktime_get(), __timeout) > 0; \ + /* guarantee 'op' and 'cond' are evaluated after timeout expired */ \ + barrier(); \ op; \ if (cond) { \ ___ret = 0; \ break; \ } \ - if (__timeout_us && \ - ktime_compare(ktime_get(), __timeout) > 0) { \ - op; \ - if (cond) \ - ___ret = 0; \ - else \ - ___ret = -ETIMEDOUT; \ + if (__expired) { \ + ___ret = -ETIMEDOUT; \ break; \ } \ if (__sleep_us) \ @@ -97,17 +96,16 @@ __left_ns -= __delay_ns; \ } \ for (;;) { \ + bool __expired = __timeout_us && __left_ns < 0; \ + /* guarantee 'op' and 'cond' are evaluated after timeout expired */ \ + barrier(); \ op; \ if (cond) { \ ___ret = 0; \ break; \ } \ - if (__timeout_us && __left_ns < 0) { \ - op; \ - if (cond) \ - ___ret = 0; \ - else \ - ___ret = -ETIMEDOUT; \ + if (__expired) 
{ \ + ___ret = -ETIMEDOUT; \ break; \ } \ if (__delay_us) { \ -- cgit v1.2.3 From e7fa80e2932c68c17e7003fdfd01addc123567f7 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 27 Aug 2025 13:38:37 +0000 Subject: drm_gem: add mutex to drm_gem_object.gpuva There are two main ways that GPUVM might be used: * staged mode, where VM_BIND ioctls update the GPUVM immediately so that the GPUVM reflects the state of the VM *including* staged changes that are not yet applied to the GPU's virtual address space. * immediate mode, where the GPUVM state is updated during run_job(), i.e., in the DMA fence signalling critical path, to ensure that the GPUVM and the GPU's virtual address space has the same state at all times. Currently, only Panthor uses GPUVM in immediate mode, but the Rust drivers Tyr and Nova will also use GPUVM in immediate mode, so it is worth to support both staged and immediate mode well in GPUVM. To use immediate mode, the GEMs gpuva list must be modified during the fence signalling path, which means that it must be protected by a lock that is fence signalling safe. For this reason, a mutex is added to struct drm_gem_object that is intended to achieve this purpose. Adding it directly in the GEM object both makes it easier to use GPUVM in immediate mode, but also makes it possible to take the gpuva lock from core drm code. As a follow-up, another change that should probably be made to support immediate mode is a mechanism to postpone cleanup of vm_bo objects, as dropping a vm_bo object in the fence signalling path is problematic for two reasons: * When using DRM_GPUVM_RESV_PROTECTED, you cannot remove the vm_bo from the extobj/evicted lists during the fence signalling path. * Dropping a vm_bo could lead to the GEM object getting destroyed. The requirement that GEM object cleanup is fence signalling safe is dubious and likely to be violated in practice. Panthor already has its own custom implementation of postponing vm_bo cleanup. 
Reviewed-by: Boris Brezillon Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250827-gpuva-mutex-in-gem-v3-1-bd89f5a82c0d@google.com Signed-off-by: Danilo Krummrich --- include/drm/drm_gem.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index d3a7b43e2c63..a995c0c1b63c 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -398,16 +398,28 @@ struct drm_gem_object { struct dma_resv _resv; /** - * @gpuva: - * - * Provides the list of GPU VAs attached to this GEM object. - * - * Drivers should lock list accesses with the GEMs &dma_resv lock - * (&drm_gem_object.resv) or a custom lock if one is provided. + * @gpuva: Fields used by GPUVM to manage mappings pointing to this GEM object. */ struct { + /** + * @gpuva.list: list of GPUVM mappings attached to this GEM object. + * + * Drivers should lock list accesses with either the GEMs + * &dma_resv lock (&drm_gem_object.resv) or the + * &drm_gem_object.gpuva.lock mutex. + */ struct list_head list; + /** + * @gpuva.lock: lock protecting access to &drm_gem_object.gpuva.list + * when the resv lock can't be used. + * + * Should only be used when the VM is being modified in a fence + * signalling path, otherwise you should use &drm_gem_object.resv to + * protect accesses to &drm_gem_object.gpuva.list. + */ + struct mutex lock; + #ifdef CONFIG_LOCKDEP struct lockdep_map *lock_dep_map; #endif -- cgit v1.2.3 From 3c8d31b8937a7ee6e5de74f0274810b8705d77ea Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 27 Aug 2025 13:38:39 +0000 Subject: gpuvm: remove gem.gpuva.lock_dep_map Since all users of gem.gpuva.lock_dep_map now rely on the mutex directly in gpuva, we may remove it. Whether the mutex is used is now tracked by a flag in gpuvm rather than by whether lock_dep_map is null. Note that a GEM object may not be pushed to multiple gpuvms that disagree on the value of this new flag. 
But that's okay because a single driver should use the same locking scheme everywhere, and a GEM object is driver specific (when a GEM is exported with prime, a new GEM object instance is created from the backing dma-buf). The flag is present even with CONFIG_LOCKDEP=n because the intent is that the flag will also cause vm_bo cleanup to become deferred. However, that will happen in a follow-up patch. Reviewed-by: Boris Brezillon Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250827-gpuva-mutex-in-gem-v3-3-bd89f5a82c0d@google.com [ Use lockdep_is_held() instead of lock_is_held(). - Danilo ] Signed-off-by: Danilo Krummrich --- include/drm/drm_gem.h | 41 +++++++++++++++-------------------------- include/drm/drm_gpuvm.h | 30 +++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index a995c0c1b63c..8d48d2af2649 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -399,6 +399,12 @@ struct drm_gem_object { /** * @gpuva: Fields used by GPUVM to manage mappings pointing to this GEM object. + * + * When DRM_GPUVM_IMMEDIATE_MODE is set, this list is protected by the + * mutex. Otherwise, the list is protected by the GEMs &dma_resv lock. + * + * Note that all entries in this list must agree on whether + * DRM_GPUVM_IMMEDIATE_MODE is set. */ struct { /** @@ -412,17 +418,14 @@ struct drm_gem_object { /** * @gpuva.lock: lock protecting access to &drm_gem_object.gpuva.list - * when the resv lock can't be used. + * when DRM_GPUVM_IMMEDIATE_MODE is used. * - * Should only be used when the VM is being modified in a fence - * signalling path, otherwise you should use &drm_gem_object.resv to - * protect accesses to &drm_gem_object.gpuva.list. + * Only used when DRM_GPUVM_IMMEDIATE_MODE is set. It should be + * safe to take this mutex during the fence signalling path, so + * do not allocate memory while holding this lock. 
Otherwise, + * the &dma_resv lock should be used. */ struct mutex lock; - -#ifdef CONFIG_LOCKDEP - struct lockdep_map *lock_dep_map; -#endif } gpuva; /** @@ -607,26 +610,12 @@ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) } #ifdef CONFIG_LOCKDEP -/** - * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list. - * @obj: the &drm_gem_object - * @lock: the lock used to protect the gpuva list. The locking primitive - * must contain a dep_map field. - * - * Call this if you're not proctecting access to the gpuva list with the - * dma-resv lock, but with a custom lock. - */ -#define drm_gem_gpuva_set_lock(obj, lock) \ - if (!WARN((obj)->gpuva.lock_dep_map, \ - "GEM GPUVA lock should be set only once.")) \ - (obj)->gpuva.lock_dep_map = &(lock)->dep_map -#define drm_gem_gpuva_assert_lock_held(obj) \ - lockdep_assert((obj)->gpuva.lock_dep_map ? \ - lock_is_held((obj)->gpuva.lock_dep_map) : \ +#define drm_gem_gpuva_assert_lock_held(gpuvm, obj) \ + lockdep_assert(drm_gpuvm_immediate_mode(gpuvm) ? \ + lockdep_is_held(&(obj)->gpuva.lock) : \ dma_resv_held((obj)->resv)) #else -#define drm_gem_gpuva_set_lock(obj, lock) do {} while (0) -#define drm_gem_gpuva_assert_lock_held(obj) do {} while (0) +#define drm_gem_gpuva_assert_lock_held(gpuvm, obj) do {} while (0) #endif /** diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 4a22b9d848f7..727b8f336fad 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -196,10 +196,20 @@ enum drm_gpuvm_flags { */ DRM_GPUVM_RESV_PROTECTED = BIT(0), + /** + * @DRM_GPUVM_IMMEDIATE_MODE: use the locking scheme for GEMs designed + * for modifying the GPUVM during the fence signalling path + * + * When set, gpuva.lock is used to protect gpuva.list in all GEM + * objects associated with this GPUVM. Otherwise, the GEMs dma-resv is + * used. 
+ */ + DRM_GPUVM_IMMEDIATE_MODE = BIT(1), + /** * @DRM_GPUVM_USERBITS: user defined bits */ - DRM_GPUVM_USERBITS = BIT(1), + DRM_GPUVM_USERBITS = BIT(2), }; /** @@ -369,6 +379,19 @@ drm_gpuvm_resv_protected(struct drm_gpuvm *gpuvm) return gpuvm->flags & DRM_GPUVM_RESV_PROTECTED; } +/** + * drm_gpuvm_immediate_mode() - indicates whether &DRM_GPUVM_IMMEDIATE_MODE is + * set + * @gpuvm: the &drm_gpuvm + * + * Returns: true if &DRM_GPUVM_IMMEDIATE_MODE is set, false otherwise. + */ +static inline bool +drm_gpuvm_immediate_mode(struct drm_gpuvm *gpuvm) +{ + return gpuvm->flags & DRM_GPUVM_IMMEDIATE_MODE; +} + /** * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv * @gpuvm__: the &drm_gpuvm @@ -742,9 +765,10 @@ drm_gpuvm_bo_gem_evict(struct drm_gem_object *obj, bool evict) { struct drm_gpuvm_bo *vm_bo; - drm_gem_gpuva_assert_lock_held(obj); - drm_gem_for_each_gpuvm_bo(vm_bo, obj) + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gem_gpuva_assert_lock_held(vm_bo->vm, obj); drm_gpuvm_bo_evict(vm_bo, evict); + } } void drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo); -- cgit v1.2.3 From c2a756891bb428104fa8899998ba277042274cdb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 25 Aug 2025 13:18:28 -0700 Subject: uapi: wrap compiler_types.h in an ifdef instead of the implicit strip The uAPI stddef header includes compiler_types.h, a kernel-only header, to make sure that kernel definitions of annotations like __counted_by() take precedence. There is a hack in scripts/headers_install.sh which strips includes of compiler.h and compiler_types.h when installing uAPI headers. While explicit handling makes sense for compiler.h, which is included all over the uAPI, compiler_types.h is only included by stddef.h (within the uAPI, obviously it's included in kernel code a lot). Remove the stripping from scripts/headers_install.sh and wrap the include of compiler_types.h in #ifdef __KERNEL__ instead. 
This should be equivalent functionally, but is easier to understand to a casual reader of the code. It also makes it easier to work with kernel headers directly from under tools/ Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250825201828.2370083-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/uapi/linux/stddef.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index b87df1b485c2..9a28f7d9a334 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -2,7 +2,9 @@ #ifndef _UAPI_LINUX_STDDEF_H #define _UAPI_LINUX_STDDEF_H +#ifdef __KERNEL__ #include +#endif #ifndef __always_inline #define __always_inline inline -- cgit v1.2.3 From f86f42ed2c471da5b061492bb8ab1d3d73c19c58 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 12:50:27 +0000 Subject: net: add sk_drops_read(), sk_drops_inc() and sk_drops_reset() helpers We want to split sk->sk_drops in the future to reduce potential contention on this field. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250826125031.1578842-2-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 17 ++++++++++++++++- include/net/tcp.h | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 63a6a48afb48..34d7029eb622 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2682,11 +2682,26 @@ struct sock_skb_cb { #define sock_skb_cb_check_size(size) \ BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET) +static inline void sk_drops_inc(struct sock *sk) +{ + atomic_inc(&sk->sk_drops); +} + +static inline int sk_drops_read(const struct sock *sk) +{ + return atomic_read(&sk->sk_drops); +} + +static inline void sk_drops_reset(struct sock *sk) +{ + atomic_set(&sk->sk_drops, 0); +} + static inline void sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) { SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ? - atomic_read(&sk->sk_drops) : 0; + sk_drops_read(sk) : 0; } static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) diff --git a/include/net/tcp.h b/include/net/tcp.h index 2936b8175950..16dc9cebb9d2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2612,7 +2612,7 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) */ static inline void tcp_listendrop(const struct sock *sk) { - atomic_inc(&((struct sock *)sk)->sk_drops); + sk_drops_inc((struct sock *)sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } -- cgit v1.2.3 From cb4d5a6eb600a43c2e3ec7f54e06d07aa33d8062 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 12:50:28 +0000 Subject: net: add sk_drops_skbadd() helper Existing sk_drops_add() helper is renamed to sk_drops_skbadd(). Add sk_drops_add() and convert sk_drops_inc() to use it. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250826125031.1578842-3-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/skmsg.h | 2 +- include/net/sock.h | 11 ++++++++--- include/net/udp.h | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 0b9095a281b8..49847888c287 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -315,7 +315,7 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, static inline void sock_drop(struct sock *sk, struct sk_buff *skb) { - sk_drops_add(sk, skb); + sk_drops_skbadd(sk, skb); kfree_skb(skb); } diff --git a/include/net/sock.h b/include/net/sock.h index 34d7029eb622..9edb42ff0622 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2682,9 +2682,14 @@ struct sock_skb_cb { #define sock_skb_cb_check_size(size) \ BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET) +static inline void sk_drops_add(struct sock *sk, int segs) +{ + atomic_add(segs, &sk->sk_drops); +} + static inline void sk_drops_inc(struct sock *sk) { - atomic_inc(&sk->sk_drops); + sk_drops_add(sk, 1); } static inline int sk_drops_read(const struct sock *sk) @@ -2704,11 +2709,11 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) sk_drops_read(sk) : 0; } -static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) +static inline void sk_drops_skbadd(struct sock *sk, const struct sk_buff *skb) { int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs); - atomic_add(segs, &sk->sk_drops); + sk_drops_add(sk, segs); } static inline ktime_t sock_read_timestamp(struct sock *sk) diff --git a/include/net/udp.h b/include/net/udp.h index e2af3bda90c9..7b26d4c50f33 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -627,7 +627,7 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, return segs; drop: - atomic_add(drop_count, &sk->sk_drops); + sk_drops_add(sk, 
drop_count); SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, drop_count); kfree_skb(skb); return NULL; -- cgit v1.2.3 From c51613fa276f038bdd18656a57a90ccc5d4e5200 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 12:50:29 +0000 Subject: net: add sk->sk_drop_counters Some sockets suffer from heavy false sharing on sk->sk_drops, and fields in the same cache line. Add sk->sk_drop_counters to: - move the drop counter(s) to dedicated cache lines. - Add basic NUMA awareness to these drop counter(s). Following patches will use this infrastructure for UDP and RAW sockets. sk_clone_lock() is not yet ready, it would need to properly set newsk->sk_drop_counters if we plan to use this for TCP sockets. v2: used Paolo suggestion from https://lore.kernel.org/netdev/8f09830a-d83d-43c9-b36b-88ba0a23e9b2@redhat.com/ Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250826125031.1578842-4-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 9edb42ff0622..73cd3316e288 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -102,6 +102,11 @@ struct net; typedef __u32 __bitwise __portpair; typedef __u64 __bitwise __addrpair; +struct socket_drop_counters { + atomic_t drops0 ____cacheline_aligned_in_smp; + atomic_t drops1 ____cacheline_aligned_in_smp; +}; + /** * struct sock_common - minimal network layer representation of sockets * @skc_daddr: Foreign IPv4 addr @@ -282,6 +287,7 @@ struct sk_filter; * @sk_err_soft: errors that don't cause failure but are the cause of a * persistent failure not just 'timed out' * @sk_drops: raw/udp drops counter + * @sk_drop_counters: optional pointer to socket_drop_counters * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner @@ 
-449,6 +455,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif + struct socket_drop_counters *sk_drop_counters; __cacheline_group_end(sock_read_rxtx); __cacheline_group_begin(sock_write_rxtx); @@ -2684,7 +2691,18 @@ struct sock_skb_cb { static inline void sk_drops_add(struct sock *sk, int segs) { - atomic_add(segs, &sk->sk_drops); + struct socket_drop_counters *sdc = sk->sk_drop_counters; + + if (sdc) { + int n = numa_node_id() % 2; + + if (n) + atomic_add(segs, &sdc->drops1); + else + atomic_add(segs, &sdc->drops0); + } else { + atomic_add(segs, &sk->sk_drops); + } } static inline void sk_drops_inc(struct sock *sk) @@ -2694,11 +2712,23 @@ static inline void sk_drops_inc(struct sock *sk) static inline int sk_drops_read(const struct sock *sk) { + const struct socket_drop_counters *sdc = sk->sk_drop_counters; + + if (sdc) { + DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops)); + return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1); + } return atomic_read(&sk->sk_drops); } static inline void sk_drops_reset(struct sock *sk) { + struct socket_drop_counters *sdc = sk->sk_drop_counters; + + if (sdc) { + atomic_set(&sdc->drops0, 0); + atomic_set(&sdc->drops1, 0); + } atomic_set(&sk->sk_drops, 0); } -- cgit v1.2.3 From 51132b99f01ce05f8008f0fb189d83eed484bd53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 12:50:30 +0000 Subject: udp: add drop_counters to udp socket When a packet flood hits one or more UDP sockets, many cpus have to update sk->sk_drops. This slows down other cpus, because currently sk_drops is in sock_write_rx group. Add a socket_drop_counters structure to udp sockets. Using dedicated cache lines to hold drop counters makes sure that consumers no longer suffer from false sharing if/when producers only change sk->sk_drops. This adds 128 bytes per UDP socket. Tested with the following stress test, sending about 11 Mpps to a dual socket AMD EPYC 7B13 64-Core. 
super_netperf 20 -t UDP_STREAM -H DUT -l10 -- -n -P,1000 -m 120 Note: due to socket lookup, only one UDP socket is receiving packets on DUT. Then measure receiver (DUT) behavior. We can see both consumer and BH handlers can process more packets per second. Before: nstat -n ; sleep 1 ; nstat | grep Udp Udp6InDatagrams 615091 0.0 Udp6InErrors 3904277 0.0 Udp6RcvbufErrors 3904277 0.0 After: nstat -n ; sleep 1 ; nstat | grep Udp Udp6InDatagrams 816281 0.0 Udp6InErrors 7497093 0.0 Udp6RcvbufErrors 7497093 0.0 Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250826125031.1578842-5-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/udp.h | 1 + include/net/udp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 4e1a672af4c5..981506be1e15 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -108,6 +108,7 @@ struct udp_sock { * the last UDP socket cacheline. */ struct hlist_node tunnel_list; + struct socket_drop_counters drop_counters; }; #define udp_test_bit(nr, sk) \ diff --git a/include/net/udp.h b/include/net/udp.h index 7b26d4c50f33..93b159f30e88 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -288,6 +288,7 @@ static inline void udp_lib_init_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); + sk->sk_drop_counters = &up->drop_counters; skb_queue_head_init(&up->reader_queue); INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; -- cgit v1.2.3 From b81aa23234d94d99951761d9864061d774633ba9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Aug 2025 12:50:31 +0000 Subject: inet: raw: add drop_counters to raw sockets When a packet flood hits one or more RAW sockets, many cpus have to update sk->sk_drops. This slows down other cpus, because currently sk_drops is in sock_write_rx group. Add a socket_drop_counters structure to raw sockets. 
Using dedicated cache lines to hold drop counters makes sure that consumers no longer suffer from false sharing if/when producers only change sk->sk_drops. This adds 128 bytes per RAW socket. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250826125031.1578842-6-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 2 +- include/net/raw.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bc6ec2959173..261d02efb615 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -295,7 +295,7 @@ struct raw6_sock { __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; - + struct socket_drop_counters drop_counters; struct ipv6_pinfo inet6; }; diff --git a/include/net/raw.h b/include/net/raw.h index 32a61481a253..d52709139060 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -81,6 +81,7 @@ struct raw_sock { struct inet_sock inet; struct icmp_filter filter; u32 ipmr_table; + struct socket_drop_counters drop_counters; }; #define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk) -- cgit v1.2.3 From 3ea299d3dccdb8554057d0a87552e7673baea95d Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Mon, 11 Aug 2025 09:40:40 +0200 Subject: mtd: nand: qpic-common: remove a bunch of unused defines A bunch of definitions in the 'nand-qpic-common.h' header became unused after the conversion of the 'qcom_nandc' and 'spi-qpic-snand' drivers to use the FIELD_PREP() macro, so remove those. No functional changes. 
Signed-off-by: Gabor Juhos Signed-off-by: Miquel Raynal --- include/linux/mtd/nand-qpic-common.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index 4e694b1aabbd..e8201d1b7cf9 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -71,14 +71,10 @@ /* NAND_DEVn_CFG0 bits */ #define DISABLE_STATUS_AFTER_WRITE BIT(4) -#define CW_PER_PAGE 6 #define CW_PER_PAGE_MASK GENMASK(8, 6) -#define UD_SIZE_BYTES 9 #define UD_SIZE_BYTES_MASK GENMASK(18, 9) #define ECC_PARITY_SIZE_BYTES_RS GENMASK(22, 19) -#define SPARE_SIZE_BYTES 23 #define SPARE_SIZE_BYTES_MASK GENMASK(26, 23) -#define NUM_ADDR_CYCLES 27 #define NUM_ADDR_CYCLES_MASK GENMASK(29, 27) #define STATUS_BFR_READ BIT(30) #define SET_RD_MODE_AFTER_STATUS BIT(31) @@ -86,26 +82,20 @@ /* NAND_DEVn_CFG0 bits */ #define DEV0_CFG1_ECC_DISABLE BIT(0) #define WIDE_FLASH BIT(1) -#define NAND_RECOVERY_CYCLES 2 #define NAND_RECOVERY_CYCLES_MASK GENMASK(4, 2) #define CS_ACTIVE_BSY BIT(5) -#define BAD_BLOCK_BYTE_NUM 6 #define BAD_BLOCK_BYTE_NUM_MASK GENMASK(15, 6) #define BAD_BLOCK_IN_SPARE_AREA BIT(16) -#define WR_RD_BSY_GAP 17 #define WR_RD_BSY_GAP_MASK GENMASK(22, 17) #define ENABLE_BCH_ECC BIT(27) /* NAND_DEV0_ECC_CFG bits */ #define ECC_CFG_ECC_DISABLE BIT(0) #define ECC_SW_RESET BIT(1) -#define ECC_MODE 4 #define ECC_MODE_MASK GENMASK(5, 4) #define ECC_MODE_4BIT 0 #define ECC_MODE_8BIT 1 -#define ECC_PARITY_SIZE_BYTES_BCH 8 #define ECC_PARITY_SIZE_BYTES_BCH_MASK GENMASK(12, 8) -#define ECC_NUM_DATA_BYTES 16 #define ECC_NUM_DATA_BYTES_MASK GENMASK(25, 16) #define ECC_FORCE_CLK_OPEN BIT(30) @@ -120,7 +110,6 @@ #define SEQ_READ_START_VLD BIT(4) /* NAND_EBI2_ECC_BUF_CFG bits */ -#define NUM_STEPS 0 #define NUM_STEPS_MASK GENMASK(9, 0) /* NAND_ERASED_CW_DETECT_CFG bits */ @@ -141,11 +130,8 @@ #define ERASED_CW (CODEWORD_ALL_ERASED | CODEWORD_ERASED) /* NAND_READ_LOCATION_n bits 
*/ -#define READ_LOCATION_OFFSET 0 #define READ_LOCATION_OFFSET_MASK GENMASK(9, 0) -#define READ_LOCATION_SIZE 16 #define READ_LOCATION_SIZE_MASK GENMASK(25, 16) -#define READ_LOCATION_LAST 31 #define READ_LOCATION_LAST_MASK BIT(31) /* Version Mask */ -- cgit v1.2.3 From 5f284dc15ca8695d0394414045ac64616a3b0e69 Mon Sep 17 00:00:00 2001 From: Tianling Shen Date: Mon, 25 Aug 2025 01:00:13 +0800 Subject: mtd: spinand: add support for FudanMicro FM25S01A Add support for FudanMicro FM25S01A SPI NAND. Datasheet: http://eng.fmsh.com/nvm/FM25S01A_ds_eng.pdf Signed-off-by: Tianling Shen Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 27a45bdab7ec..927c10d78769 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -355,6 +355,7 @@ struct spinand_manufacturer { extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; +extern const struct spinand_manufacturer fmsh_spinand_manufacturer; extern const struct spinand_manufacturer foresee_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; -- cgit v1.2.3 From 86eecc3a9c2e06462f6a273fcd24150b6da787de Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Fri, 22 Aug 2025 14:39:46 +0800 Subject: drm/bridge: synopsys: Add DW DPTX Controller support library The DW DP TX Controller is compliant with the DisplayPort Specification Version 1.4 with the following features: * DisplayPort 1.4a * Main Link: 1/2/4 lanes * Main Link Support 1.62Gbps, 2.7Gbps, 5.4Gbps and 8.1Gbps * AUX channel 1Mbps * Single Stream Transport(SST) * Multistream Transport (MST) * Type-C support (alternate mode) * HDCP 2.2, HDCP 1.3 * 
Supports up to 8/10 bits per color component * Supports RGB, YCbCr4:4:4, YCbCr4:2:2, YCbCr4:2:0 * Pixel clock up to 594MHz * I2S, SPDIF audio interface Add a library with common helpers so that it can be shared with other SoCs. Signed-off-by: Andy Yan Reviewed-by: Dmitry Baryshkov Tested-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250822063959.692098-3-andyshrk@163.com Signed-off-by: Dmitry Baryshkov --- include/drm/bridge/dw_dp.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/drm/bridge/dw_dp.h (limited to 'include') diff --git a/include/drm/bridge/dw_dp.h b/include/drm/bridge/dw_dp.h new file mode 100644 index 000000000000..d05df49fd884 --- /dev/null +++ b/include/drm/bridge/dw_dp.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2025 Rockchip Electronics Co., Ltd. + */ + +#ifndef __DW_DP__ +#define __DW_DP__ + +#include + +struct drm_encoder; +struct dw_dp; + +struct dw_dp_plat_data { + u32 max_link_rate; +}; + +struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder, + const struct dw_dp_plat_data *plat_data); +#endif /* __DW_DP__ */ -- cgit v1.2.3 From eeb8117f5f1c2e8e625e5cb39dbccd21d395caad Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 28 Aug 2025 12:45:16 +0530 Subject: drm/xe/uapi: Fix kernel-doc formatting for madvise and vma_query Correct kernel-doc formatting issues in the UAPI definitions for madvise and VMA query interfaces to resolve docutils warnings during documentation build.
Fixes: 418807860e94 ("drm/xe/uapi: Add UAPI for querying VMA count and memory attributes") Fixes: 231bb0ee7aa5 ("drm/xe/uapi: Add madvise interface") Cc: Matthew Brost Cc: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250828071516.3838110-1-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 7dedd45ab995..40ff19f52a8d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1997,23 +1997,23 @@ struct drm_xe_query_eu_stall { * union member is used to provide additional parameters for @type. * * Supported attribute types: - * - DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: Set preferred memory location. - * - DRM_XE_MEM_RANGE_ATTR_ATOMIC: Set atomic access policy. - * - DRM_XE_MEM_RANGE_ATTR_PAT: Set page attribute table index. + * - DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: Set preferred memory location. + * - DRM_XE_MEM_RANGE_ATTR_ATOMIC: Set atomic access policy. + * - DRM_XE_MEM_RANGE_ATTR_PAT: Set page attribute table index. * * Example: * * .. 
code-block:: C * - * struct drm_xe_madvise madvise = { - *          .vm_id = vm_id, - *          .start = 0x100000, - *          .range = 0x2000, - *          .type = DRM_XE_MEM_RANGE_ATTR_ATOMIC, - *         .atomic_val = DRM_XE_ATOMIC_DEVICE, - * }; + * struct drm_xe_madvise madvise = { + * .vm_id = vm_id, + * .start = 0x100000, + * .range = 0x2000, + * .type = DRM_XE_MEM_RANGE_ATTR_ATOMIC, + * .atomic_val = DRM_XE_ATOMIC_DEVICE, + * }; * - * ioctl(fd, DRM_IOCTL_XE_MADVISE, &madvise); + * ioctl(fd, DRM_IOCTL_XE_MADVISE, &madvise); * */ struct drm_xe_madvise { @@ -2042,12 +2042,12 @@ struct drm_xe_madvise { * Used when @type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC * * Supported values for @preferred_mem_loc.devmem_fd: - * - DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE: set vram of faulting tile as preferred loc - * - DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM: set smem as preferred loc + * - DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE: set vram of fault tile as preferred loc + * - DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM: set smem as preferred loc * * Supported values for @preferred_mem_loc.migration_policy: - * - DRM_XE_MIGRATE_ALL_PAGES - * - DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES + * - DRM_XE_MIGRATE_ALL_PAGES + * - DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES */ struct { #define DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE 0 @@ -2073,12 +2073,12 @@ struct drm_xe_madvise { * Used when @type == DRM_XE_MEM_RANGE_ATTR_ATOMIC. * * Supported values for @atomic.val: - * - DRM_XE_ATOMIC_UNDEFINED: Undefined or default behaviour - * Support both GPU and CPU atomic operations for system allocator - * Support GPU atomic operations for normal(bo) allocator - * - DRM_XE_ATOMIC_DEVICE: Support GPU atomic operations - * - DRM_XE_ATOMIC_GLOBAL: Support both GPU and CPU atomic operations - * - DRM_XE_ATOMIC_CPU: Support CPU atomic + * - DRM_XE_ATOMIC_UNDEFINED: Undefined or default behaviour. + * Support both GPU and CPU atomic operations for system allocator. + * Support GPU atomic operations for normal(bo) allocator. 
+ * - DRM_XE_ATOMIC_DEVICE: Support GPU atomic operations. + * - DRM_XE_ATOMIC_GLOBAL: Support both GPU and CPU atomic operations. + * - DRM_XE_ATOMIC_CPU: Support CPU atomic only, no GPU atomics supported. */ struct { #define DRM_XE_ATOMIC_UNDEFINED 0 @@ -2196,6 +2196,7 @@ struct drm_xe_mem_range_attr { * Example: * * .. code-block:: C + * * struct drm_xe_vm_query_mem_range_attr query = { * .vm_id = vm_id, * .start = 0x100000, -- cgit v1.2.3 From 30c2b98aa84c76f2ae60e66dd4ec2d9497713359 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Thu, 28 Aug 2025 12:33:17 +0530 Subject: x86/apic: Add new driver for Secure AVIC The Secure AVIC feature provides SEV-SNP guests hardware acceleration for performance sensitive APIC accesses while securely managing the guest-owned APIC state through the use of a private APIC backing page. This helps prevent the hypervisor from generating unexpected interrupts for a vCPU or otherwise violate architectural assumptions around the APIC behavior. Add a new x2APIC driver that will serve as the base of the Secure AVIC support. It is initially the same as the x2APIC physical driver (without IPI callbacks), but will be modified as features are implemented. As the new driver does not implement Secure AVIC features yet, if the hypervisor sets the Secure AVIC bit in SEV_STATUS, maintain the existing behavior to enforce the guest termination. [ bp: Massage commit message. 
] Co-developed-by: Kishon Vijay Abraham I Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Neeraj Upadhyay Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/20250828070334.208401-2-Neeraj.Upadhyay@amd.com --- include/linux/cc_platform.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index 0bf7d33a1048..7fcec025c5e0 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -96,6 +96,14 @@ enum cc_attr { * enabled to run SEV-SNP guests. */ CC_ATTR_HOST_SEV_SNP, + + /** + * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active. + * + * The host kernel is running with the necessary features enabled + * to run SEV-SNP guests with full Secure AVIC capabilities. + */ + CC_ATTR_SNP_SECURE_AVIC, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM -- cgit v1.2.3 From 13d8e05adf9dd06c74fcc6ba42ec4bf780fd557f Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 27 Aug 2025 17:39:55 +0300 Subject: queue_api: add support for fetching per queue DMA dev For zerocopy (io_uring, devmem), there is an assumption that the parent device can do DMA. However that is not always the case: - Scalable Function netdevs [1] have the DMA device in the grandparent. - For Multi-PF netdevs [2] queues can be associated to different DMA devices. This patch introduces a queue-based interface for allowing drivers to expose a different DMA device for zerocopy.
[1] Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst [2] Documentation/networking/multi-pf-netdev.rst Signed-off-by: Dragos Tatulea Reviewed-by: Pavel Begunkov Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250827144017.1529208-3-dtatulea@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 6e835972abd1..b9d02bc65c97 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -127,6 +127,9 @@ void netdev_stat_queue_sum(struct net_device *netdev, * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped * queue's memory is written at the specified address. * + * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used + * for this queue. Return NULL on error. + * * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only * be called for an interface which is open. @@ -144,6 +147,8 @@ struct netdev_queue_mgmt_ops { int (*ndo_queue_stop)(struct net_device *dev, void *per_queue_mem, int idx); + struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, + int idx); }; /** @@ -321,4 +326,6 @@ static inline void netif_subqueue_sent(const struct net_device *dev, get_desc, start_thrs); \ }) +struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx); + #endif -- cgit v1.2.3 From 48b5e5dbdb234ffc951cacceaec7f8ee37c83b2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2025 12:53:47 +0000 Subject: net_sched: act_vlan: use RCU in tcf_vlan_dump() Also storing tcf_action into struct tcf_vlan_params makes sure there is no discrepancy in tcf_vlan_act(). No longer block BH in tcf_vlan_init() when acquiring tcf_lock. 
Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250827125349.3505302-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_vlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 3f5e9242b5e8..beadee41669a 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -10,6 +10,7 @@ #include struct tcf_vlan_params { + int action; int tcfv_action; unsigned char tcfv_push_dst[ETH_ALEN]; unsigned char tcfv_push_src[ETH_ALEN]; -- cgit v1.2.3 From e97ae742972f6cb57986a5ebb846048f80b90003 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2025 12:53:48 +0000 Subject: net_sched: act_tunnel_key: use RCU in tunnel_key_dump() Also storing tcf_action into struct tcf_tunnel_key_params makes sure there is no discrepancy in tunnel_key_act(). No longer block BH in tunnel_key_init() when acquiring tcf_lock. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250827125349.3505302-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_tunnel_key.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 879fe8cff581..0f1925f97520 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -14,6 +14,7 @@ struct tcf_tunnel_key_params { struct rcu_head rcu; int tcft_action; + int action; struct metadata_dst *tcft_enc_metadata; }; -- cgit v1.2.3 From 53df77e7859042a92914d664c860f65d9689f88d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Aug 2025 12:53:49 +0000 Subject: net_sched: act_skbmod: use RCU in tcf_skbmod_dump() Also storing tcf_action into struct tcf_skbmod_params makes sure there is no discrepancy in tcf_skbmod_act(). No longer block BH in tcf_skbmod_init() when acquiring tcf_lock. 
Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250827125349.3505302-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_skbmod.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h index 7c240d2fed4e..626704cd6241 100644 --- a/include/net/tc_act/tc_skbmod.h +++ b/include/net/tc_act/tc_skbmod.h @@ -12,6 +12,7 @@ struct tcf_skbmod_params { struct rcu_head rcu; u64 flags; /*up to 64 types of operations; extend if needed */ + int action; u8 eth_dst[ETH_ALEN]; u16 eth_type; u8 eth_src[ETH_ALEN]; -- cgit v1.2.3 From 4247053aaacda75480e1918e0d58a687ae5a266a Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 5 Aug 2025 22:47:17 +0200 Subject: media: uapi: Move colorimetry controls at the end of the file The colorimetry controls class is defined after the stateless codec class at the top of the controls header. It is currently defined in the middle of stateless codec controls. Move the colorimetry controls after the stateless codec controls, at the end of the file. 
Signed-off-by: Paul Kocialkowski Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 68 +++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index f836512e9deb..4a483ff1c418 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -2549,40 +2549,6 @@ struct v4l2_ctrl_hevc_scaling_matrix { __u8 scaling_list_dc_coef_32x32[2]; }; -#define V4L2_CID_COLORIMETRY_CLASS_BASE (V4L2_CTRL_CLASS_COLORIMETRY | 0x900) -#define V4L2_CID_COLORIMETRY_CLASS (V4L2_CTRL_CLASS_COLORIMETRY | 1) - -#define V4L2_CID_COLORIMETRY_HDR10_CLL_INFO (V4L2_CID_COLORIMETRY_CLASS_BASE + 0) - -struct v4l2_ctrl_hdr10_cll_info { - __u16 max_content_light_level; - __u16 max_pic_average_light_level; -}; - -#define V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY (V4L2_CID_COLORIMETRY_CLASS_BASE + 1) - -#define V4L2_HDR10_MASTERING_PRIMARIES_X_LOW 5 -#define V4L2_HDR10_MASTERING_PRIMARIES_X_HIGH 37000 -#define V4L2_HDR10_MASTERING_PRIMARIES_Y_LOW 5 -#define V4L2_HDR10_MASTERING_PRIMARIES_Y_HIGH 42000 -#define V4L2_HDR10_MASTERING_WHITE_POINT_X_LOW 5 -#define V4L2_HDR10_MASTERING_WHITE_POINT_X_HIGH 37000 -#define V4L2_HDR10_MASTERING_WHITE_POINT_Y_LOW 5 -#define V4L2_HDR10_MASTERING_WHITE_POINT_Y_HIGH 42000 -#define V4L2_HDR10_MASTERING_MAX_LUMA_LOW 50000 -#define V4L2_HDR10_MASTERING_MAX_LUMA_HIGH 100000000 -#define V4L2_HDR10_MASTERING_MIN_LUMA_LOW 1 -#define V4L2_HDR10_MASTERING_MIN_LUMA_HIGH 50000 - -struct v4l2_ctrl_hdr10_mastering_display { - __u16 display_primaries_x[3]; - __u16 display_primaries_y[3]; - __u16 white_point_x; - __u16 white_point_y; - __u32 max_display_mastering_luminance; - __u32 min_display_mastering_luminance; -}; - /* Stateless VP9 controls */ #define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED 0x1 @@ -3515,4 +3481,38 @@ struct v4l2_ctrl_av1_film_grain { #define V4L2_CID_MPEG_MFC51_BASE 
V4L2_CID_CODEC_MFC51_BASE #endif +#define V4L2_CID_COLORIMETRY_CLASS_BASE (V4L2_CTRL_CLASS_COLORIMETRY | 0x900) +#define V4L2_CID_COLORIMETRY_CLASS (V4L2_CTRL_CLASS_COLORIMETRY | 1) + +#define V4L2_CID_COLORIMETRY_HDR10_CLL_INFO (V4L2_CID_COLORIMETRY_CLASS_BASE + 0) + +struct v4l2_ctrl_hdr10_cll_info { + __u16 max_content_light_level; + __u16 max_pic_average_light_level; +}; + +#define V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY (V4L2_CID_COLORIMETRY_CLASS_BASE + 1) + +#define V4L2_HDR10_MASTERING_PRIMARIES_X_LOW 5 +#define V4L2_HDR10_MASTERING_PRIMARIES_X_HIGH 37000 +#define V4L2_HDR10_MASTERING_PRIMARIES_Y_LOW 5 +#define V4L2_HDR10_MASTERING_PRIMARIES_Y_HIGH 42000 +#define V4L2_HDR10_MASTERING_WHITE_POINT_X_LOW 5 +#define V4L2_HDR10_MASTERING_WHITE_POINT_X_HIGH 37000 +#define V4L2_HDR10_MASTERING_WHITE_POINT_Y_LOW 5 +#define V4L2_HDR10_MASTERING_WHITE_POINT_Y_HIGH 42000 +#define V4L2_HDR10_MASTERING_MAX_LUMA_LOW 50000 +#define V4L2_HDR10_MASTERING_MAX_LUMA_HIGH 100000000 +#define V4L2_HDR10_MASTERING_MIN_LUMA_LOW 1 +#define V4L2_HDR10_MASTERING_MIN_LUMA_HIGH 50000 + +struct v4l2_ctrl_hdr10_mastering_display { + __u16 display_primaries_x[3]; + __u16 display_primaries_y[3]; + __u16 white_point_x; + __u16 white_point_y; + __u32 max_display_mastering_luminance; + __u32 min_display_mastering_luminance; +}; + #endif -- cgit v1.2.3 From 481c12018c252f7fc88b4bd05e882b9e1bf260c3 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 5 Aug 2025 22:47:18 +0200 Subject: media: uapi: Cleanup tab after define in headers Some definitions use a tab after the define keyword instead of the usual single space. Replace it for better consistency. 
Signed-off-by: Paul Kocialkowski Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 30 +++++++++++++++--------------- include/uapi/linux/videodev2.h | 18 +++++++++--------- 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 4a483ff1c418..7aef88465d04 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1193,7 +1193,7 @@ enum v4l2_flash_strobe_source { #define V4L2_CID_JPEG_CLASS_BASE (V4L2_CTRL_CLASS_JPEG | 0x900) #define V4L2_CID_JPEG_CLASS (V4L2_CTRL_CLASS_JPEG | 1) -#define V4L2_CID_JPEG_CHROMA_SUBSAMPLING (V4L2_CID_JPEG_CLASS_BASE + 1) +#define V4L2_CID_JPEG_CHROMA_SUBSAMPLING (V4L2_CID_JPEG_CLASS_BASE + 1) enum v4l2_jpeg_chroma_subsampling { V4L2_JPEG_CHROMA_SUBSAMPLING_444 = 0, V4L2_JPEG_CHROMA_SUBSAMPLING_422 = 1, @@ -1202,15 +1202,15 @@ enum v4l2_jpeg_chroma_subsampling { V4L2_JPEG_CHROMA_SUBSAMPLING_410 = 4, V4L2_JPEG_CHROMA_SUBSAMPLING_GRAY = 5, }; -#define V4L2_CID_JPEG_RESTART_INTERVAL (V4L2_CID_JPEG_CLASS_BASE + 2) -#define V4L2_CID_JPEG_COMPRESSION_QUALITY (V4L2_CID_JPEG_CLASS_BASE + 3) +#define V4L2_CID_JPEG_RESTART_INTERVAL (V4L2_CID_JPEG_CLASS_BASE + 2) +#define V4L2_CID_JPEG_COMPRESSION_QUALITY (V4L2_CID_JPEG_CLASS_BASE + 3) -#define V4L2_CID_JPEG_ACTIVE_MARKER (V4L2_CID_JPEG_CLASS_BASE + 4) -#define V4L2_JPEG_ACTIVE_MARKER_APP0 (1 << 0) -#define V4L2_JPEG_ACTIVE_MARKER_APP1 (1 << 1) -#define V4L2_JPEG_ACTIVE_MARKER_COM (1 << 16) -#define V4L2_JPEG_ACTIVE_MARKER_DQT (1 << 17) -#define V4L2_JPEG_ACTIVE_MARKER_DHT (1 << 18) +#define V4L2_CID_JPEG_ACTIVE_MARKER (V4L2_CID_JPEG_CLASS_BASE + 4) +#define V4L2_JPEG_ACTIVE_MARKER_APP0 (1 << 0) +#define V4L2_JPEG_ACTIVE_MARKER_APP1 (1 << 1) +#define V4L2_JPEG_ACTIVE_MARKER_COM (1 << 16) +#define V4L2_JPEG_ACTIVE_MARKER_DQT (1 << 17) +#define V4L2_JPEG_ACTIVE_MARKER_DHT (1 << 18) /* Image source controls */ @@ -1243,10 +1243,10 @@ enum 
v4l2_jpeg_chroma_subsampling { #define V4L2_CID_DV_CLASS_BASE (V4L2_CTRL_CLASS_DV | 0x900) #define V4L2_CID_DV_CLASS (V4L2_CTRL_CLASS_DV | 1) -#define V4L2_CID_DV_TX_HOTPLUG (V4L2_CID_DV_CLASS_BASE + 1) -#define V4L2_CID_DV_TX_RXSENSE (V4L2_CID_DV_CLASS_BASE + 2) -#define V4L2_CID_DV_TX_EDID_PRESENT (V4L2_CID_DV_CLASS_BASE + 3) -#define V4L2_CID_DV_TX_MODE (V4L2_CID_DV_CLASS_BASE + 4) +#define V4L2_CID_DV_TX_HOTPLUG (V4L2_CID_DV_CLASS_BASE + 1) +#define V4L2_CID_DV_TX_RXSENSE (V4L2_CID_DV_CLASS_BASE + 2) +#define V4L2_CID_DV_TX_EDID_PRESENT (V4L2_CID_DV_CLASS_BASE + 3) +#define V4L2_CID_DV_TX_MODE (V4L2_CID_DV_CLASS_BASE + 4) enum v4l2_dv_tx_mode { V4L2_DV_TX_MODE_DVI_D = 0, V4L2_DV_TX_MODE_HDMI = 1, @@ -1267,7 +1267,7 @@ enum v4l2_dv_it_content_type { V4L2_DV_IT_CONTENT_TYPE_NO_ITC = 4, }; -#define V4L2_CID_DV_RX_POWER_PRESENT (V4L2_CID_DV_CLASS_BASE + 100) +#define V4L2_CID_DV_RX_POWER_PRESENT (V4L2_CID_DV_CLASS_BASE + 100) #define V4L2_CID_DV_RX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 101) #define V4L2_CID_DV_RX_IT_CONTENT_TYPE (V4L2_CID_DV_CLASS_BASE + 102) @@ -2552,7 +2552,7 @@ struct v4l2_ctrl_hevc_scaling_matrix { /* Stateless VP9 controls */ #define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED 0x1 -#define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE 0x2 +#define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE 0x2 /** * struct v4l2_vp9_loop_filter - VP9 loop filter parameters diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 3dd9fa45dde1..64943f1a6149 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1607,8 +1607,8 @@ struct v4l2_bt_timings { } __attribute__ ((packed)); /* Interlaced or progressive format */ -#define V4L2_DV_PROGRESSIVE 0 -#define V4L2_DV_INTERLACED 1 +#define V4L2_DV_PROGRESSIVE 0 +#define V4L2_DV_INTERLACED 1 /* Polarities. 
If bit is not set, it is assumed to be negative polarity */ #define V4L2_DV_VSYNC_POS_POL 0x00000001 @@ -2788,15 +2788,15 @@ struct v4l2_remove_buffers { * Only implemented if CONFIG_VIDEO_ADV_DEBUG is defined. * You must be root to use these ioctls. Never use these in applications! */ -#define VIDIOC_DBG_S_REGISTER _IOW('V', 79, struct v4l2_dbg_register) -#define VIDIOC_DBG_G_REGISTER _IOWR('V', 80, struct v4l2_dbg_register) +#define VIDIOC_DBG_S_REGISTER _IOW('V', 79, struct v4l2_dbg_register) +#define VIDIOC_DBG_G_REGISTER _IOWR('V', 80, struct v4l2_dbg_register) #define VIDIOC_S_HW_FREQ_SEEK _IOW('V', 82, struct v4l2_hw_freq_seek) -#define VIDIOC_S_DV_TIMINGS _IOWR('V', 87, struct v4l2_dv_timings) -#define VIDIOC_G_DV_TIMINGS _IOWR('V', 88, struct v4l2_dv_timings) -#define VIDIOC_DQEVENT _IOR('V', 89, struct v4l2_event) -#define VIDIOC_SUBSCRIBE_EVENT _IOW('V', 90, struct v4l2_event_subscription) -#define VIDIOC_UNSUBSCRIBE_EVENT _IOW('V', 91, struct v4l2_event_subscription) +#define VIDIOC_S_DV_TIMINGS _IOWR('V', 87, struct v4l2_dv_timings) +#define VIDIOC_G_DV_TIMINGS _IOWR('V', 88, struct v4l2_dv_timings) +#define VIDIOC_DQEVENT _IOR('V', 89, struct v4l2_event) +#define VIDIOC_SUBSCRIBE_EVENT _IOW('V', 90, struct v4l2_event_subscription) +#define VIDIOC_UNSUBSCRIBE_EVENT _IOW('V', 91, struct v4l2_event_subscription) #define VIDIOC_CREATE_BUFS _IOWR('V', 92, struct v4l2_create_buffers) #define VIDIOC_PREPARE_BUF _IOWR('V', 93, struct v4l2_buffer) #define VIDIOC_G_SELECTION _IOWR('V', 94, struct v4l2_selection) -- cgit v1.2.3 From 039b9302d64ec35f70c91919cd7bcdbc1aef3707 Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Tue, 26 Aug 2025 10:25:01 +0800 Subject: media: aspeed: Allow to capture from SoC display (GFX) ASPEED BMC IC has 2 different display engines. Please find AST2600's datasheet to get detailed information. 1. VGA on PCIe 2. SoC Display (GFX) By default, video engine (VE) will capture video from VGA. 
This patch adds an option to capture video from GFX with standard ioctl, vidioc_s_input. An enum, aspeed_video_input, is added for this purpose. enum aspeed_video_input { VIDEO_INPUT_VGA = 0, VIDEO_INPUT_GFX, VIDEO_INPUT_MAX }; To test this feature, you will need to enable GFX first. Please refer to ASPEED's SDK_User_Guide, 6.3.x Soc Display driver, for more information. In your application, you will need to use v4l2 ioctl, VIDIOC_S_INPUT, as below to select before start streaming. int rc; struct v4l2_input input; input.index = VIDEO_INPUT_GFX; rc = ioctl(fd, VIDIOC_S_INPUT, &input); if (rc < 0) { ... } Link: https://github.com/AspeedTech-BMC/openbmc/releases Signed-off-by: Jammy Huang Signed-off-by: Hans Verkuil [hverkuil: split up three overly long lines] --- include/uapi/linux/aspeed-video.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/aspeed-video.h b/include/uapi/linux/aspeed-video.h index 6586a65548c4..15168e8c931e 100644 --- a/include/uapi/linux/aspeed-video.h +++ b/include/uapi/linux/aspeed-video.h @@ -8,6 +8,13 @@ #include +/* aspeed video's input types */ +enum aspeed_video_input { + VIDEO_INPUT_VGA = 0, + VIDEO_INPUT_GFX, + VIDEO_INPUT_MAX +}; + #define V4L2_CID_ASPEED_HQ_MODE (V4L2_CID_USER_ASPEED_BASE + 1) #define V4L2_CID_ASPEED_HQ_JPEG_QUALITY (V4L2_CID_USER_ASPEED_BASE + 2) -- cgit v1.2.3 From 5d8c9c987fbdd65677315198c2b1f35a440d7cdf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 Aug 2025 09:28:41 +0200 Subject: ALSA: hda: Introduce auto cleanup macros for PM The temporary power up/down of the codec via snd_hda_power_up() and _down() (or snd_hda_power_up_pm() and _down_pm()) is seen in various places. This patch introduces simple auto-cleanup macros for those call patterns, so that the drivers don't have to call the corresponding power-down calls explicitly. Namely, err = snd_hda_power_up(codec); if (err < 0) return err; .... 
snd_hda_power_down(codec); can drop the *_down() call by replacing with CLASS(snd_hda_power, pm)(codec); if (pm.err < 0) return pm.err; Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250827072916.31933-2-tiwai@suse.de --- include/sound/hda_codec.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 006d4e4a8195..5d9f0ef228af 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -503,6 +503,36 @@ static inline bool hda_codec_need_resume(struct hda_codec *codec) return !codec->relaxed_resume && codec->jacktbl.used; } +/* + * PM with auto-cleanup: call like CLASS(snd_hda_power, pm)(codec) + * If the error handling is needed, refer pm.err. + */ +struct __hda_power_obj { + struct hda_codec *codec; + int err; +}; + +static inline struct __hda_power_obj __snd_hda_power_up(struct hda_codec *codec) +{ + struct __hda_power_obj T = { .codec = codec }; + T.err = snd_hda_power_up(codec); + return T; +} + +static inline struct __hda_power_obj __snd_hda_power_up_pm(struct hda_codec *codec) +{ + struct __hda_power_obj T = { .codec = codec }; + T.err = snd_hda_power_up_pm(codec); + return T; +} + +DEFINE_CLASS(snd_hda_power, struct __hda_power_obj, + snd_hda_power_down((_T).codec), __snd_hda_power_up(codec), + struct hda_codec *codec) +DEFINE_CLASS(snd_hda_power_pm, struct __hda_power_obj, + snd_hda_power_down_pm((_T).codec), __snd_hda_power_up_pm(codec), + struct hda_codec *codec) + #ifdef CONFIG_SND_HDA_PATCH_LOADER /* * patch firmware -- cgit v1.2.3 From a23160c87986732590e68c1788e9b4929950ef67 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 Aug 2025 09:28:46 +0200 Subject: ALSA: hda: Use auto cleanup macros for DSP loader locks There are temporary DSP locking/unlocking patterns found in various places, and those can be cleaned up nicely with the guard() macro calling snd_hdac_dsp_lock() and *_unlock().
Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250827072916.31933-7-tiwai@suse.de --- include/sound/hdaudio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index d38234f8fe44..4e0c1d8af09f 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -651,6 +651,7 @@ int snd_hdac_stream_set_lpib(struct hdac_stream *azx_dev, u32 value); #define snd_hdac_dsp_lock(dev) mutex_lock(&(dev)->dsp_mutex) #define snd_hdac_dsp_unlock(dev) mutex_unlock(&(dev)->dsp_mutex) #define snd_hdac_stream_is_locked(dev) ((dev)->locked) +DEFINE_GUARD(snd_hdac_dsp_lock, struct hdac_stream *, snd_hdac_dsp_lock(_T), snd_hdac_dsp_unlock(_T)) /* DSP loader helpers */ int snd_hdac_dsp_prepare(struct hdac_stream *azx_dev, unsigned int format, unsigned int byte_size, struct snd_dma_buffer *bufp); -- cgit v1.2.3 From 782d4613171e271b1e28ee1db9616beb8e6ad8a1 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Fri, 29 Aug 2025 12:30:19 +0300 Subject: ASoC: SOF: IPC4: Add GET macros for module id and module instance id Add SOF_IPC4_MOD_INSTANCE_GET() and SOF_IPC4_MOD_ID_GET() for getting the ids from ipc4 header presentation. 
Signed-off-by: Jyri Sarha Reviewed-by: Liam Girdwood Signed-off-by: Peter Ujfalusi Message-ID: <20250829093022.32094-3-peter.ujfalusi@linux.intel.com> Signed-off-by: Mark Brown --- include/sound/sof/ipc4/header.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/sof/ipc4/header.h b/include/sound/sof/ipc4/header.h index e85c7afd85a4..15fac532688e 100644 --- a/include/sound/sof/ipc4/header.h +++ b/include/sound/sof/ipc4/header.h @@ -326,10 +326,14 @@ struct sof_ipc4_base_module_cfg { #define SOF_IPC4_MOD_INSTANCE_SHIFT 16 #define SOF_IPC4_MOD_INSTANCE_MASK GENMASK(23, 16) #define SOF_IPC4_MOD_INSTANCE(x) ((x) << SOF_IPC4_MOD_INSTANCE_SHIFT) +#define SOF_IPC4_MOD_INSTANCE_GET(x) (((x) & SOF_IPC4_MOD_INSTANCE_MASK) \ + >> SOF_IPC4_MOD_INSTANCE_SHIFT) #define SOF_IPC4_MOD_ID_SHIFT 0 #define SOF_IPC4_MOD_ID_MASK GENMASK(15, 0) #define SOF_IPC4_MOD_ID(x) ((x) << SOF_IPC4_MOD_ID_SHIFT) +#define SOF_IPC4_MOD_ID_GET(x) (((x) & SOF_IPC4_MOD_ID_MASK) \ + >> SOF_IPC4_MOD_ID_SHIFT) /* init module ipc msg */ #define SOF_IPC4_MOD_EXT_PARAM_SIZE_SHIFT 0 -- cgit v1.2.3 From 9a98f9e84cfbeaa51af42ba2b8bbbde046c709a7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Aug 2025 11:39:01 -0400 Subject: fs: make the i_state flags an enum Adjusting i_state flags always means updating the values manually. Bring these forward into the 2020's and make a nice clean macro for defining the i_state values as an enum, providing __ variants for the cases where we need the bit position instead of the actual value, and leaving the actual NAME as the 1U << bit value. 
Reviewed-by: Christian Brauner Signed-off-by: Josef Bacik Link: https://lore.kernel.org/0da9348da6ece0dce12fccec07b1dd2b8e4cfdab.1756222464.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 231 +++++++++++++++++++++++++++-------------------------- 1 file changed, 119 insertions(+), 112 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 12ecc6b0e6f9..c34554d8c4fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -664,6 +664,124 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_MGTIME 0x0020 #define IOP_CACHED_LINK 0x0040 +/* + * Inode state bits. Protected by inode->i_lock + * + * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, + * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. + * + * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, + * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at + * various stages of removing an inode. + * + * Two bits are used for locking and completion notification, I_NEW and I_SYNC. + * + * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on + * fdatasync() (unless I_DIRTY_DATASYNC is also set). + * Timestamp updates are the usual cause. + * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of + * these changes separately from I_DIRTY_SYNC so that we + * don't have to write inode on fdatasync() when only + * e.g. the timestamps have changed. + * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. + * I_DIRTY_TIME The inode itself has dirty timestamps, and the + * lazytime mount option is enabled. We keep track of this + * separately from I_DIRTY_SYNC in order to implement + * lazytime. This gets cleared if I_DIRTY_INODE + * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. 
But + * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already + * in place because writeback might already be in progress + * and we don't want to lose the time update + * I_NEW Serves as both a mutex and completion notification. + * New inodes set I_NEW. If two processes both create + * the same inode, one of them will release its inode and + * wait for I_NEW to be released before returning. + * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can + * also cause waiting on I_NEW, without I_NEW actually + * being set. find_inode() uses this to prevent returning + * nearly-dead inodes. + * I_WILL_FREE Must be set when calling write_inode_now() if i_count + * is zero. I_FREEING must be set when I_WILL_FREE is + * cleared. + * I_FREEING Set when inode is about to be freed but still has dirty + * pages or buffers attached or the inode itself is still + * dirty. + * I_CLEAR Added by clear_inode(). In this state the inode is + * clean and can be destroyed. Inode keeps I_FREEING. + * + * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are + * prohibited for many purposes. iget() must wait for + * the inode to be completely released, then create it + * anew. Other functions will just ignore such inodes, + * if appropriate. I_NEW is used for waiting. + * + * I_SYNC Writeback of inode is running. The bit is set during + * data writeback, and cleared with a wakeup on the bit + * address once it is done. The bit is also used to pin + * the inode in memory for flusher thread. + * + * I_REFERENCED Marks the inode as recently references on the LRU list. + * + * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to + * synchronize competing switching instances and to tell + * wb stat updates to grab the i_pages lock. See + * inode_switch_wbs_work_fn() for details. + * + * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper + * and work dirs among overlayfs mounts. + * + * I_CREATING New object's inode in the middle of setting up. 
+ * + * I_DONTCACHE Evict inode as soon as it is not used anymore. + * + * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. + * Used to detect that mark_inode_dirty() should not move + * inode between dirty lists. + * + * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. + * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * + * Q: What is the difference between I_WILL_FREE and I_FREEING? + * + * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait + * upon. There's one free address left. + */ + +enum inode_state_bits { + __I_NEW = 0U, + __I_SYNC = 1U, + __I_LRU_ISOLATING = 2U + /* reserved wait address bit 3 */ +}; + +enum inode_state_flags_t { + I_NEW = (1U << __I_NEW), + I_SYNC = (1U << __I_SYNC), + I_LRU_ISOLATING = (1U << __I_LRU_ISOLATING), + /* reserved flag bit 3 */ + I_DIRTY_SYNC = (1U << 4), + I_DIRTY_DATASYNC = (1U << 5), + I_DIRTY_PAGES = (1U << 6), + I_WILL_FREE = (1U << 7), + I_FREEING = (1U << 8), + I_CLEAR = (1U << 9), + I_REFERENCED = (1U << 10), + I_LINKABLE = (1U << 11), + I_DIRTY_TIME = (1U << 12), + I_WB_SWITCH = (1U << 13), + I_OVL_INUSE = (1U << 14), + I_CREATING = (1U << 15), + I_DONTCACHE = (1U << 16), + I_SYNC_QUEUED = (1U << 17), + I_PINNING_NETFS_WB = (1U << 18) +}; + +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) +#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) +#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) + /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning @@ -722,7 +840,7 @@ struct inode { #endif /* Misc */ - u32 i_state; + enum inode_state_flags_t i_state; /* 32-bit hole */ struct rw_semaphore i_rwsem; @@ -2482,117 +2600,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, }; } -/* - * Inode state bits. 
Protected by inode->i_lock - * - * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, - * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. - * - * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, - * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at - * various stages of removing an inode. - * - * Two bits are used for locking and completion notification, I_NEW and I_SYNC. - * - * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on - * fdatasync() (unless I_DIRTY_DATASYNC is also set). - * Timestamp updates are the usual cause. - * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of - * these changes separately from I_DIRTY_SYNC so that we - * don't have to write inode on fdatasync() when only - * e.g. the timestamps have changed. - * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. - * I_DIRTY_TIME The inode itself has dirty timestamps, and the - * lazytime mount option is enabled. We keep track of this - * separately from I_DIRTY_SYNC in order to implement - * lazytime. This gets cleared if I_DIRTY_INODE - * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But - * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already - * in place because writeback might already be in progress - * and we don't want to lose the time update - * I_NEW Serves as both a mutex and completion notification. - * New inodes set I_NEW. If two processes both create - * the same inode, one of them will release its inode and - * wait for I_NEW to be released before returning. - * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can - * also cause waiting on I_NEW, without I_NEW actually - * being set. find_inode() uses this to prevent returning - * nearly-dead inodes. - * I_WILL_FREE Must be set when calling write_inode_now() if i_count - * is zero. I_FREEING must be set when I_WILL_FREE is - * cleared. 
- * I_FREEING Set when inode is about to be freed but still has dirty - * pages or buffers attached or the inode itself is still - * dirty. - * I_CLEAR Added by clear_inode(). In this state the inode is - * clean and can be destroyed. Inode keeps I_FREEING. - * - * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are - * prohibited for many purposes. iget() must wait for - * the inode to be completely released, then create it - * anew. Other functions will just ignore such inodes, - * if appropriate. I_NEW is used for waiting. - * - * I_SYNC Writeback of inode is running. The bit is set during - * data writeback, and cleared with a wakeup on the bit - * address once it is done. The bit is also used to pin - * the inode in memory for flusher thread. - * - * I_REFERENCED Marks the inode as recently references on the LRU list. - * - * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to - * synchronize competing switching instances and to tell - * wb stat updates to grab the i_pages lock. See - * inode_switch_wbs_work_fn() for details. - * - * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper - * and work dirs among overlayfs mounts. - * - * I_CREATING New object's inode in the middle of setting up. - * - * I_DONTCACHE Evict inode as soon as it is not used anymore. - * - * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. - * Used to detect that mark_inode_dirty() should not move - * inode between dirty lists. - * - * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. - * - * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding - * i_count. - * - * Q: What is the difference between I_WILL_FREE and I_FREEING? - * - * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait - * upon. There's one free address left. 
- */ -#define __I_NEW 0 -#define I_NEW (1 << __I_NEW) -#define __I_SYNC 1 -#define I_SYNC (1 << __I_SYNC) -#define __I_LRU_ISOLATING 2 -#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) - -#define I_DIRTY_SYNC (1 << 3) -#define I_DIRTY_DATASYNC (1 << 4) -#define I_DIRTY_PAGES (1 << 5) -#define I_WILL_FREE (1 << 6) -#define I_FREEING (1 << 7) -#define I_CLEAR (1 << 8) -#define I_REFERENCED (1 << 9) -#define I_LINKABLE (1 << 10) -#define I_DIRTY_TIME (1 << 11) -#define I_WB_SWITCH (1 << 12) -#define I_OVL_INUSE (1 << 13) -#define I_CREATING (1 << 14) -#define I_DONTCACHE (1 << 15) -#define I_SYNC_QUEUED (1 << 16) -#define I_PINNING_NETFS_WB (1 << 17) - -#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) -#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) - extern void __mark_inode_dirty(struct inode *, int); static inline void mark_inode_dirty(struct inode *inode) { -- cgit v1.2.3 From db2ab24a341ce89351a1bede37a96a3e3ce1726a Mon Sep 17 00:00:00 2001 From: Lauri Vasama Date: Wed, 27 Aug 2025 16:39:00 +0300 Subject: Add RWF_NOSIGNAL flag for pwritev2 For a user mode library to avoid generating SIGPIPE signals (e.g. because this behaviour is not portable across operating systems) is cumbersome. It is generally bad form to change the process-wide signal mask in a library, so a local solution is needed instead. For I/O performed directly using system calls (synchronous or readiness based asynchronous) this currently involves applying a thread-specific signal mask before the operation and reverting it afterwards. This can be avoided when it is known that the file descriptor refers to neither a pipe nor a socket, but a conservative implementation must always apply the mask. This incurs the cost of two additional system calls. In the case of sockets, the existing MSG_NOSIGNAL flag can be used with send. 
For asynchronous I/O performed using io_uring, currently the only option (apart from MSG_NOSIGNAL for sockets), is to mask SIGPIPE entirely in the call to io_uring_enter. Thankfully io_uring_enter takes a signal mask, so only a single syscall is needed. However, copying the signal mask on every call incurs a non-zero performance penalty. Furthermore, this mask applies to all completions, meaning that if the non-signaling behaviour is desired only for some subset of operations, the desired signals must be raised manually from user-mode depending on the completed operation. Add RWF_NOSIGNAL flag for pwritev2. This flag prevents the SIGPIPE signal from being raised when writing on disconnected pipes or sockets. The flag is handled directly by the pipe filesystem and converted to the existing MSG_NOSIGNAL flag for sockets. Signed-off-by: Lauri Vasama Link: https://lore.kernel.org/20250827133901.1820771-1-git@vasama.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/fs.h | 1 + include/uapi/linux/fs.h | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 780e9c774c54..34693cae15a2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -356,6 +356,7 @@ struct readahead_control; #define IOCB_APPEND (__force int) RWF_APPEND #define IOCB_ATOMIC (__force int) RWF_ATOMIC #define IOCB_DONTCACHE (__force int) RWF_DONTCACHE +#define IOCB_NOSIGNAL (__force int) RWF_NOSIGNAL /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 0bd678a4a10e..beb4c2d1e41c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -430,10 +430,13 @@ typedef int __bitwise __kernel_rwf_t; /* buffered IO that drops the cache after reading or writing data */ #define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) +/* prevent pipe and socket writes from raising SIGPIPE */ +#define 
RWF_NOSIGNAL ((__force __kernel_rwf_t)0x00000100) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ - RWF_DONTCACHE) + RWF_DONTCACHE | RWF_NOSIGNAL) #define PROCFS_IOCTL_MAGIC 'f' -- cgit v1.2.3 From df220cc5e689213c34a0eec7ef26d25f503c77ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Aug 2025 08:25:11 -0700 Subject: lib/crypto: poly1305: Remove unused function poly1305_is_arch_optimized() poly1305_is_arch_optimized() is unused, so remove it. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829152513.92459-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/poly1305.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index e54abda8cfe9..d4daeec8da19 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -64,13 +64,4 @@ void poly1305_update(struct poly1305_desc_ctx *desc, const u8 *src, unsigned int nbytes); void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest); -#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305) -bool poly1305_is_arch_optimized(void); -#else -static inline bool poly1305_is_arch_optimized(void) -{ - return false; -} -#endif - #endif -- cgit v1.2.3 From b646b782e522da3509e61f971e5502fccb3a3723 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Aug 2025 08:25:12 -0700 Subject: lib/crypto: poly1305: Consolidate into single module Consolidate the Poly1305 code into a single module, similar to various other algorithms (SHA-1, SHA-256, SHA-512, etc.): - Each arch now provides a header file lib/crypto/$(SRCARCH)/poly1305.h, replacing lib/crypto/$(SRCARCH)/poly1305*.c. The header defines poly1305_block_init(), poly1305_blocks(), poly1305_emit(), and optionally poly1305_mod_init_arch(). 
It is included by lib/crypto/poly1305.c, and thus the code gets built into the single libpoly1305 module, with improved inlining in some cases. - Whether arch-optimized Poly1305 is buildable is now controlled centrally by lib/crypto/Kconfig instead of by lib/crypto/$(SRCARCH)/Kconfig. The conditions for enabling it remain the same as before, and it remains enabled by default. (The PPC64 one remains unconditionally disabled due to 'depends on BROKEN'.) - Any additional arch-specific translation units for the optimized Poly1305 code, such as assembly files, are now compiled by lib/crypto/Makefile instead of lib/crypto/$(SRCARCH)/Makefile. A special consideration is needed because the Adiantum code uses the poly1305_core_*() functions directly. For now, just carry forward that approach. This means retaining the CRYPTO_LIB_POLY1305_GENERIC kconfig symbol, and keeping the poly1305_core_*() functions in separate translation units. So it's not quite as streamlined as what I've done with the other hash functions, but we still get a single libpoly1305 module. Note: to see the diff from the arm, arm64, and x86 .c files to the new .h files, view this commit with 'git show -M10'.
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829152513.92459-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/poly1305.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h index c60315f47562..a72fff409ab8 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -30,12 +30,13 @@ void poly1305_core_blocks(struct poly1305_state *state, void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], void *dst); -void poly1305_block_init_arch(struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -void poly1305_block_init_generic(struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, - unsigned int len, u32 padbit); +static inline void +poly1305_block_init_generic(struct poly1305_block_state *desc, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + poly1305_core_init(&desc->h); + poly1305_core_setkey(&desc->core_r, raw_key); +} static inline void poly1305_blocks_generic(struct poly1305_block_state *state, const u8 *src, unsigned int len, @@ -45,9 +46,6 @@ static inline void poly1305_blocks_generic(struct poly1305_block_state *state, len / POLY1305_BLOCK_SIZE, padbit); } -void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]); - static inline void poly1305_emit_generic(const struct poly1305_state *state, u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]) -- cgit v1.2.3 From c4b846ff6ecab0427cc7dcccbe0af60b244a6d56 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:22 -0700 Subject: lib/crypto: chacha: Remove unused function chacha_is_arch_optimized() chacha_is_arch_optimized() is no longer used, so remove it. 
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/chacha.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index 91f6b4cf561c..be25a0b65a05 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -119,13 +119,4 @@ static inline void chacha_zeroize_state(struct chacha_state *state) memzero_explicit(state, sizeof(*state)); } -#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA) -bool chacha_is_arch_optimized(void); -#else -static inline bool chacha_is_arch_optimized(void) -{ - return false; -} -#endif - #endif /* _CRYPTO_CHACHA_H */ -- cgit v1.2.3 From 13cecc526d8fe7eeb9b136159738688a1a10cd82 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:25 -0700 Subject: lib/crypto: chacha: Consolidate into single module Consolidate the ChaCha code into a single module (excluding chacha-block-generic.c which remains always built-in for random.c), similar to various other algorithms: - Each arch now provides a header file lib/crypto/$(SRCARCH)/chacha.h, replacing lib/crypto/$(SRCARCH)/chacha*.c. The header defines chacha_crypt_arch() and hchacha_block_arch(). It is included by lib/crypto/chacha.c, and thus the code gets built into the single libchacha module, with improved inlining in some cases. - Whether arch-optimized ChaCha is buildable is now controlled centrally by lib/crypto/Kconfig instead of by lib/crypto/$(SRCARCH)/Kconfig. The conditions for enabling it remain the same as before, and it remains enabled by default. - Any additional arch-specific translation units for the optimized ChaCha code, such as assembly files, are now compiled by lib/crypto/Makefile instead of lib/crypto/$(SRCARCH)/Makefile. This removes the last use for the Makefile and Kconfig files in the arm64, mips, powerpc, riscv, and s390 subdirectories of lib/crypto/. 
So also remove those files and the references to them. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/chacha.h | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h index be25a0b65a05..38e26dff27b0 100644 --- a/include/crypto/chacha.h +++ b/include/crypto/chacha.h @@ -45,19 +45,11 @@ static inline void chacha20_block(struct chacha_state *state, chacha_block_generic(state, out, 20); } -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); void hchacha_block_generic(const struct chacha_state *state, u32 out[HCHACHA_OUT_WORDS], int nrounds); -static inline void hchacha_block(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) - hchacha_block_arch(state, out, nrounds); - else - hchacha_block_generic(state, out, nrounds); -} +void hchacha_block(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); enum chacha_constants { /* expand 32-byte k */ CHACHA_CONSTANT_EXPA = 0x61707865U, @@ -93,20 +85,8 @@ static inline void chacha_init(struct chacha_state *state, state->x[15] = get_unaligned_le32(iv + 12); } -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds); -void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds); - -static inline void chacha_crypt(struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) - chacha_crypt_arch(state, dst, src, bytes, nrounds); - else - chacha_crypt_generic(state, dst, src, bytes, nrounds); -} +void chacha_crypt(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int 
bytes, int nrounds); static inline void chacha20_crypt(struct chacha_state *state, u8 *dst, const u8 *src, unsigned int bytes) -- cgit v1.2.3 From 126f5d90f6c855b39eebec17f93c2f9d2ce01ebb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:27 -0700 Subject: lib/crypto: blake2s: Remove obsolete self-test Remove the original BLAKE2s self-test, since it will be superseded by blake2s_kunit. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/blake2s.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 506d56530ca9..3b82572c9433 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -16,6 +16,4 @@ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, void blake2s_compress(struct blake2s_state *state, const u8 *block, size_t nblocks, const u32 inc); -bool blake2s_selftest(void); - #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */ -- cgit v1.2.3 From 39ee3970f26d55b57343da392d45117d7f893205 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:30 -0700 Subject: lib/crypto: blake2s: Consolidate into single C translation unit As was done with the other algorithms, reorganize the BLAKE2s code so that the generic implementation and the arch-specific "glue" code is consolidated into a single translation unit, so that the compiler will inline the functions and automatically decide whether to include the generic code in the resulting binary or not. Similarly, also consolidate the build rules into lib/crypto/{Makefile,Kconfig}. This removes the last uses of lib/crypto/{arm,x86}/{Makefile,Kconfig}, so remove those too. Don't keep the !KMSAN dependency. It was needed only for other algorithms such as ChaCha that initialize memory from assembly code. 
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-12-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/internal/blake2s.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/crypto/internal/blake2s.h (limited to 'include') diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h deleted file mode 100644 index 3b82572c9433..000000000000 --- a/include/crypto/internal/blake2s.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/* - * Helper functions for BLAKE2s implementations. - * Keep this in sync with the corresponding BLAKE2b header. - */ - -#ifndef _CRYPTO_INTERNAL_BLAKE2S_H -#define _CRYPTO_INTERNAL_BLAKE2S_H - -#include -#include - -void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc); - -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc); - -#endif /* _CRYPTO_INTERNAL_BLAKE2S_H */ -- cgit v1.2.3 From 3d439e1ec3368fae17db379354bd7a9e568ca0ab Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Fri, 29 Aug 2025 18:11:01 +0300 Subject: ASoC: sof: ipc4-topology: Add support to sched_domain attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SOF_TKN_COMP_SCHED_DOMAIN and connect it to struct snd_sof_widget comp_domain member, with new get_token_comp_domain() function. The logic is such that if the topology attribute is not present in the widget node the corresponding IPC4 extension value is taken from the module's manifest like before. But if the attribute is found and recognized its value overrides what is there in the manifest. 
Signed-off-by: Jyri Sarha Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Signed-off-by: Peter Ujfalusi Message-ID: <20250829151101.27327-1-peter.ujfalusi@linux.intel.com> Signed-off-by: Mark Brown --- include/uapi/sound/sof/tokens.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index c28c766270de..9ce72fbd6f11 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -106,6 +106,8 @@ */ #define SOF_TKN_COMP_NO_WNAME_IN_KCONTROL_NAME 417 +#define SOF_TKN_COMP_SCHED_DOMAIN 418 + /* SSP */ #define SOF_TKN_INTEL_SSP_CLKS_CONTROL 500 #define SOF_TKN_INTEL_SSP_MCLK_ID 501 -- cgit v1.2.3 From 72cdc67e7fa74931b055df3a76852bab551f1a04 Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 28 Aug 2025 09:20:16 +0800 Subject: pppoe: remove rwlock usage Like ppp_generic.c, convert the PPPoE socket hash table to use RCU for lookups and a spinlock for updates. This removes rwlock usage and allows lockless readers on the fast path. - Mark hash table and list pointers as __rcu. - Use spin_lock() to protect writers. - Readers use rcu_dereference() under rcu_read_lock(). All known callers of get_item() already hold the RCU read lock, so no additional locking is needed. - get_item() now uses refcount_inc_not_zero() instead of sock_hold() to safely take a reference. This prevents crashes if a socket is already in the process of being freed (sk_refcnt == 0). - Set SOCK_RCU_FREE to defer socket freeing until after an RCU grace period. - Move skb_queue_purge() into sk_destruct callback to ensure purge happens after an RCU grace period. 
Signed-off-by: Qingfang Deng Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250828012018.15922-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/if_pppox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index ff3beda1312c..db45d6f1c4f4 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -43,7 +43,7 @@ struct pppox_sock { /* struct sock must be the first member of pppox_sock */ struct sock sk; struct ppp_channel chan; - struct pppox_sock *next; /* for hash table */ + struct pppox_sock __rcu *next; /* for hash table */ union { struct pppoe_opt pppoe; struct pptp_opt pptp; -- cgit v1.2.3 From 9529320ad64e614cfaf96e6b8e3d8c0a1245160c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Aug 2025 10:27:37 +0000 Subject: inet_diag: change inet_diag_bc_sk() first argument We want to have access to the inet_diag_dump_data structure in the following patch. This patch removes duplication in callers. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250828102738.2065992-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/inet_diag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 30bf8f7ea62b..86a0641ec36e 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -46,7 +46,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, const struct inet_diag_req_v2 *req, u16 nlmsg_flags, bool net_admin); -int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); +int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); -- cgit v1.2.3 From 95fa78830e5b2eb2041174c7f9549c746e003dd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Aug 2025 10:27:38 +0000 Subject: inet_diag: avoid cache line misses in inet_diag_bc_sk() inet_diag_bc_sk() pulls five cache lines per socket, while most filters only need the first two. Add three booleans to struct inet_diag_dump_data that are selectively set if a filter needs specific socket fields. - mark_needed /* INET_DIAG_BC_MARK_COND present. */ - cgroup_needed /* INET_DIAG_BC_CGROUP_COND present. */ - userlocks_needed /* INET_DIAG_BC_AUTO present. */ This removes millions of cache line misses per ss invocation when simple filters are specified on busy servers.
offsetof(struct sock, sk_userlocks) = 0xf3 offsetof(struct sock, sk_mark) = 0x20c offsetof(struct sock, sk_cgrp_data) = 0x298 Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250828102738.2065992-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/inet_diag.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 86a0641ec36e..704fd415c2b4 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -38,6 +38,11 @@ struct inet_diag_dump_data { #define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES] struct bpf_sk_storage_diag *bpf_stg_diag; + bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */ +#ifdef CONFIG_SOCK_CGROUP_DATA + bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */ +#endif + bool userlocks_needed; /* INET_DIAG_BC_AUTO present. */ }; struct inet_connection_sock; -- cgit v1.2.3 From caedcc5b6df1b2e2b5f39079e3369c1d4d5c5f50 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Aug 2025 19:58:16 +0000 Subject: net: dst: introduce dst->dev_rcu Followup of commit 88fe14253e18 ("net: dst: add four helpers to annotate data-races around dst->dev"). We want to gradually add explicit RCU protection to dst->dev, including lockdep support. Add a union to alias dst->dev_rcu and dst->dev. Add dst_dev_net_rcu() helper.
Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20250828195823.3958522-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dst.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index bab01363bb97..f8aa1239b4db 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -24,7 +24,10 @@ struct sk_buff; struct dst_entry { - struct net_device *dev; + union { + struct net_device *dev; + struct net_device __rcu *dev_rcu; + }; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; @@ -570,9 +573,12 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst) static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst) { - /* In the future, use rcu_dereference(dst->dev) */ - WARN_ON_ONCE(!rcu_read_lock_held()); - return READ_ONCE(dst->dev); + return rcu_dereference(dst->dev_rcu); +} + +static inline struct net *dst_dev_net_rcu(const struct dst_entry *dst) +{ + return dev_net_rcu(dst_dev_rcu(dst)); } static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) @@ -592,7 +598,7 @@ static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) { - return dev_net_rcu(skb_dst_dev(skb)); + return dev_net_rcu(skb_dst_dev_rcu(skb)); } struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); -- cgit v1.2.3 From 99a2ace61b211b0be861b07fbaa062fca4b58879 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Aug 2025 19:58:20 +0000 Subject: net: use dst_dev_rcu() in sk_setup_caps() Use RCU to protect accesses to dst->dev from sk_setup_caps() and sk_dst_gso_max_size(). Also use dst_dev_rcu() in ip6_dst_mtu_maybe_forward(), and ip_dst_mtu_maybe_forward(). ip4_dst_hoplimit() can use dst_dev_net_rcu(). 
Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20250828195823.3958522-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 6 ++++-- include/net/ip6_route.h | 2 +- include/net/route.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index befcba575129..6dbd2bf8fa9c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -467,12 +467,14 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = dst_rtable(dst); + const struct net_device *dev; unsigned int mtu, res; struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + dev = dst_dev_rcu(dst); + net = dev_net_rcu(dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -486,7 +488,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, if (mtu) goto out; - mtu = READ_ONCE(dst_dev(dst)->mtu); + mtu = READ_ONCE(dev->mtu); if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9255f21818ee..59f48ca3abdf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -337,7 +337,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst mtu = IPV6_MIN_MTU; rcu_read_lock(); - idev = __in6_dev_get(dst_dev(dst)); + idev = __in6_dev_get(dst_dev_rcu(dst)); if (idev) mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); diff --git a/include/net/route.h b/include/net/route.h index c71998f464f8..f90106f383c5 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -390,7 +390,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) const struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst_dev(dst)); + net = dst_dev_net_rcu(dst); 
hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); rcu_read_unlock(); } -- cgit v1.2.3 From a59076f2669ec23a122549e1f4114e8d4255b632 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Sat, 16 Aug 2025 10:28:57 -0700 Subject: lsm: security_lsmblob_to_secctx module selection Add a parameter lsmid to security_lsmprop_to_secctx() to identify which of the security modules that may be active should provide the security context. If the value of lsmid is LSM_ID_UNDEF, the first LSM providing a hook is used. security_secid_to_secctx() is unchanged, and will always report the first LSM providing a hook. Signed-off-by: Casey Schaufler [PM: subj tweak] Signed-off-by: Paul Moore --- include/linux/security.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 521bcb5b9717..6d1ed6e7387b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -567,7 +567,8 @@ int security_getprocattr(struct task_struct *p, int lsmid, const char *name, int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, struct lsm_context *cp); -int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp); +int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp, + int lsmid); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void security_release_secctx(struct lsm_context *cp); void security_inode_invalidate_secctx(struct inode *inode); @@ -1551,7 +1552,8 @@ static inline int security_secid_to_secctx(u32 secid, struct lsm_context *cp) } static inline int security_lsmprop_to_secctx(struct lsm_prop *prop, - struct lsm_context *cp) + struct lsm_context *cp, + int lsmid) { return -EOPNOTSUPP; } -- cgit v1.2.3 From eb59d494eebd4c5414728a35cdea6a0ba78ff26e Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Sat, 16 Aug 2025
10:28:58 -0700 Subject: audit: add record for multiple task security contexts Replace the single skb pointer in an audit_buffer with a list of skb pointers. Add the audit_stamp information to the audit_buffer as there's no guarantee that there will be an audit_context containing the stamp associated with the event. At audit_log_end() time, create the auxiliary records that have been added to the list. Functions are created to manage the skb list in the audit_buffer. Create a new audit record AUDIT_MAC_TASK_CONTEXTS. An example of the MAC_TASK_CONTEXTS record is: type=MAC_TASK_CONTEXTS msg=audit(1600880931.832:113) subj_apparmor=unconfined subj_smack=_ When an audit event includes an AUDIT_MAC_TASK_CONTEXTS record, the "subj=" field in other records in the event will be "subj=?". An AUDIT_MAC_TASK_CONTEXTS record is supplied when the system has multiple security modules that may make access decisions based on a subject security context. Refactor audit_log_task_context(), creating a new audit_log_subj_ctx(). This is used in netlabel auditing to provide multiple subject security contexts as necessary.
Suggested-by: Paul Moore Signed-off-by: Casey Schaufler [PM: subj tweak, audit example readability indents] Signed-off-by: Paul Moore --- include/linux/audit.h | 16 ++++++++++++++++ include/uapi/linux/audit.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index e3f06eba9c6e..a1f068bcb3a0 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -37,6 +37,8 @@ struct audit_watch; struct audit_tree; struct sk_buff; struct kern_ipc_perm; +struct lsm_id; +struct lsm_prop; struct audit_krule { u32 pflags; @@ -147,6 +149,9 @@ extern unsigned compat_signal_class[]; #define AUDIT_TTY_ENABLE BIT(0) #define AUDIT_TTY_LOG_PASSWD BIT(1) +/* bit values for audit_cfg_lsm */ +#define AUDIT_CFG_LSM_SECCTX_SUBJECT BIT(0) + struct filename; #define AUDIT_OFF 0 @@ -185,6 +190,7 @@ extern void audit_log_path_denied(int type, const char *operation); extern void audit_log_lost(const char *message); +extern int audit_log_subj_ctx(struct audit_buffer *ab, struct lsm_prop *prop); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); @@ -210,6 +216,8 @@ extern u32 audit_enabled; extern int audit_signal_info(int sig, struct task_struct *t); +extern void audit_cfg_lsm(const struct lsm_id *lsmid, int flags); + #else /* CONFIG_AUDIT */ static inline __printf(4, 5) void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, @@ -245,6 +253,11 @@ static inline void audit_log_key(struct audit_buffer *ab, char *key) { } static inline void audit_log_path_denied(int type, const char *operation) { } +static inline int audit_log_subj_ctx(struct audit_buffer *ab, + struct lsm_prop *prop) +{ + return 0; +} static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; @@ -269,6 +282,9 @@ static inline int audit_signal_info(int sig, struct task_struct *t) return 0; } +static inline void audit_cfg_lsm(const struct lsm_id *lsmid, int flags) 
+{ } + #endif /* CONFIG_AUDIT */ #ifdef CONFIG_AUDIT_COMPAT_GENERIC diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 9a4ecc9f6dc5..8cad2f307719 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -148,6 +148,7 @@ #define AUDIT_IPE_POLICY_LOAD 1422 /* IPE policy load */ #define AUDIT_LANDLOCK_ACCESS 1423 /* Landlock denial */ #define AUDIT_LANDLOCK_DOMAIN 1424 /* Landlock domain status */ +#define AUDIT_MAC_TASK_CONTEXTS 1425 /* Multiple LSM task contexts */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 -- cgit v1.2.3 From 0ffbc876d03c80b83d70aeefac7bbb94a9f4e135 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Sat, 16 Aug 2025 10:28:59 -0700 Subject: audit: add record for multiple object contexts Create a new audit record AUDIT_MAC_OBJ_CONTEXTS. An example of the MAC_OBJ_CONTEXTS record is: type=MAC_OBJ_CONTEXTS msg=audit(1601152467.009:1050): obj_selinux=unconfined_u:object_r:user_home_t:s0 When an audit event includes an AUDIT_MAC_OBJ_CONTEXTS record, the "obj=" field in other records in the event will be "obj=?". An AUDIT_MAC_OBJ_CONTEXTS record is supplied when the system has multiple security modules that may make access decisions based on an object security context.
Signed-off-by: Casey Schaufler [PM: subj tweak, audit example readability indents] Signed-off-by: Paul Moore --- include/linux/audit.h | 7 +++++++ include/uapi/linux/audit.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index a1f068bcb3a0..536f8ee8da81 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -151,6 +151,7 @@ extern unsigned compat_signal_class[]; /* bit values for audit_cfg_lsm */ #define AUDIT_CFG_LSM_SECCTX_SUBJECT BIT(0) +#define AUDIT_CFG_LSM_SECCTX_OBJECT BIT(1) struct filename; @@ -191,6 +192,7 @@ extern void audit_log_path_denied(int type, extern void audit_log_lost(const char *message); extern int audit_log_subj_ctx(struct audit_buffer *ab, struct lsm_prop *prop); +extern int audit_log_obj_ctx(struct audit_buffer *ab, struct lsm_prop *prop); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); @@ -258,6 +260,11 @@ static inline int audit_log_subj_ctx(struct audit_buffer *ab, { return 0; } +static inline int audit_log_obj_ctx(struct audit_buffer *ab, + struct lsm_prop *prop) +{ + return 0; +} static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 8cad2f307719..14a1c1fe013a 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -149,6 +149,7 @@ #define AUDIT_LANDLOCK_ACCESS 1423 /* Landlock denial */ #define AUDIT_LANDLOCK_DOMAIN 1424 /* Landlock domain status */ #define AUDIT_MAC_TASK_CONTEXTS 1425 /* Multiple LSM task contexts */ +#define AUDIT_MAC_OBJ_CONTEXTS 1426 /* Multiple LSM object contexts */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 -- cgit v1.2.3 From 44b6169ada7fe3cf4cb91e6b06019eaa22719f28 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R.
Silva" Date: Wed, 27 Aug 2025 08:10:53 +0200 Subject: scsi: fc: Avoid -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end has been introduced in GCC-14, and we are getting ready to enable it, globally. So, in order to avoid ending up with a flexible-array member in the middle of multiple other structs, we use the '__struct_group()' helper to create a new tagged 'struct fc_df_desc_fpin_reg_hdr'. This structure groups together all the members of the flexible 'struct fc_df_desc_fpin_reg' except the flexible array. As a result, the array is effectively separated from the rest of the members without modifying the memory layout of the flexible structure. We then change the type of the middle struct members currently causing trouble from 'struct fc_df_desc_fpin_reg' to 'struct fc_df_desc_fpin_reg_hdr'. We also want to ensure that in case new members need to be added to the flexible structure, they are always included within the newly created tagged struct. For this, we use '_Static_assert()'. This ensures that the memory layout for both the flexible structure and the new tagged struct is the same after any changes. This approach avoids having to implement 'struct fc_df_desc_fpin_reg_hdr' as a completely separate structure, thus preventing having to maintain two independent but basically identical structures, closing the door to potential bugs in the future. 
The above is also done for flexible structures 'struct fc_els_rdf' and 'struct fc_els_rdf_resp' So, with these changes, fix the following warnings: drivers/scsi/lpfc/lpfc_hw4.h:4936:41: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/scsi/lpfc/lpfc_hw4.h:4942:41: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] drivers/scsi/lpfc/lpfc_hw4.h:4947:41: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/aK6hbQLyQlvlySf8@kspp Reviewed-by: Hannes Reinecke Reviewed-by: Justin Tee Signed-off-by: Martin K. Petersen --- include/uapi/scsi/fc/fc_els.h | 58 +++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/uapi/scsi/fc/fc_els.h b/include/uapi/scsi/fc/fc_els.h index 16782c360de3..019096beb179 100644 --- a/include/uapi/scsi/fc/fc_els.h +++ b/include/uapi/scsi/fc/fc_els.h @@ -11,6 +11,12 @@ #include #include +#ifdef __KERNEL__ +#include /* for offsetof */ +#else +#include /* for offsetof */ +#endif + /* * Fibre Channel Switch - Enhanced Link Services definitions. * From T11 FC-LS Rev 1.2 June 7, 2005. @@ -1109,12 +1115,15 @@ struct fc_els_fpin { /* Diagnostic Function Descriptor - FPIN Registration */ struct fc_df_desc_fpin_reg { - __be32 desc_tag; /* FPIN Registration (0x00030001) */ - __be32 desc_len; /* Length of Descriptor (in bytes). - * Size of descriptor excluding - * desc_tag and desc_len fields. - */ - __be32 count; /* Number of desc_tags elements */ + /* New members MUST be added within the __struct_group() macro below. 
*/ + __struct_group(fc_df_desc_fpin_reg_hdr, __hdr, /* no attrs */, + __be32 desc_tag; /* FPIN Registration (0x00030001) */ + __be32 desc_len; /* Length of Descriptor (in bytes). + * Size of descriptor excluding + * desc_tag and desc_len fields. + */ + __be32 count; /* Number of desc_tags elements */ + ); __be32 desc_tags[]; /* Array of Descriptor Tags. * Each tag indicates a function * supported by the N_Port (request) @@ -1124,33 +1133,44 @@ struct fc_df_desc_fpin_reg { * See ELS_FN_DTAG_xxx for tag values. */ }; +_Static_assert(offsetof(struct fc_df_desc_fpin_reg, desc_tags) == sizeof(struct fc_df_desc_fpin_reg_hdr), + "struct member likely outside of __struct_group()"); /* * ELS_RDF - Register Diagnostic Functions */ struct fc_els_rdf { - __u8 fpin_cmd; /* command (0x19) */ - __u8 fpin_zero[3]; /* specified as zero - part of cmd */ - __be32 desc_len; /* Length of Descriptor List (in bytes). - * Size of ELS excluding fpin_cmd, - * fpin_zero and desc_len fields. - */ + /* New members MUST be added within the __struct_group() macro below. */ + __struct_group(fc_els_rdf_hdr, __hdr, /* no attrs */, + __u8 fpin_cmd; /* command (0x19) */ + __u8 fpin_zero[3]; /* specified as zero - part of cmd */ + __be32 desc_len; /* Length of Descriptor List (in bytes). + * Size of ELS excluding fpin_cmd, + * fpin_zero and desc_len fields. + */ + ); struct fc_tlv_desc desc[]; /* Descriptor list */ }; +_Static_assert(offsetof(struct fc_els_rdf, desc) == sizeof(struct fc_els_rdf_hdr), + "struct member likely outside of __struct_group()"); /* * ELS RDF LS_ACC Response. */ struct fc_els_rdf_resp { - struct fc_els_ls_acc acc_hdr; - __be32 desc_list_len; /* Length of response (in - * bytes). Excludes acc_hdr - * and desc_list_len fields. - */ - struct fc_els_lsri_desc lsri; + /* New members MUST be added within the __struct_group() macro below. 
*/ + __struct_group(fc_els_rdf_resp_hdr, __hdr, /* no attrs */, + struct fc_els_ls_acc acc_hdr; + __be32 desc_list_len; /* Length of response (in + * bytes). Excludes acc_hdr + * and desc_list_len fields. + */ + struct fc_els_lsri_desc lsri; + ); struct fc_tlv_desc desc[]; /* Supported Descriptor list */ }; - +_Static_assert(offsetof(struct fc_els_rdf_resp, desc) == sizeof(struct fc_els_rdf_resp_hdr), + "struct member likely outside of __struct_group()"); /* * Diagnostic Capability Descriptors for EDC ELS -- cgit v1.2.3 From b620462bba6655b47d127db70d18123c7af522d4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 29 Aug 2025 08:38:16 -0700 Subject: scsi: ufs: core: Move the tracing enumeration types into a new file The header file defines constants and data structures related to the UFS standard. Move the enumeration types related to tracing into a new header file because these are not defined in the UFS standard. An intended side effect of this patch is that the tracing enumeration types are no longer visible to UFS host drivers. Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250829153841.2201700-1-bvanassche@acm.org Reviewed-by: Avri Altman Signed-off-by: Martin K. Petersen --- include/ufs/ufs.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 72fd385037a6..245a6a829ce9 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -653,21 +653,4 @@ struct ufs_dev_info { bool hid_sup; }; -/* - * This enum is used in string mapping in ufs_trace.h. - */ -enum ufs_trace_str_t { - UFS_CMD_SEND, UFS_CMD_COMP, UFS_DEV_COMP, - UFS_QUERY_SEND, UFS_QUERY_COMP, UFS_QUERY_ERR, - UFS_TM_SEND, UFS_TM_COMP, UFS_TM_ERR -}; - -/* - * Transaction Specific Fields (TSF) type in the UPIU package, this enum is - * used in ufs_trace.h for UFS command trace. 
- */ -enum ufs_trace_tsf_t { - UFS_TSF_CDB, UFS_TSF_OSF, UFS_TSF_TM_INPUT, UFS_TSF_TM_OUTPUT -}; - #endif /* End of Header */ -- cgit v1.2.3 From 6234d0df236ab1a71c6bd75e4f5fa15339d5272b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 22 Aug 2025 03:27:21 +0300 Subject: media: v4l2-common: Constify media_pad argument to v4l2_get_link_freq() The v4l2_get_link_freq() macro doesn't modify the pad argument. Make it possible to call it with a const media_pad pointer. Link: https://lore.kernel.org/r/20250822002734.23516-2-laurent.pinchart@ideasonboard.com Signed-off-by: Laurent Pinchart Reviewed-by: Frank Li Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index e31b4434ea5d..d8e23991a656 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -579,7 +579,7 @@ int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, u32 pixelformat, * * %-EINVAL: Invalid link frequency value */ #ifdef CONFIG_MEDIA_CONTROLLER -s64 v4l2_get_link_freq(struct media_pad *pad, unsigned int mul, +s64 v4l2_get_link_freq(const struct media_pad *pad, unsigned int mul, unsigned int div); #endif -- cgit v1.2.3 From 76f1e2ee545b3165e1e24293b59414699118266a Mon Sep 17 00:00:00 2001 From: Denzeel Oliva Date: Sat, 30 Aug 2025 16:28:41 +0000 Subject: dt-bindings: clock: exynos990: Extend clocks IDs Add missing clock definitions for DPU and CMUREF. 
Acked-by: Rob Herring (Arm) Signed-off-by: Denzeel Oliva Link: https://lore.kernel.org/r/20250830-fix-cmu-top-v5-4-7c62f608309e@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynos990.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynos990.h b/include/dt-bindings/clock/samsung,exynos990.h index 6b9df09d2822..c5c79e078f2f 100644 --- a/include/dt-bindings/clock/samsung,exynos990.h +++ b/include/dt-bindings/clock/samsung,exynos990.h @@ -208,6 +208,10 @@ #define CLK_GOUT_CMU_SSP_BUS 197 #define CLK_GOUT_CMU_TNR_BUS 198 #define CLK_GOUT_CMU_VRA_BUS 199 +#define CLK_MOUT_CMU_CMUREF 200 +#define CLK_MOUT_CMU_DPU_BUS 201 +#define CLK_MOUT_CMU_CLK_CMUREF 202 +#define CLK_DOUT_CMU_CLK_CMUREF 203 /* CMU_HSI0 */ #define CLK_MOUT_HSI0_BUS_USER 1 -- cgit v1.2.3 From 91f98de42310c70f9a23595b3b20aa305717d955 Mon Sep 17 00:00:00 2001 From: Hakyeong Kim Date: Mon, 25 Aug 2025 17:14:27 +0530 Subject: dt-bindings: clock: Add ARTPEC-8 clock controller Add dt-schema for Axis ARTPEC-8 SoC clock controller. The Clock Management Unit (CMU) has a top-level block CMU_CMU which generates clocks for other blocks. 
Add device-tree binding definitions for following CMU blocks: - CMU_CMU - CMU_BUS - CMU_CORE - CMU_CPUCL - CMU_FSYS - CMU_IMEM - CMU_PERI Signed-off-by: Hakyeong Kim Signed-off-by: SeonGu Kang Reviewed-by: Rob Herring (Arm) Signed-off-by: Ravi Patel Link: https://lore.kernel.org/r/20250825114436.46882-2-ravi.patel@samsung.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/axis,artpec8-clk.h | 169 +++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 include/dt-bindings/clock/axis,artpec8-clk.h (limited to 'include') diff --git a/include/dt-bindings/clock/axis,artpec8-clk.h b/include/dt-bindings/clock/axis,artpec8-clk.h new file mode 100644 index 000000000000..1e6e1409dd94 --- /dev/null +++ b/include/dt-bindings/clock/axis,artpec8-clk.h @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2025 Samsung Electronics Co., Ltd. + * https://www.samsung.com + * Copyright (c) 2025 Axis Communications AB. + * https://www.axis.com + * + * Device Tree binding constants for ARTPEC-8 clock controller. 
+ */ + +#ifndef _DT_BINDINGS_CLOCK_ARTPEC8_H +#define _DT_BINDINGS_CLOCK_ARTPEC8_H + +/* CMU_CMU */ +#define CLK_FOUT_SHARED0_PLL 1 +#define CLK_DOUT_SHARED0_DIV2 2 +#define CLK_DOUT_SHARED0_DIV3 3 +#define CLK_DOUT_SHARED0_DIV4 4 +#define CLK_FOUT_SHARED1_PLL 5 +#define CLK_DOUT_SHARED1_DIV2 6 +#define CLK_DOUT_SHARED1_DIV3 7 +#define CLK_DOUT_SHARED1_DIV4 8 +#define CLK_FOUT_AUDIO_PLL 9 +#define CLK_DOUT_CMU_BUS 10 +#define CLK_DOUT_CMU_BUS_DLP 11 +#define CLK_DOUT_CMU_CDC_CORE 12 +#define CLK_DOUT_CMU_OTP 13 +#define CLK_DOUT_CMU_CORE_MAIN 14 +#define CLK_DOUT_CMU_CORE_DLP 15 +#define CLK_DOUT_CMU_CPUCL_SWITCH 16 +#define CLK_DOUT_CMU_DLP_CORE 17 +#define CLK_DOUT_CMU_FSYS_BUS 18 +#define CLK_DOUT_CMU_FSYS_IP 19 +#define CLK_DOUT_CMU_FSYS_SCAN0 20 +#define CLK_DOUT_CMU_FSYS_SCAN1 21 +#define CLK_DOUT_CMU_GPU_3D 22 +#define CLK_DOUT_CMU_GPU_2D 23 +#define CLK_DOUT_CMU_IMEM_ACLK 24 +#define CLK_DOUT_CMU_IMEM_JPEG 25 +#define CLK_DOUT_CMU_MIF_SWITCH 26 +#define CLK_DOUT_CMU_MIF_BUSP 27 +#define CLK_DOUT_CMU_PERI_DISP 28 +#define CLK_DOUT_CMU_PERI_IP 29 +#define CLK_DOUT_CMU_PERI_AUDIO 30 +#define CLK_DOUT_CMU_RSP_CORE 31 +#define CLK_DOUT_CMU_TRFM_CORE 32 +#define CLK_DOUT_CMU_VCA_ACE 33 +#define CLK_DOUT_CMU_VCA_OD 34 +#define CLK_DOUT_CMU_VIO_CORE 35 +#define CLK_DOUT_CMU_VIO_AUDIO 36 +#define CLK_DOUT_CMU_VIP0_CORE 37 +#define CLK_DOUT_CMU_VIP1_CORE 38 +#define CLK_DOUT_CMU_VPP_CORE 39 + +/* CMU_BUS */ +#define CLK_MOUT_BUS_ACLK_USER 1 +#define CLK_MOUT_BUS_DLP_USER 2 +#define CLK_DOUT_BUS_PCLK 3 + +/* CMU_CORE */ +#define CLK_MOUT_CORE_ACLK_USER 1 +#define CLK_MOUT_CORE_DLP_USER 2 +#define CLK_DOUT_CORE_PCLK 3 + +/* CMU_CPUCL */ +#define CLK_FOUT_CPUCL_PLL 1 +#define CLK_MOUT_CPUCL_PLL 2 +#define CLK_MOUT_CPUCL_SWITCH_USER 3 +#define CLK_DOUT_CPUCL_CPU 4 +#define CLK_DOUT_CPUCL_CLUSTER_ACLK 5 +#define CLK_DOUT_CPUCL_CLUSTER_PCLKDBG 6 +#define CLK_DOUT_CPUCL_CLUSTER_CNTCLK 7 +#define CLK_DOUT_CPUCL_CLUSTER_ATCLK 8 +#define CLK_DOUT_CPUCL_PCLK 9 +#define 
CLK_DOUT_CPUCL_CMUREF 10 +#define CLK_DOUT_CPUCL_DBG 11 +#define CLK_DOUT_CPUCL_PCLKDBG 12 +#define CLK_GOUT_CPUCL_CLUSTER_CPU 13 +#define CLK_GOUT_CPUCL_SHORTSTOP 14 +#define CLK_GOUT_CPUCL_CSSYS_IPCLKPORT_PCLKDBG 15 +#define CLK_GOUT_CPUCL_CSSYS_IPCLKPORT_ATCLK 16 + +/* CMU_FSYS */ +#define CLK_FOUT_FSYS_PLL 1 +#define CLK_MOUT_FSYS_SCAN0_USER 2 +#define CLK_MOUT_FSYS_SCAN1_USER 3 +#define CLK_MOUT_FSYS_BUS_USER 4 +#define CLK_MOUT_FSYS_MMC_USER 5 +#define CLK_DOUT_FSYS_PCIE_PIPE 6 +#define CLK_DOUT_FSYS_ADC 7 +#define CLK_DOUT_FSYS_PCIE_PHY_REFCLK_SYSPLL 8 +#define CLK_DOUT_FSYS_EQOS_INT125 9 +#define CLK_DOUT_FSYS_OTP_MEM 10 +#define CLK_DOUT_FSYS_SCLK_UART 11 +#define CLK_DOUT_FSYS_EQOS_25 12 +#define CLK_DOUT_FSYS_EQOS_2p5 13 +#define CLK_DOUT_FSYS_BUS300 14 +#define CLK_DOUT_FSYS_BUS_QSPI 15 +#define CLK_DOUT_FSYS_MMC_CARD0 16 +#define CLK_DOUT_FSYS_MMC_CARD1 17 +#define CLK_DOUT_SCAN_CLK_FSYS_125 18 +#define CLK_DOUT_FSYS_QSPI 19 +#define CLK_DOUT_FSYS_SFMC_NAND 20 +#define CLK_DOUT_FSYS_SCAN_CLK_MMC 21 +#define CLK_GOUT_FSYS_USB20DRD_IPCLKPORT_ACLK_PHYCTRL_20 22 +#define CLK_GOUT_FSYS_USB20DRD_IPCLKPORT_BUS_CLK_EARLY 23 +#define CLK_GOUT_FSYS_XHB_USB_IPCLKPORT_CLK 24 +#define CLK_GOUT_FSYS_XHB_AHBBR_IPCLKPORT_CLK 25 +#define CLK_GOUT_FSYS_I2C0_IPCLKPORT_I_PCLK 26 +#define CLK_GOUT_FSYS_I2C1_IPCLKPORT_I_PCLK 27 +#define CLK_GOUT_FSYS_PWM_IPCLKPORT_I_PCLK_S0 28 +#define CLK_GOUT_FSYS_DWC_PCIE_CTL_INST_0_MSTR_ACLK_UG 29 +#define CLK_GOUT_FSYS_DWC_PCIE_CTL_INXT_0_SLV_ACLK_UG 30 +#define CLK_GOUT_FSYS_DWC_PCIE_CTL_INST_0_DBI_ACLK_UG 31 +#define CLK_GOUT_FSYS_PIPE_PAL_INST_0_I_APB_PCLK 32 +#define CLK_GOUT_FSYS_EQOS_TOP_IPCLKPORT_ACLK_I 33 +#define CLK_GOUT_FSYS_EQOS_TOP_IPCLKPORT_CLK_CSR_I 34 +#define CLK_GOUT_FSYS_EQOS_TOP_IPCLKPORT_I_RGMII_TXCLK_2P5 35 +#define CLK_GOUT_FSYS_SFMC_IPCLKPORT_I_ACLK_NAND 36 +#define CLK_GOUT_FSYS_MMC0_IPCLKPORT_SDCLKIN 37 +#define CLK_GOUT_FSYS_MMC0_IPCLKPORT_I_ACLK 38 +#define CLK_GOUT_FSYS_MMC1_IPCLKPORT_SDCLKIN 39 +#define 
CLK_GOUT_FSYS_MMC1_IPCLKPORT_I_ACLK 40 +#define CLK_GOUT_FSYS_PCIE_PHY_REFCLK_IN 41 +#define CLK_GOUT_FSYS_UART0_PCLK 42 +#define CLK_GOUT_FSYS_UART0_SCLK_UART 43 +#define CLK_GOUT_FSYS_BUS_QSPI 44 +#define CLK_GOUT_FSYS_QSPI_IPCLKPORT_HCLK 45 +#define CLK_GOUT_FSYS_QSPI_IPCLKPORT_SSI_CLK 46 + +/* CMU_IMEM */ +#define CLK_MOUT_IMEM_ACLK_USER 1 +#define CLK_MOUT_IMEM_GIC_CA53 2 +#define CLK_MOUT_IMEM_GIC_CA5 3 +#define CLK_MOUT_IMEM_JPEG_USER 4 +#define CLK_GOUT_IMEM_MCT_PCLK 5 +#define CLK_GOUT_IMEM_PCLK_TMU0_APBIF 6 + +/* CMU_PERI */ +#define CLK_MOUT_PERI_IP_USER 1 +#define CLK_MOUT_PERI_AUDIO_USER 2 +#define CLK_MOUT_PERI_I2S0 3 +#define CLK_MOUT_PERI_I2S1 4 +#define CLK_MOUT_PERI_DISP_USER 5 +#define CLK_DOUT_PERI_SPI 6 +#define CLK_DOUT_PERI_UART1 7 +#define CLK_DOUT_PERI_UART2 8 +#define CLK_DOUT_PERI_PCLK 9 +#define CLK_DOUT_PERI_I2S0 10 +#define CLK_DOUT_PERI_I2S1 11 +#define CLK_DOUT_PERI_DSIM 12 +#define CLK_GOUT_PERI_UART1_PCLK 13 +#define CLK_GOUT_PERI_UART1_SCLK_UART 14 +#define CLK_GOUT_PERI_UART2_PCLK 15 +#define CLK_GOUT_PERI_UART2_SCLK_UART 16 +#define CLK_GOUT_PERI_I2C2_IPCLKPORT_I_PCLK 17 +#define CLK_GOUT_PERI_I2C3_IPCLKPORT_I_PCLK 18 +#define CLK_GOUT_PERI_SPI0_PCLK 19 +#define CLK_GOUT_PERI_SPI0_SCLK_SPI 20 +#define CLK_GOUT_PERI_APB_ASYNC_DSIM_IPCLKPORT_PCLKS 21 +#define CLK_GOUT_PERI_I2SSC0_IPCLKPORT_CLK_HST 22 +#define CLK_GOUT_PERI_I2SSC1_IPCLKPORT_CLK_HST 23 +#define CLK_GOUT_PERI_AUDIO_OUT_IPCLKPORT_CLK 24 +#define CLK_GOUT_PERI_I2SSC0_IPCLKPORT_CLK 25 +#define CLK_GOUT_PERI_I2SSC1_IPCLKPORT_CLK 26 +#define CLK_GOUT_PERI_DMA4DSIM_IPCLKPORT_CLK_APB_CLK 27 +#define CLK_GOUT_PERI_DMA4DSIM_IPCLKPORT_CLK_AXI_CLK 28 + +#endif /* _DT_BINDINGS_CLOCK_ARTPEC8_H */ -- cgit v1.2.3 From 1d8fdabe19267338f29b58f968499e5b55e6a3b6 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Fri, 29 Aug 2025 12:25:43 +0100 Subject: iio: frequency: adf4350: Fix ADF4350_REG3_12BIT_CLKDIV_MODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The clk div bits (2 bits wide) do not start in bit 16 but in bit 15. Fix it accordingly. Fixes: e31166f0fd48 ("iio: frequency: New driver for Analog Devices ADF4350/ADF4351 Wideband Synthesizers") Signed-off-by: Michael Hennerich Signed-off-by: Nuno Sá Link: https://patch.msgid.link/20250829-adf4350-fix-v2-2-0bf543ba797d@analog.com Cc: stable@vger.kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/frequency/adf4350.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/frequency/adf4350.h b/include/linux/iio/frequency/adf4350.h index de45cf2ee1e4..ce2086f97e3f 100644 --- a/include/linux/iio/frequency/adf4350.h +++ b/include/linux/iio/frequency/adf4350.h @@ -51,7 +51,7 @@ /* REG3 Bit Definitions */ #define ADF4350_REG3_12BIT_CLKDIV(x) ((x) << 3) -#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 16) +#define ADF4350_REG3_12BIT_CLKDIV_MODE(x) ((x) << 15) #define ADF4350_REG3_12BIT_CSR_EN (1 << 18) #define ADF4351_REG3_CHARGE_CANCELLATION_EN (1 << 21) #define ADF4351_REG3_ANTI_BACKLASH_3ns_EN (1 << 22) -- cgit v1.2.3 From eef6dcbc52fa83c392a2f4a52845f347b233a584 Mon Sep 17 00:00:00 2001 From: Prathamesh Shete Date: Sat, 23 Aug 2025 11:24:19 +0530 Subject: dt-bindings: gpio: Add Tegra256 support Extend the existing Tegra186 GPIO controller device tree bindings with support for the GPIO controller found on Tegra256. The number of pins is slightly different, but the programming model remains the same. Add a new header, include/dt-bindings/gpio/tegra256-gpio.h, that defines port IDs as well as the TEGRA256_MAIN_GPIO() helper, both of which are used in conjunction to create a unique specifier for each pin. The OS can reconstruct the port ID and pin from these values to determine the register region for the corresponding GPIO. However, the OS does not use the macro definitions in this file.
The symbolic names help associate these GPIO specifiers with the names used in the technical documentation available for the chip. Signed-off-by: Prathamesh Shete Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250823055420.24664-1-pshete@nvidia.com Signed-off-by: Bartosz Golaszewski --- include/dt-bindings/gpio/tegra256-gpio.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/dt-bindings/gpio/tegra256-gpio.h (limited to 'include') diff --git a/include/dt-bindings/gpio/tegra256-gpio.h b/include/dt-bindings/gpio/tegra256-gpio.h new file mode 100644 index 000000000000..a0353a302aeb --- /dev/null +++ b/include/dt-bindings/gpio/tegra256-gpio.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. */ + +/* + * This header provides constants for the nvidia,tegra256-gpio DT binding. + * + * The first cell in Tegra's GPIO specifier is the GPIO ID. + * The macros below provide names for this. + * + * The second cell contains standard flag values specified in gpio.h. + */ + +#ifndef _DT_BINDINGS_GPIO_TEGRA256_GPIO_H +#define _DT_BINDINGS_GPIO_TEGRA256_GPIO_H + +#include <dt-bindings/gpio/gpio.h> + +/* GPIOs implemented by main GPIO controller */ +#define TEGRA256_MAIN_GPIO_PORT_A 0 +#define TEGRA256_MAIN_GPIO_PORT_B 1 +#define TEGRA256_MAIN_GPIO_PORT_C 2 +#define TEGRA256_MAIN_GPIO_PORT_D 3 + +#define TEGRA256_MAIN_GPIO(port, offset) \ + ((TEGRA256_MAIN_GPIO_PORT_##port * 8) + (offset)) + +#endif + -- cgit v1.2.3 From 37b27bd5d6217b75d315f28b4399aad0a336f299 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Aug 2025 11:39:02 -0400 Subject: fs: add an icount_read helper Instead of doing direct access to ->i_count, add a helper to handle this. This will make it easier to convert i_count to a refcount later.
Signed-off-by: Josef Bacik Link: https://lore.kernel.org/9bc62a84c6b9d6337781203f60837bd98fbc4a96.1756222464.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 5 +++++ include/trace/events/filelock.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index c34554d8c4fe..c4fd010cf5bf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2611,6 +2611,11 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } +static inline int icount_read(const struct inode *inode) +{ + return atomic_read(&inode->i_count); +} + /* * Returns true if the given inode itself only has dirty timestamps (its pages * may still be dirty) and isn't currently being allocated or freed. diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index b8d1e00a7982..fdd36b1daa25 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -189,7 +189,7 @@ TRACE_EVENT(generic_add_lease, __entry->i_ino = inode->i_ino; __entry->wcount = atomic_read(&inode->i_writecount); __entry->rcount = atomic_read(&inode->i_readcount); - __entry->icount = atomic_read(&inode->i_count); + __entry->icount = icount_read(inode); __entry->owner = fl->c.flc_owner; __entry->flags = fl->c.flc_flags; __entry->type = fl->c.flc_type; -- cgit v1.2.3 From e5bca063c150877c45b88ff134b6ef7a5eae8e7a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 30 Aug 2025 12:55:39 +0200 Subject: fs: remove vfs_ioctl export vfs_ioctl() is no longer called by anything outside of fs/ioctl.c, so remove the global symbol and export as it is not needed. 
Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/2025083038-carving-amuck-a4ae@gregkh Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 34693cae15a2..4daf9b30a641 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2053,8 +2053,6 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); int vfs_utimes(const struct path *path, struct timespec64 *times); -int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); - #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -- cgit v1.2.3 From 7c4a379e0622e7d8e7eb7dbc76445cdd6306aad8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Aug 2025 16:43:02 +0200 Subject: ALSA: emu10k1: Use guard() for emu1010 FPGA locking The snd_emu1010_fpga_lock() and _unlock() call pairs can be simplified gracefully with the introduction of guard(). Only code refactoring, and no functional changes. 
Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250829144342.4290-28-tiwai@suse.de --- include/sound/emu10k1.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 38db50b280eb..4f94565c9d15 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1842,8 +1842,7 @@ unsigned int snd_emu10k1_ptr20_read(struct snd_emu10k1 * emu, unsigned int reg, void snd_emu10k1_ptr20_write(struct snd_emu10k1 *emu, unsigned int reg, unsigned int chn, unsigned int data); int snd_emu10k1_spi_write(struct snd_emu10k1 * emu, unsigned int data); int snd_emu10k1_i2c_write(struct snd_emu10k1 *emu, u32 reg, u32 value); -static inline void snd_emu1010_fpga_lock(struct snd_emu10k1 *emu) { mutex_lock(&emu->emu1010.lock); }; -static inline void snd_emu1010_fpga_unlock(struct snd_emu10k1 *emu) { mutex_unlock(&emu->emu1010.lock); }; +DEFINE_GUARD(snd_emu1010_fpga_lock, struct snd_emu10k1 *, mutex_lock(&(_T)->emu1010.lock), mutex_unlock(&(_T)->emu1010.lock)) void snd_emu1010_fpga_write_lock(struct snd_emu10k1 *emu, u32 reg, u32 value); void snd_emu1010_fpga_write(struct snd_emu10k1 *emu, u32 reg, u32 value); void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value); -- cgit v1.2.3 From 3abb538fffc8af73859f16e6e274962d9b53c907 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Aug 2025 16:52:47 +0200 Subject: ALSA: gus: Use guard() for mutex locks Replace the manual mutex lock/unlock pairs with guard() for code simplification. As replaced with the guard(mutex), no longer used snd_gf1_mem_lock() is dropped, too. Only code refactoring, and no behavior change. 
Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250829145300.5460-8-tiwai@suse.de --- include/sound/gus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/gus.h b/include/sound/gus.h index 1c8fb6c93e50..321ae93625eb 100644 --- a/include/sound/gus.h +++ b/include/sound/gus.h @@ -515,7 +515,6 @@ struct _SND_IW_LFO_PROGRAM { /* gus_mem.c */ -void snd_gf1_mem_lock(struct snd_gf1_mem * alloc, int xup); int snd_gf1_mem_xfree(struct snd_gf1_mem * alloc, struct snd_gf1_mem_block * block); struct snd_gf1_mem_block *snd_gf1_mem_alloc(struct snd_gf1_mem * alloc, int owner, char *name, int size, int w_16, -- cgit v1.2.3 From 826f35b829f43dc62fb847eca6f79e8698b4994d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 29 Aug 2025 17:13:19 +0200 Subject: ALSA: synth: Use guard() for preset locks Define a macro for the preset locking/unlocking pairs for soundfont using guard() macro as a further code cleanup. The new macro is put in soundfont.h (and some function renames) along with it for avoiding unnecessary troubles with clang. 
Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250829151335.7342-6-tiwai@suse.de --- include/sound/soundfont.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/sound/soundfont.h b/include/sound/soundfont.h index 8a40cc15f66d..48f8cf6de3ac 100644 --- a/include/sound/soundfont.h +++ b/include/sound/soundfont.h @@ -114,5 +114,23 @@ int snd_sf_calc_parm_decay(int msec); extern int snd_sf_vol_table[128]; int snd_sf_linear_to_log(unsigned int amount, int offset, int ratio); +/* lock access to sflist */ +static inline void snd_soundfont_lock_preset(struct snd_sf_list *sflist) +{ + mutex_lock(&sflist->presets_mutex); + guard(spinlock_irqsave)(&sflist->lock); + sflist->presets_locked = 1; +} + +/* remove lock */ +static inline void snd_soundfont_unlock_preset(struct snd_sf_list *sflist) +{ + guard(spinlock_irqsave)(&sflist->lock); + sflist->presets_locked = 0; + mutex_unlock(&sflist->presets_mutex); +} + +DEFINE_GUARD(snd_soundfont_lock_preset, struct snd_sf_list *, + snd_soundfont_lock_preset(_T), snd_soundfont_unlock_preset(_T)) #endif /* __SOUND_SOUNDFONT_H */ -- cgit v1.2.3 From bbf7a84787d0dc5910d121b025dd5f4dea060768 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 26 Aug 2025 06:22:05 +0000 Subject: ASoC: soc-dapm: rename snd_soc_dapm_kcontrol_widget() to snd_soc_dapm_kcontrol_to_widget() snd_soc_dapm_kcontrol_widget() is an unclear name; rename it to snd_soc_dapm_kcontrol_to_widget(). This is preparation for the dapm cleanup. This patch keeps compatibility by using a define, but the old name will be replaced in each driver and removed from ASoC in the future.
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/875xeay54j.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 2e9196b6ffba..220eb6d08534 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -719,7 +719,7 @@ int snd_soc_dapm_dai_get_connected_widgets(struct snd_soc_dai *dai, int stream, void snd_soc_dapm_dai_free_widgets(struct snd_soc_dapm_widget_list **list); struct snd_soc_dapm_context *snd_soc_dapm_kcontrol_dapm(struct snd_kcontrol *kcontrol); -struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_widget(struct snd_kcontrol *kcontrol); +struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_to_widget(struct snd_kcontrol *kcontrol); int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); enum snd_soc_bias_level snd_soc_dapm_get_bias_level(struct snd_soc_dapm_context *dapm); @@ -729,6 +729,7 @@ void snd_soc_dapm_init_bias_level(struct snd_soc_dapm_context *dapm, enum snd_so #define snd_soc_component_force_bias_level(c, l) snd_soc_dapm_force_bias_level(&(c)->dapm, l) #define snd_soc_component_get_bias_level(c) snd_soc_dapm_get_bias_level(&(c)->dapm) #define snd_soc_component_init_bias_level(c, l) snd_soc_dapm_init_bias_level(&(c)->dapm, l) +#define snd_soc_dapm_kcontrol_widget snd_soc_dapm_kcontrol_to_widget #define for_each_dapm_widgets(list, i, widget) \ for ((i) = 0; \ -- cgit v1.2.3 From 2532041865305594e37c4c22bd650d52ea805ec8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 26 Aug 2025 06:22:12 +0000 Subject: ASoC: soc-dapm: rename snd_soc_dapm_kcontrol_dapm() to snd_soc_dapm_kcontrol_to_dapm() snd_soc_dapm_kcontrol_dapm() is an unclear name; rename it to snd_soc_dapm_kcontrol_to_dapm(). This is preparation for the dapm cleanup.
This patch keeps compatibility by using a define, but the old name will be replaced in each driver and removed from ASoC in the future. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/874ituy54c.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 220eb6d08534..8add8de7a8c9 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -718,7 +718,7 @@ int snd_soc_dapm_dai_get_connected_widgets(struct snd_soc_dai *dai, int stream, bool (*custom_stop_condition)(struct snd_soc_dapm_widget *, enum snd_soc_dapm_direction)); void snd_soc_dapm_dai_free_widgets(struct snd_soc_dapm_widget_list **list); -struct snd_soc_dapm_context *snd_soc_dapm_kcontrol_dapm(struct snd_kcontrol *kcontrol); +struct snd_soc_dapm_context *snd_soc_dapm_kcontrol_to_dapm(struct snd_kcontrol *kcontrol); struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_to_widget(struct snd_kcontrol *kcontrol); int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); @@ -730,6 +730,7 @@ void snd_soc_dapm_init_bias_level(struct snd_soc_dapm_context *dapm, enum snd_so #define snd_soc_component_get_bias_level(c) snd_soc_dapm_get_bias_level(&(c)->dapm) #define snd_soc_component_init_bias_level(c, l) snd_soc_dapm_init_bias_level(&(c)->dapm, l) #define snd_soc_dapm_kcontrol_widget snd_soc_dapm_kcontrol_to_widget +#define snd_soc_dapm_kcontrol_dapm snd_soc_dapm_kcontrol_to_dapm #define for_each_dapm_widgets(list, i, widget) \ for ((i) = 0; \ -- cgit v1.2.3 From f6883f0f03575ecc8c4c5d2a04339bac91eb33d7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 26 Aug 2025 06:22:19 +0000 Subject: ASoC: soc-dapm: rename dapm_kcontrol_get_value() to snd_soc_dapm_kcontrol_get_value() dapm_kcontrol_get_value() is a global function, so add the snd_soc_ prefix. This patch keeps compatibility by
using a define, but the old name will be replaced in each driver and removed from ASoC in the future. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87349ey546.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 8add8de7a8c9..cd02fedb2624 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -699,7 +699,6 @@ int snd_soc_dapm_sync_unlocked(struct snd_soc_dapm_context *dapm); int snd_soc_dapm_force_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_force_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_ignore_suspend(struct snd_soc_dapm_context *dapm, const char *pin); -unsigned int dapm_kcontrol_get_value(const struct snd_kcontrol *kcontrol); void snd_soc_dapm_mark_endpoints_dirty(struct snd_soc_card *card); /* @@ -720,6 +719,7 @@ void snd_soc_dapm_dai_free_widgets(struct snd_soc_dapm_widget_list **list); struct snd_soc_dapm_context *snd_soc_dapm_kcontrol_to_dapm(struct snd_kcontrol *kcontrol); struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_to_widget(struct snd_kcontrol *kcontrol); +unsigned int snd_soc_dapm_kcontrol_get_value(const struct snd_kcontrol *kcontrol); int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); enum snd_soc_bias_level snd_soc_dapm_get_bias_level(struct snd_soc_dapm_context *dapm); @@ -731,6 +731,7 @@ void snd_soc_dapm_init_bias_level(struct snd_soc_dapm_context *dapm, enum snd_so #define snd_soc_component_init_bias_level(c, l) snd_soc_dapm_init_bias_level(&(c)->dapm, l) #define snd_soc_dapm_kcontrol_widget snd_soc_dapm_kcontrol_to_widget #define snd_soc_dapm_kcontrol_dapm snd_soc_dapm_kcontrol_to_dapm +#define dapm_kcontrol_get_value snd_soc_dapm_kcontrol_get_value #define for_each_dapm_widgets(list, i, widget)
\ for ((i) = 0; \ -- cgit v1.2.3 From 8a9772ec08f87c9e45ab1ad2c8d2b8c1763836eb Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 26 Aug 2025 06:22:26 +0000 Subject: ASoC: soc-dapm: rename snd_soc_kcontrol_component() to snd_soc_kcontrol_to_component() We have 2 similar functions; both convert data from snd_kcontrol to snd_soc_component. (A) snd_soc_kcontrol_component() (B) snd_soc_dapm_kcontrol_component() (A) is just a wrapper for snd_kcontrol_chip(). (B) is for a more complex conversion. Having similar functions is confusing. So (A) will be replaced with the original snd_kcontrol_chip(). (B) will stay, but the function name should be xx_to_xx(). Also, it is defined in soc-component.h, but it should be implemented in soc-dapm.c. This patch renames it to snd_soc_dapm_kcontrol_to_component() and moves it to soc-dapm.c. This patch keeps compatibility by using a define, but the old name will be replaced in each driver and removed from ASoC in the future. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/871poyy53x.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 14 -------------- include/sound/soc-dapm.h | 2 ++ 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index 54bfa0cb1085..48e45cbe82e5 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -286,20 +286,6 @@ static inline struct snd_soc_dapm_context *snd_soc_component_get_dapm( return &component->dapm; } -/** - * snd_soc_dapm_kcontrol_component() - Returns the component associated to a - * kcontrol - * @kcontrol: The kcontrol - * - * This function must only be used on DAPM contexts that are known to be part of - * a COMPONENT (e.g. in a COMPONENT driver).
Otherwise the behavior is undefined - */ -static inline struct snd_soc_component *snd_soc_dapm_kcontrol_component( - struct snd_kcontrol *kcontrol) -{ - return snd_soc_dapm_to_component(snd_soc_dapm_kcontrol_dapm(kcontrol)); -} - /** * snd_soc_component_cache_sync() - Sync the register cache with the hardware * @component: COMPONENT to sync diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index cd02fedb2624..ed39458b94bf 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -719,6 +719,7 @@ void snd_soc_dapm_dai_free_widgets(struct snd_soc_dapm_widget_list **list); struct snd_soc_dapm_context *snd_soc_dapm_kcontrol_to_dapm(struct snd_kcontrol *kcontrol); struct snd_soc_dapm_widget *snd_soc_dapm_kcontrol_to_widget(struct snd_kcontrol *kcontrol); +struct snd_soc_component *snd_soc_dapm_kcontrol_to_component(struct snd_kcontrol *kcontrol); unsigned int snd_soc_dapm_kcontrol_get_value(const struct snd_kcontrol *kcontrol); int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level); @@ -732,6 +733,7 @@ void snd_soc_dapm_init_bias_level(struct snd_soc_dapm_context *dapm, enum snd_so #define snd_soc_dapm_kcontrol_widget snd_soc_dapm_kcontrol_to_widget #define snd_soc_dapm_kcontrol_dapm snd_soc_dapm_kcontrol_to_dapm #define dapm_kcontrol_get_value snd_soc_dapm_kcontrol_get_value +#define snd_soc_dapm_kcontrol_component snd_soc_dapm_kcontrol_to_component #define for_each_dapm_widgets(list, i, widget) \ for ((i) = 0; \ -- cgit v1.2.3 From edd3cb05c00a040dc72bed20b14b5ba865188bce Mon Sep 17 00:00:00 2001 From: Simon Schuster Date: Mon, 1 Sep 2025 15:09:51 +0200 Subject: copy_process: pass clone_flags as u64 across calltree With the introduction of clone3 in commit 7f192e3cd316 ("fork: add clone3") the effective bit width of clone_flags on all architectures was increased from 32-bit to 64-bit, with a new type of u64 for the flags. 
However, for most consumers of clone_flags the interface was not changed from the previous type of unsigned long. While this works fine as long as none of the new 64-bit flag bits (CLONE_CLEAR_SIGHAND and CLONE_INTO_CGROUP) are evaluated, this is still undesirable in terms of the principle of least surprise. Thus, this commit fixes all relevant interfaces of callees to sys_clone3/copy_process (excluding the architecture-specific copy_thread) to consistently pass clone_flags as u64, so that no truncation to 32-bit integers occurs on 32-bit architectures. Signed-off-by: Simon Schuster Link: https://lore.kernel.org/20250901-nios2-implement-clone3-v2-2-53fcf5577d57@siemens-energy.com Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Reviewed-by: Arnd Bergmann Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 4 ++-- include/linux/cred.h | 2 +- include/linux/iocontext.h | 6 +++--- include/linux/ipc_namespace.h | 4 ++-- include/linux/lsm_hook_defs.h | 2 +- include/linux/mnt_namespace.h | 2 +- include/linux/nsproxy.h | 2 +- include/linux/pid_namespace.h | 4 ++-- include/linux/rseq.h | 4 ++-- include/linux/sched/task.h | 2 +- include/linux/security.h | 4 ++-- include/linux/sem.h | 4 ++-- include/linux/time_namespace.h | 4 ++-- include/linux/uprobes.h | 4 ++-- include/linux/user_events.h | 4 ++-- include/linux/utsname.h | 4 ++-- include/net/net_namespace.h | 4 ++-- include/trace/events/task.h | 6 +++--- 18 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..56d9556a181a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -796,7 +796,7 @@ extern struct cgroup_namespace init_cgroup_ns; void free_cgroup_ns(struct cgroup_namespace *ns); -struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, +struct cgroup_namespace *copy_cgroup_ns(u64 flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns); @@ -818,7 +818,7 @@ static 
inline void put_cgroup_ns(struct cgroup_namespace *ns) static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } static inline struct cgroup_namespace * -copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, +copy_cgroup_ns(u64 flags, struct user_namespace *user_ns, struct cgroup_namespace *old_ns) { return old_ns; diff --git a/include/linux/cred.h b/include/linux/cred.h index a102a10f833f..89ae50ad2ace 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -148,7 +148,7 @@ struct cred { extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); -extern int copy_creds(struct task_struct *, unsigned long); +extern int copy_creds(struct task_struct *, u64); extern const struct cred *get_task_cred(struct task_struct *); extern struct cred *cred_alloc_blank(void); extern struct cred *prepare_creds(void); diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 14f7eaf1b443..079d8773790c 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -118,8 +118,8 @@ struct task_struct; #ifdef CONFIG_BLOCK void put_io_context(struct io_context *ioc); void exit_io_context(struct task_struct *task); -int __copy_io(unsigned long clone_flags, struct task_struct *tsk); -static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) +int __copy_io(u64 clone_flags, struct task_struct *tsk); +static inline int copy_io(u64 clone_flags, struct task_struct *tsk) { if (!current->io_context) return 0; @@ -129,7 +129,7 @@ static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) struct io_context; static inline void put_io_context(struct io_context *ioc) { } static inline void exit_io_context(struct task_struct *task) { } -static inline int copy_io(unsigned long clone_flags, struct task_struct *tsk) +static inline int copy_io(u64 clone_flags, struct task_struct *tsk) { return 0; } diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 
e8240cf2611a..4b399893e2b3 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -129,7 +129,7 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif #if defined(CONFIG_IPC_NS) -extern struct ipc_namespace *copy_ipcs(unsigned long flags, +extern struct ipc_namespace *copy_ipcs(u64 flags, struct user_namespace *user_ns, struct ipc_namespace *ns); static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) @@ -151,7 +151,7 @@ static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns extern void put_ipc_ns(struct ipc_namespace *ns); #else -static inline struct ipc_namespace *copy_ipcs(unsigned long flags, +static inline struct ipc_namespace *copy_ipcs(u64 flags, struct user_namespace *user_ns, struct ipc_namespace *ns) { if (flags & CLONE_NEWIPC) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index fd11fffdd3c3..adbe234a6f6c 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -211,7 +211,7 @@ LSM_HOOK(int, 0, file_open, struct file *file) LSM_HOOK(int, 0, file_post_open, struct file *file, int mask) LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, - unsigned long clone_flags) + u64 clone_flags) LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task) LSM_HOOK(int, 0, cred_alloc_blank, struct cred *cred, gfp_t gfp) LSM_HOOK(void, LSM_RET_VOID, cred_free, struct cred *cred) diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 70b366b64816..ff290c87b2e7 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -11,7 +11,7 @@ struct fs_struct; struct user_namespace; struct ns_common; -extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, +extern struct mnt_namespace *copy_mnt_ns(u64, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct 
mnt_namespace *ns); DEFINE_FREE(put_mnt_ns, struct mnt_namespace *, if (!IS_ERR_OR_NULL(_T)) put_mnt_ns(_T)) diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index dab6a1734a22..82533e899ff4 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -103,7 +103,7 @@ static inline struct cred *nsset_cred(struct nsset *set) * */ -int copy_namespaces(unsigned long flags, struct task_struct *tsk); +int copy_namespaces(u64 flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); int exec_task_namespaces(void); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..0620a3e08e83 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -78,7 +78,7 @@ static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) } #endif -extern struct pid_namespace *copy_pid_ns(unsigned long flags, +extern struct pid_namespace *copy_pid_ns(u64 flags, struct user_namespace *user_ns, struct pid_namespace *ns); extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); @@ -97,7 +97,7 @@ static inline int pidns_memfd_noexec_scope(struct pid_namespace *ns) return 0; } -static inline struct pid_namespace *copy_pid_ns(unsigned long flags, +static inline struct pid_namespace *copy_pid_ns(u64 flags, struct user_namespace *user_ns, struct pid_namespace *ns) { if (flags & CLONE_NEWPID) diff --git a/include/linux/rseq.h b/include/linux/rseq.h index bc8af3eb5598..a96fd345aa38 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -65,7 +65,7 @@ static inline void rseq_migrate(struct task_struct *t) * If parent process has a registered restartable sequences area, the * child inherits. Unregister rseq for a clone with CLONE_VM set. 
*/ -static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { if (clone_flags & CLONE_VM) { t->rseq = NULL; @@ -107,7 +107,7 @@ static inline void rseq_preempt(struct task_struct *t) static inline void rseq_migrate(struct task_struct *t) { } -static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { } static inline void rseq_execve(struct task_struct *t) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ea41795a352b..34d6a0e108c3 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -63,7 +63,7 @@ extern int lockdep_tasklist_lock_is_held(void); extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); -extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern int sched_fork(u64 clone_flags, struct task_struct *p); extern int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); extern void sched_cancel_fork(struct task_struct *p); extern void sched_post_fork(struct task_struct *p); diff --git a/include/linux/security.h b/include/linux/security.h index 521bcb5b9717..9a1d4a6c8673 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -489,7 +489,7 @@ int security_file_receive(struct file *file); int security_file_open(struct file *file); int security_file_post_open(struct file *file, int mask); int security_file_truncate(struct file *file); -int security_task_alloc(struct task_struct *task, unsigned long clone_flags); +int security_task_alloc(struct task_struct *task, u64 clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); void security_cred_free(struct cred *cred); @@ -1215,7 +1215,7 @@ static inline int security_file_truncate(struct file 
*file) } static inline int security_task_alloc(struct task_struct *task, - unsigned long clone_flags) + u64 clone_flags) { return 0; } diff --git a/include/linux/sem.h b/include/linux/sem.h index c4deefe42aeb..275269ce2ec8 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -9,12 +9,12 @@ struct task_struct; #ifdef CONFIG_SYSVIPC -extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk); +extern int copy_semundo(u64 clone_flags, struct task_struct *tsk); extern void exit_sem(struct task_struct *tsk); #else -static inline int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) +static inline int copy_semundo(u64 clone_flags, struct task_struct *tsk) { return 0; } diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index bb2c52f4fc94..b6e36525e0be 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -43,7 +43,7 @@ static inline struct time_namespace *get_time_ns(struct time_namespace *ns) return ns; } -struct time_namespace *copy_time_ns(unsigned long flags, +struct time_namespace *copy_time_ns(u64 flags, struct user_namespace *user_ns, struct time_namespace *old_ns); void free_time_ns(struct time_namespace *ns); @@ -129,7 +129,7 @@ static inline void put_time_ns(struct time_namespace *ns) } static inline -struct time_namespace *copy_time_ns(unsigned long flags, +struct time_namespace *copy_time_ns(u64 flags, struct user_namespace *user_ns, struct time_namespace *old_ns) { diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 516217c39094..915303a82d84 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -205,7 +205,7 @@ extern void uprobe_start_dup_mmap(void); extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); -extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); +extern void 
uprobe_copy_process(struct task_struct *t, u64 flags); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); @@ -281,7 +281,7 @@ static inline bool uprobe_deny_signal(void) static inline void uprobe_free_utask(struct task_struct *t) { } -static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags) +static inline void uprobe_copy_process(struct task_struct *t, u64 flags) { } static inline void uprobe_clear_state(struct mm_struct *mm) diff --git a/include/linux/user_events.h b/include/linux/user_events.h index 8afa8c3a0973..57d1ff006090 100644 --- a/include/linux/user_events.h +++ b/include/linux/user_events.h @@ -33,7 +33,7 @@ extern void user_event_mm_dup(struct task_struct *t, extern void user_event_mm_remove(struct task_struct *t); static inline void user_events_fork(struct task_struct *t, - unsigned long clone_flags) + u64 clone_flags) { struct user_event_mm *old_mm; @@ -68,7 +68,7 @@ static inline void user_events_exit(struct task_struct *t) } #else static inline void user_events_fork(struct task_struct *t, - unsigned long clone_flags) + u64 clone_flags) { } diff --git a/include/linux/utsname.h b/include/linux/utsname.h index bf7613ba412b..ba34ec0e2f95 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -35,7 +35,7 @@ static inline void get_uts_ns(struct uts_namespace *ns) refcount_inc(&ns->ns.count); } -extern struct uts_namespace *copy_utsname(unsigned long flags, +extern struct uts_namespace *copy_utsname(u64 flags, struct user_namespace *user_ns, struct uts_namespace *old_ns); extern void free_uts_ns(struct uts_namespace *ns); @@ -55,7 +55,7 @@ static inline void put_uts_ns(struct uts_namespace *ns) { } -static inline struct uts_namespace *copy_utsname(unsigned long flags, +static inline struct uts_namespace *copy_utsname(u64 flags, struct user_namespace *user_ns, struct uts_namespace *old_ns) { 
if (flags & CLONE_NEWUTS) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 025a7574b275..0e008cfe159d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -204,7 +204,7 @@ struct net { extern struct net init_net; #ifdef CONFIG_NET_NS -struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, +struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns, struct net *old_net); void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid); @@ -218,7 +218,7 @@ extern struct task_struct *cleanup_net_task; #else /* CONFIG_NET_NS */ #include #include -static inline struct net *copy_net_ns(unsigned long flags, +static inline struct net *copy_net_ns(u64 flags, struct user_namespace *user_ns, struct net *old_net) { if (flags & CLONE_NEWNET) diff --git a/include/trace/events/task.h b/include/trace/events/task.h index af535b053033..4f0759634306 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h @@ -8,14 +8,14 @@ TRACE_EVENT(task_newtask, - TP_PROTO(struct task_struct *task, unsigned long clone_flags), + TP_PROTO(struct task_struct *task, u64 clone_flags), TP_ARGS(task, clone_flags), TP_STRUCT__entry( __field( pid_t, pid) __array( char, comm, TASK_COMM_LEN) - __field( unsigned long, clone_flags) + __field( u64, clone_flags) __field( short, oom_score_adj) ), @@ -26,7 +26,7 @@ TRACE_EVENT(task_newtask, __entry->oom_score_adj = task->signal->oom_score_adj; ), - TP_printk("pid=%d comm=%s clone_flags=%lx oom_score_adj=%hd", + TP_printk("pid=%d comm=%s clone_flags=%llx oom_score_adj=%hd", __entry->pid, __entry->comm, __entry->clone_flags, __entry->oom_score_adj) ); -- cgit v1.2.3 From 8bde81ec684238587decd5ab6b1bf18041814937 Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Mon, 21 Jul 2025 12:43:35 +0200 Subject: drm/format-helper: introduce drm_fb_xrgb8888_to_gray2() Convert XRGB8888 to 2bit grayscale. 
It uses drm_fb_xrgb8888_to_gray8() to convert the pixels to gray8 as an intermediate step before converting to gray2. Signed-off-by: Marcus Folkesson Reviewed-by: Javier Martinez Canillas Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250721-st7571-format-v2-5-159f4134098c@gmail.com Signed-off-by: Javier Martinez Canillas --- include/drm/drm_format_helper.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h index 562bc383ece4..32d57d6c5327 100644 --- a/include/drm/drm_format_helper.h +++ b/include/drm/drm_format_helper.h @@ -136,4 +136,8 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc const struct iosys_map *src, const struct drm_framebuffer *fb, const struct drm_rect *clip, struct drm_format_conv_state *state); +void drm_fb_xrgb8888_to_gray2(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, const struct drm_framebuffer *fb, + const struct drm_rect *clip, struct drm_format_conv_state *state); + #endif /* __LINUX_DRM_FORMAT_HELPER_H */ -- cgit v1.2.3 From 7051b54fb5aa2d0b77657aef7c272471b36c0327 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 29 Aug 2025 21:56:38 +0000 Subject: tcp: Remove sk->sk_prot->orphan_count. TCP tracks the number of orphaned (SOCK_DEAD but not yet destructed) sockets in tcp_orphan_count. In some code that was shared with DCCP, tcp_orphan_count is referenced via sk->sk_prot->orphan_count. Let's reference tcp_orphan_count directly. inet_csk_prepare_for_destroy_sock() is moved to inet_connection_sock.c due to header dependency. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jason Xing Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250829215641.711664-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 8 +------- include/net/sock.h | 2 -- include/net/tcp.h | 10 ++++++++++ 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 1735db332aab..0737d8e178dd 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -299,14 +299,8 @@ reqsk_timeout(struct request_sock *req, unsigned long max_timeout) return (unsigned long)min_t(u64, timeout, max_timeout); } -static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) -{ - /* The below has to be done to allow calling inet_csk_destroy_sock */ - sock_set_flag(sk, SOCK_DEAD); - this_cpu_inc(*sk->sk_prot->orphan_count); -} - void inet_csk_destroy_sock(struct sock *sk); +void inet_csk_prepare_for_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); /* diff --git a/include/net/sock.h b/include/net/sock.h index 73cd3316e288..1e7f124871d2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1353,8 +1353,6 @@ struct proto { unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ - unsigned int __percpu *orphan_count; - struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; diff --git a/include/net/tcp.h b/include/net/tcp.h index 16dc9cebb9d2..0fb7923b8367 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -54,6 +54,16 @@ extern struct inet_hashinfo tcp_hashinfo; DECLARE_PER_CPU(unsigned int, tcp_orphan_count); int tcp_orphan_count_sum(void); +static inline void tcp_orphan_count_inc(void) +{ + this_cpu_inc(tcp_orphan_count); +} + +static inline void tcp_orphan_count_dec(void) +{ + this_cpu_dec(tcp_orphan_count); +} + DECLARE_PER_CPU(u32, tcp_tw_isn); 
void tcp_time_wait(struct sock *sk, int state, int timeo); -- cgit v1.2.3 From 10343e7e6c7c6558217b56fb44a538ad04752adb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Aug 2025 15:30:52 +0000 Subject: inet: ping: remove ping_hash() There is no point in keeping ping_hash(). Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Yue Haibing Link: https://patch.msgid.link/20250829153054.474201-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ping.h b/include/net/ping.h index bc7779262e60..9634b8800814 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -54,7 +54,6 @@ struct pingfakehdr { }; int ping_get_port(struct sock *sk, unsigned short ident); -int ping_hash(struct sock *sk); void ping_unhash(struct sock *sk); int ping_init_sock(struct sock *sk); -- cgit v1.2.3 From 689adb36bd433b24390080606a07d664cca2982e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Aug 2025 15:30:53 +0000 Subject: inet: ping: make ping_port_rover per netns Provide isolation between netns for ping idents. Randomize initial ping_port_rover value at netns creation. 
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20250829153054.474201-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6373e3f17da8..54a7d187f62a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -251,6 +251,7 @@ struct netns_ipv4 { int sysctl_igmp_qrv; struct ping_group_range ping_group_range; + u16 ping_port_rover; atomic_t dev_addr_genid; -- cgit v1.2.3 From d9a3e9929452780df16f3414f0d59b5f69d058cf Mon Sep 17 00:00:00 2001 From: Thomas Andreatta Date: Wed, 27 Aug 2025 17:24:43 +0200 Subject: dmaengine: sh: setup_xfer error handling This patch modifies the type of setup_xfer from void to int and handles errors since the function can fail. `setup_xfer` now returns the (eventual) error from `dmae_set_dmars`|`dmae_set_chcr`, while `shdma_tx_submit` handles the result, removing the chunks from the queue and marking PM as idle in case of an error. 
Signed-off-by: Thomas Andreatta Link: https://lore.kernel.org/r/20250827152442.90962-1-thomas.andreatta2000@gmail.com Signed-off-by: Vinod Koul --- include/linux/shdma-base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index 6dfd05ef5c2d..03ba4dab2ef7 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -96,7 +96,7 @@ struct shdma_ops { int (*desc_setup)(struct shdma_chan *, struct shdma_desc *, dma_addr_t, dma_addr_t, size_t *); int (*set_slave)(struct shdma_chan *, int, dma_addr_t, bool); - void (*setup_xfer)(struct shdma_chan *, int); + int (*setup_xfer)(struct shdma_chan *, int); void (*start_xfer)(struct shdma_chan *, struct shdma_desc *); struct shdma_desc *(*embedded_desc)(void *, int); bool (*chan_irq)(struct shdma_chan *, int); -- cgit v1.2.3 From 7ea95d55e63176899eb96f7aaa34a5646f501b2c Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 26 Aug 2025 11:07:38 -0500 Subject: dmaengine: Fix dma_async_tx_descriptor->tx_submit documentation Commit 790fb9956eea ("linux/dmaengine.h: fix a few kernel-doc warnings") inserted new documentation for @desc_free in the middle of @tx_submit's description. Put @tx_submit's description back together, matching the indentation style of the rest of the documentation for dma_async_tx_descriptor. 
Fixes: 790fb9956eea ("linux/dmaengine.h: fix a few kernel-doc warnings") Reviewed-by: Dave Jiang Signed-off-by: Nathan Lynch Link: https://lore.kernel.org/r/20250826-dma_async_tx_desc-tx_submit-doc-fix-v1-1-18a4b51697db@amd.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6de7c05d6bd8..99efe2b9b4ea 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -594,9 +594,9 @@ struct dma_descriptor_metadata_ops { * @phys: physical address of the descriptor * @chan: target channel for this operation * @tx_submit: accept the descriptor, assign ordered cookie and mark the + * descriptor pending. To be pushed on .issue_pending() call * @desc_free: driver's callback function to free a resusable descriptor * after completion - * descriptor pending. To be pushed on .issue_pending() call * @callback: routine to call after this operation is complete * @callback_result: error result from a DMA transaction * @callback_param: general parameter to pass to the callback routine -- cgit v1.2.3 From c9f62564252c21d739a5003e9b2d6ad0828aa7bd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 30 Aug 2025 19:01:11 +0200 Subject: mtd: rawnand: s3c2410: Drop driver (no actual S3C64xx user) The s3c2410 NAND driver still supports S3C64xx platform, which in general is supported in the kernel. There are however no references of "s3c6400-nand" platform device ID or "s3c24xx-nand" driver, thus this driver cannot be instantiated for S3C64xx platform and is basically unused. 
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Miquel Raynal --- include/linux/platform_data/mtd-nand-s3c2410.h | 70 -------------------------- 1 file changed, 70 deletions(-) delete mode 100644 include/linux/platform_data/mtd-nand-s3c2410.h (limited to 'include') diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h deleted file mode 100644 index 25390fc3e795..000000000000 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 - NAND device controller platform_device info -*/ - -#ifndef __MTD_NAND_S3C2410_H -#define __MTD_NAND_S3C2410_H - -#include - -/** - * struct s3c2410_nand_set - define a set of one or more nand chips - * @flash_bbt: Openmoko u-boot can create a Bad Block Table - * Setting this flag will allow the kernel to - * look for it at boot time and also skip the NAND - * scan. - * @options: Default value to set into 'struct nand_chip' options. - * @nr_chips: Number of chips in this set - * @nr_partitions: Number of partitions pointed to by @partitions - * @name: Name of set (optional) - * @nr_map: Map for low-layer logical to physical chip numbers (option) - * @partitions: The mtd partition list - * - * define a set of one or more nand chips registered with an unique mtd. Also - * allows to pass flag to the underlying NAND layer. 'disable_ecc' will trigger - * a warning at boot time. 
- */ -struct s3c2410_nand_set { - unsigned int flash_bbt:1; - - unsigned int options; - int nr_chips; - int nr_partitions; - char *name; - int *nr_map; - struct mtd_partition *partitions; - struct device_node *of_node; -}; - -struct s3c2410_platform_nand { - /* timing information for controller, all times in nanoseconds */ - - int tacls; /* time for active CLE/ALE to nWE/nOE */ - int twrph0; /* active time for nWE/nOE */ - int twrph1; /* time for release CLE/ALE from nWE/nOE inactive */ - - unsigned int ignore_unset_ecc:1; - - enum nand_ecc_engine_type engine_type; - - int nr_sets; - struct s3c2410_nand_set *sets; - - void (*select_chip)(struct s3c2410_nand_set *, - int chip); -}; - -/** - * s3c_nand_set_platdata() - register NAND platform data. - * @nand: The NAND platform data to register with s3c_device_nand. - * - * This function copies the given NAND platform data, @nand and registers - * it with the s3c_device_nand. This allows @nand to be __initdata. -*/ -extern void s3c_nand_set_platdata(struct s3c2410_platform_nand *nand); - -#endif /*__MTD_NAND_S3C2410_H */ -- cgit v1.2.3 From dfb84c33079497bf27058b15780e1c7bba4c371b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Aug 2025 13:10:44 +0200 Subject: fuse: allow synchronous FUSE_INIT FUSE_INIT has always been asynchronous with mount. That means that the server processed this request after the mount syscall returned. This means that FUSE_INIT can't supply the root inode's ID, hence it currently has a hardcoded value. There are other limitations such as not being able to perform getxattr during mount, which is needed by selinux. To remove these limitations allow server to process FUSE_INIT while initializing the in-core super block for the fuse filesystem. This can only be done if the server is prepared to handle this, so add FUSE_DEV_IOC_SYNC_INIT ioctl, which a) lets the server know whether this feature is supported, returning ENOTTY otherwise. 
b) lets the kernel know to perform a synchronous initialization The implementation is slightly tricky, since fuse_dev/fuse_conn are set up only during super block creation. This is solved by setting the private data of the fuse device file to a special value ((struct fuse_dev *) 1) and waiting for this to be turned into a proper fuse_dev before commencing with operations on the device file. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 94621f68a5cc..6b9fb8b08768 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -1131,6 +1131,7 @@ struct fuse_backing_map { #define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ struct fuse_backing_map) #define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) +#define FUSE_DEV_IOC_SYNC_INIT _IO(FUSE_DEV_IOC_MAGIC, 3) struct fuse_lseek_in { uint64_t fh; -- cgit v1.2.3 From 7df87820122acd3204565109f636a1367912655a Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 5 Aug 2025 15:45:08 +1000 Subject: pidns: move is-ancestor logic to helper This check will be needed in later patches, and there's no point open-coding it each time. 
Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250805-procfs-pidns-api-v4-1-705f984940e7@cyphar.com Signed-off-by: Christian Brauner --- include/linux/pid_namespace.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..17fdc059f8da 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -84,6 +84,9 @@ extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); extern void put_pid_ns(struct pid_namespace *ns); +extern bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor); + #else /* !CONFIG_PID_NS */ #include @@ -118,6 +121,12 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) { return 0; } + +static inline bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor) +{ + return false; +} #endif /* CONFIG_PID_NS */ extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); -- cgit v1.2.3 From cb86408b1fc2e3f6fe45ebe8509a5404060e01e0 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 1 Aug 2025 19:05:23 +0200 Subject: list: add list_last_entry_or_null() Add an equivalent of list_first_entry_or_null() to obtain the last element of a list. Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250801-drm-bridge-alloc-getput-drm_bridge_get_next_bridge-v2-1-888912b0be13@bootlin.com Signed-off-by: Luca Ceresoli --- include/linux/list.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..7f7657e41620 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -636,6 +636,20 @@ static inline void list_splice_tail_init(struct list_head *list, pos__ != head__ ? 
list_entry(pos__, type, member) : NULL; \ }) +/** + * list_last_entry_or_null - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the list is empty, it returns NULL. + */ +#define list_last_entry_or_null(ptr, type, member) ({ \ + struct list_head *head__ = (ptr); \ + struct list_head *pos__ = READ_ONCE(head__->prev); \ + pos__ != head__ ? list_entry(pos__, type, member) : NULL; \ +}) + /** * list_next_entry - get the next element in list * @pos: the type * to cursor -- cgit v1.2.3 From d77ad5178e90f5aa4ce6085510b3b2f742abc5f0 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 1 Aug 2025 19:05:24 +0200 Subject: drm/bridge: add drm_bridge_chain_get_last_bridge() Add an equivalent of drm_bridge_chain_get_first_bridge() to get the last bridge. Reviewed-by: Maxime Ripard Link: https://lore.kernel.org/r/20250801-drm-bridge-alloc-getput-drm_bridge_get_next_bridge-v2-2-888912b0be13@bootlin.com Signed-off-by: Luca Ceresoli --- include/drm/drm_bridge.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 8d9d4fd078e7..788517ab00d3 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1410,6 +1410,24 @@ drm_bridge_chain_get_first_bridge(struct drm_encoder *encoder) struct drm_bridge, chain_node)); } +/** + * drm_bridge_chain_get_last_bridge() - Get the last bridge in the chain + * @encoder: encoder object + * + * The refcount of the returned bridge is incremented. Use drm_bridge_put() + * when done with it. + * + * RETURNS: + * the last bridge in the chain, or NULL if @encoder has no bridge attached + * to it. 
+ */ +static inline struct drm_bridge * +drm_bridge_chain_get_last_bridge(struct drm_encoder *encoder) +{ + return drm_bridge_get(list_last_entry_or_null(&encoder->bridge_chain, + struct drm_bridge, chain_node)); +} + /** * drm_for_each_bridge_in_chain() - Iterate over all bridges present in a chain * @encoder: the encoder to iterate bridges on -- cgit v1.2.3 From e4cedfd4f0fc839bb3c783d7e827e9755e1af5cf Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 1 Aug 2025 19:05:27 +0200 Subject: drm/bridge: add drm_bridge_is_last() Some code needing to know whether a bridge is the last in a chain currently call drm_bridge_get_next_bridge(). However drm_bridge_get_next_bridge() will soon increment the refcount of the returned bridge, which would make such code more annoying to write. In preparation for drm_bridge_get_next_bridge() to increment the refcount, as well as to simplify such code, introduce a simple bool function to tell whether a bridge is the last in the chain. Reviewed-by: Maxime Ripard Link: https://lore.kernel.org/r/20250801-drm-bridge-alloc-getput-drm_bridge_get_next_bridge-v2-5-888912b0be13@bootlin.com Signed-off-by: Luca Ceresoli --- include/drm/drm_bridge.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 788517ab00d3..76e05930f50e 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1321,6 +1321,11 @@ static inline struct drm_bridge *of_drm_find_bridge(struct device_node *np) } #endif +static inline bool drm_bridge_is_last(struct drm_bridge *bridge) +{ + return list_is_last(&bridge->chain_node, &bridge->encoder->bridge_chain); +} + /** * drm_bridge_get_current_state() - Get the current bridge state * @bridge: bridge object -- cgit v1.2.3 From 28edfaa10ca1b370b1a27fde632000d35c43402c Mon Sep 17 00:00:00 2001 From: Maciej Strozek Date: Mon, 1 Sep 2025 16:15:07 +0100 Subject: ASoC: SDCA: Add quirk for incorrect function types for 3 systems 
Certain systems have CS42L43 DisCo that claims to conform to version 0.6.28 but uses the function types from the 1.0 spec. Add a quirk as a workaround. Closes: https://github.com/thesofproject/linux/issues/5515 Cc: stable@vger.kernel.org Signed-off-by: Maciej Strozek Reviewed-by: Pierre-Louis Bossart Link: https://patch.msgid.link/20250901151518.3197941-1-mstrozek@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/sdca.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/sdca.h b/include/sound/sdca.h index 5a5d6de78d72..9c6a351c9d47 100644 --- a/include/sound/sdca.h +++ b/include/sound/sdca.h @@ -46,6 +46,7 @@ struct sdca_device_data { enum sdca_quirk { SDCA_QUIRKS_RT712_VB, + SDCA_QUIRKS_SKIP_FUNC_TYPE_PATCHING, }; #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SDCA) -- cgit v1.2.3 From 72ca981dba5e98c2b1c2956016cc4be934d9fbea Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 31 Aug 2025 14:52:37 +0800 Subject: firmware: arm_scmi: Fix function name typo in scmi_perf_proto_ops struct The performance protocol ops table incorrectly referenced power_scale_mw_get instead of the correct power_scale_get. Fix the typo to use the proper function. 
Signed-off-by: Peng Fan Message-Id: <20250831-scmi-cpufreq-v1-1-493031cf6e9b@nxp.com> Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 688466a0e816..aafaac1496b0 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -153,7 +153,7 @@ struct scmi_perf_domain_info { * for a given device * @fast_switch_rate_limit: gets the minimum time (us) required between * successive fast_switching requests - * @power_scale_mw_get: indicates if the power values provided are in milliWatts + * @power_scale_get: indicates if the power values provided are in milliWatts * or in some other (abstract) scale */ struct scmi_perf_proto_ops { -- cgit v1.2.3 From 61f132ca8c46ffee368a951e516d19d4ae767ea8 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 29 Aug 2025 13:06:04 +0800 Subject: ptp: add helpers to get the phc_index by of_node or dev Some Ethernet controllers do not have an integrated PTP timer function. Instead, the PTP timer is a separated device and provides PTP hardware clock to the Ethernet controller to use. Therefore, the Ethernet controller driver needs to obtain the PTP clock's phc_index in its ethtool_ops::get_ts_info(). Currently, most drivers implement this in the following ways. 1. The PTP device driver adds a custom API and exports it to the Ethernet controller driver. 2. The PTP device driver adds private data to its device structure. So the private data structure needs to be exposed to the Ethernet controller driver. When registering the ptp clock, ptp_clock_register() always saves the ptp_clock pointer to the private data of ptp_clock::dev. Therefore, as long as ptp_clock::dev is obtained, the phc_index can be obtained. So the following generic APIs can be added to the ptp driver to obtain the phc_index. 1. 
ptp_clock_index_by_dev(): Obtain the phc_index by the device pointer of the PTP device. 2. ptp_clock_index_by_of_node(): Obtain the phc_index by the of_node pointer of the PTP device. Also, we can add another API like ptp_clock_index_by_fwnode() to get the phc_index by fwnode of PTP device. However, this API is not used in this patch set, so it is better to add it when needed. Suggested-by: Vladimir Oltean Signed-off-by: Wei Fang Reviewed-by: Frank Li Link: https://patch.msgid.link/20250829050615.1247468-4-wei.fang@nxp.com Signed-off-by: Paolo Abeni --- include/linux/ptp_clock_kernel.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 3d089bd4d5e9..7dd7951b23d5 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -360,6 +360,24 @@ extern void ptp_clock_event(struct ptp_clock *ptp, extern int ptp_clock_index(struct ptp_clock *ptp); +/** + * ptp_clock_index_by_of_node() - obtain the device index of + * a PTP clock based on the PTP device of_node + * + * @np: The device of_node pointer of the PTP device. + * Return: The PHC index on success or -1 on failure. + */ +int ptp_clock_index_by_of_node(struct device_node *np); + +/** + * ptp_clock_index_by_dev() - obtain the device index of + * a PTP clock based on the PTP device. + * + * @parent: The parent device (PTP device) pointer of the PTP clock. + * Return: The PHC index on success or -1 on failure. 
+ */ +int ptp_clock_index_by_dev(struct device *parent); + /** * ptp_find_pin() - obtain the pin index of a given auxiliary function * @@ -425,6 +443,10 @@ static inline void ptp_clock_event(struct ptp_clock *ptp, { } static inline int ptp_clock_index(struct ptp_clock *ptp) { return -1; } +static inline int ptp_clock_index_by_of_node(struct device_node *np) +{ return -1; } +static inline int ptp_clock_index_by_dev(struct device *parent) +{ return -1; } static inline int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { return -1; } -- cgit v1.2.3 From e551fa3159e3050c26ff010c3b595b45d7eb071a Mon Sep 17 00:00:00 2001 From: Qunqin Zhao Date: Sat, 5 Jul 2025 15:20:42 +0800 Subject: mfd: Add support for Loongson Security Engine chip controller Loongson Security Engine chip supports RNG, SM2, SM3 and SM4 accelerator engines. This is the base driver for other specific engine drivers. Co-developed-by: Yinggang Gu Signed-off-by: Yinggang Gu Signed-off-by: Qunqin Zhao Reviewed-by: Huacai Chen Link: https://lore.kernel.org/r/20250705072045.1067-2-zhaoqunqin@loongson.cn Signed-off-by: Lee Jones --- include/linux/mfd/loongson-se.h | 53 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/mfd/loongson-se.h (limited to 'include') diff --git a/include/linux/mfd/loongson-se.h b/include/linux/mfd/loongson-se.h new file mode 100644 index 000000000000..07afa0c2524d --- /dev/null +++ b/include/linux/mfd/loongson-se.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (C) 2025 Loongson Technology Corporation Limited */ + +#ifndef __MFD_LOONGSON_SE_H__ +#define __MFD_LOONGSON_SE_H__ + +#define LOONGSON_ENGINE_CMD_TIMEOUT_US 10000 +#define SE_SEND_CMD_REG 0x0 +#define SE_SEND_CMD_REG_LEN 0x8 +/* Controller command ID */ +#define SE_CMD_START 0x0 +#define SE_CMD_SET_DMA 0x3 +#define SE_CMD_SET_ENGINE_CMDBUF 0x4 + +#define SE_S2LINT_STAT 0x88 +#define SE_S2LINT_EN 0x8c +#define 
SE_S2LINT_CL 0x94 +#define SE_L2SINT_STAT 0x98 +#define SE_L2SINT_SET 0xa0 + +#define SE_INT_ALL 0xffffffff +#define SE_INT_CONTROLLER BIT(0) + +#define SE_ENGINE_MAX 16 +#define SE_ENGINE_RNG 1 +#define SE_CMD_RNG 0x100 + +#define SE_ENGINE_TPM 5 +#define SE_CMD_TPM 0x500 + +#define SE_ENGINE_CMD_SIZE 32 + +struct loongson_se_engine { + struct loongson_se *se; + int id; + + /* Command buffer */ + void *command; + void *command_ret; + + void *data_buffer; + uint buffer_size; + /* Data buffer offset to DMA base */ + uint buffer_off; + + struct completion completion; + +}; + +struct loongson_se_engine *loongson_se_init_engine(struct device *dev, int id); +int loongson_se_send_engine_cmd(struct loongson_se_engine *engine); + +#endif -- cgit v1.2.3 From a60a5abe19d6acd9d9ea4c1883745399fb5dc023 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 22 Aug 2025 10:15:37 +0200 Subject: netfilter: nf_tables: allow iter callbacks to sleep Quoting Sven Auhagen: we do see on occasions that we get the following error message, more so on x86 systems than on arm64: Error: Could not process rule: Cannot allocate memory delete table inet filter It is not a consistent error and does not happen all the time. We are on Kernel 6.6.80, seems to me like we have something along the lines of the nf_tables: allow clone callbacks to sleep problem using GFP_ATOMIC. As hinted at by Sven, this is because of GFP_ATOMIC allocations during set flush. When set is flushed, all elements are deactivated. This triggers a set walk and each element gets added to the transaction list. The rbtree and rhashtable sets don't allow the iter callback to sleep: rbtree walk acquires read side of an rwlock with bh disabled, rhashtable walk happens with rcu read lock held. Rbtree is simple enough to resolve: When the walk context is ITER_READ, no change is needed (the iter callback must not deactivate elements; we're not in a transaction). 
When the iter type is ITER_UPDATE, the rwlock isn't needed because the caller holds the transaction mutex, this prevents any and all changes to the ruleset, including add/remove of set elements. Rhashtable is slightly more complex. When the iter type is ITER_READ, no change is needed, like rbtree. For ITER_UPDATE, we hold transaction mutex which prevents elements from getting free'd, even outside of rcu read lock section. So build a temporary list of all elements while doing the rcu iteration and then call the iterator in a second pass. The disadvantage is the need to iterate twice, but this cost comes with the benefit to allow the iter callback to use GFP_KERNEL allocations in a followup patch. The new list based logic makes it necessary to catch recursive calls to the same set earlier. Such walk -> iter -> walk recursion for the same set can happen during ruleset validation in case userspace gave us a bogus (cyclic) ruleset where verdict map m jumps to chain that sooner or later also calls "vmap @m". Before the new ->in_update_walk test, the ruleset is rejected because the infinite recursion causes ctx->level to exceed the allowed maximum. But with the new logic added here, elements would get skipped: nft_rhash_walk_update would see elements that are on the walk_list of an older stack frame. As all recursive calls into same map results in -EMLINK, we can avoid this problem by using the new in_update_walk flag and reject immediately. Next patch converts the problematic GFP_ATOMIC allocations. 
Reported-by: Sven Auhagen Closes: https://lore.kernel.org/netfilter-devel/BY1PR18MB5874110CAFF1ED098D0BC4E7E07BA@BY1PR18MB5874.namprd18.prod.outlook.com/ Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 891e43a01bdc..e2128663b160 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -556,6 +556,7 @@ struct nft_set_elem_expr { * @size: maximum set size * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element + * @in_update_walk: true during ->walk() in transaction phase * @use: number of rules references to this set * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal @@ -590,6 +591,7 @@ struct nft_set { u32 size; u8 field_len[NFT_REG32_COUNT]; u8 field_count; + bool in_update_walk; u32 use; atomic_t nelems; u32 ndeact; -- cgit v1.2.3 From f4f9e05904e11bbc772c031b35d0d25caa21d5e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 13 Aug 2025 20:43:47 +0200 Subject: netfilter: nf_reject: remove unneeded exports These functions have no external callers and can be static. 
Signed-off-by: Florian Westphal --- include/net/netfilter/ipv4/nf_reject.h | 8 -------- include/net/netfilter/ipv6/nf_reject.h | 10 ---------- 2 files changed, 18 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index c653fcb88354..09de2f2686b5 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -10,14 +10,6 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook); void nf_send_reset(struct net *net, struct sock *, struct sk_buff *oldskb, int hook); -const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, - struct tcphdr *_oth, int hook); -struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - __u8 protocol, int ttl); -void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, - const struct tcphdr *oth); - struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index d729344ba644..94ec0b9f2838 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -9,16 +9,6 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char cod unsigned int hooknum); void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, int hook); -const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, - struct tcphdr *otcph, - unsigned int *otcplen, int hook); -struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - __u8 protocol, int hoplimit); -void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, - const struct sk_buff *oldskb, - const struct tcphdr *oth, unsigned int otcplen); - struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, -- cgit v1.2.3 From 
077dc4a275790b09e8a2ce80822ba8970e9dfb99 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Thu, 28 Aug 2025 14:48:31 +0200 Subject: netfilter: nft_payload: extend offset to 65535 bytes In some situations 255 bytes offset is not enough to match or manipulate the desired packet field. Increase the offset limit to 65535 or U16_MAX. In addition, the nla policy maximum value is not set anymore as it is limited to s16. Instead, the maximum value is checked during the payload expression initialization function. Tested with the nft command line tool. table ip filter { chain output { @nh,2040,8 set 0xff @nh,524280,8 set 0xff @nh,524280,8 0xff @nh,2040,8 0xff } } Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 6c2f483d9828..7644cfe9267d 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -73,7 +73,7 @@ struct nft_ct { struct nft_payload { enum nft_payload_bases base:8; - u8 offset; + u16 offset; u8 len; u8 dreg; }; -- cgit v1.2.3 From 7a8c994cbb2db3c5335cee35fd486557f5aaf7e1 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Mon, 28 Jul 2025 15:06:12 +0800 Subject: ACPI: processor: idle: Optimize ACPI idle driver registration Currently, the ACPI idle driver is registered from within a CPU hotplug callback. Although this didn't cause any functional issues, this is questionable and confusing. And it is better to register the cpuidle driver when all of the CPUs have been brought up. So add a new function to initialize acpi_idle_driver based on the power management information of an available CPU and register cpuidle driver in acpi_processor_driver_init(). 
Signed-off-by: Huisong Li Link: https://patch.msgid.link/20250728070612.1260859-3-lihuisong@huawei.com [ rjw: Added missing inline modifiers ] Signed-off-by: Rafael J. Wysocki --- include/acpi/processor.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index d0eccbd920e5..360b673f05e5 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -423,6 +423,8 @@ int acpi_processor_power_init(struct acpi_processor *pr); int acpi_processor_power_exit(struct acpi_processor *pr); int acpi_processor_power_state_has_changed(struct acpi_processor *pr); int acpi_processor_hotplug(struct acpi_processor *pr); +void acpi_processor_register_idle_driver(void); +void acpi_processor_unregister_idle_driver(void); #else static inline int acpi_processor_power_init(struct acpi_processor *pr) { @@ -443,6 +445,12 @@ static inline int acpi_processor_hotplug(struct acpi_processor *pr) { return -ENODEV; } +static inline void acpi_processor_register_idle_driver(void) +{ +} +static inline void acpi_processor_unregister_idle_driver(void) +{ +} #endif /* CONFIG_ACPI_PROCESSOR_IDLE */ /* in processor_thermal.c */ -- cgit v1.2.3 From f9db1fc52ceb42f4a18506693349316f5e209ba6 Mon Sep 17 00:00:00 2001 From: David Francis Date: Mon, 16 Jun 2025 09:47:42 -0400 Subject: drm/amdgpu: Add ioctl to get all gem handles for a process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new ioctl DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES. This ioctl returns a list of bos with their handles, sizes, and flags and domains. This ioctl is meant to be used during CRIU checkpoint and provide information needed to reconstruct the bos in CRIU restore. 
Userspace for this and the next change can be found at https://github.com/checkpoint-restore/criu/pull/2613 Signed-off-by: David Francis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index bdedbaccf776..902e30263fcc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -57,6 +57,7 @@ extern "C" { #define DRM_AMDGPU_USERQ 0x16 #define DRM_AMDGPU_USERQ_SIGNAL 0x17 #define DRM_AMDGPU_USERQ_WAIT 0x18 +#define DRM_AMDGPU_GEM_LIST_HANDLES 0x19 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -77,6 +78,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) +#define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles) /** * DOC: memory domains @@ -811,6 +813,38 @@ struct drm_amdgpu_gem_op { __u64 value; }; +#define AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT (1 << 0) + +struct drm_amdgpu_gem_list_handles { + /* User pointer to array of drm_amdgpu_gem_list_handles_entry */ + __u64 entries; + + /* Size of entries buffer / Number of handles in process (if larger than size of buffer, must retry) */ + __u32 num_entries; + + __u32 padding; +}; + +struct drm_amdgpu_gem_list_handles_entry { + /* gem handle of buffer object */ + __u32 gem_handle; + + /* Currently just one flag: IS_IMPORT */ + __u32 flags;
+ + /* Size of bo */ + __u64 size; + + /* Preferred domains for GEM_CREATE */ + __u64 preferred_domains; + + /* GEM_CREATE flags for re-creation of buffer */ + __u64 alloc_flags; + + /* physical start_addr alignment in bytes for some HW requirements */ + __u64 alignment; +}; + #define AMDGPU_VA_OP_MAP 1 #define AMDGPU_VA_OP_UNMAP 2 #define AMDGPU_VA_OP_CLEAR 3 -- cgit v1.2.3 From 4d82724f7f2b847eb0454b1aab5450545b39abd4 Mon Sep 17 00:00:00 2001 From: David Francis Date: Mon, 16 Jun 2025 09:49:33 -0400 Subject: drm/amdgpu: Add mapping info option for GEM_OP ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new GEM_OP_IOCTL option GET_MAPPING_INFO, which returns a list of mappings associated with a given bo, along with their positions and offsets. Userspace for this and the previous change can be found at: https://github.com/checkpoint-restore/criu/pull/2613 Signed-off-by: David Francis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 902e30263fcc..9cebd072a042 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -802,6 +802,21 @@ union drm_amdgpu_wait_fences { #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 #define AMDGPU_GEM_OP_SET_PLACEMENT 1 +#define AMDGPU_GEM_OP_GET_MAPPING_INFO 2 + +struct drm_amdgpu_gem_vm_entry { + /* Start of mapping (in bytes) */ + __u64 addr; + + /* Size of mapping (in bytes) */ + __u64 size; + + /* Mapping offset */ + __u64 offset; + + /* flags needed to recreate mapping */ + __u64 flags; +}; /* Sets or returns a value associated with a buffer. */ struct drm_amdgpu_gem_op { @@ -809,8 +824,12 @@ struct drm_amdgpu_gem_op { __u32 handle; /** AMDGPU_GEM_OP_* */ __u32 op; - /** Input or return value */ + /** Input or return value. 
For MAPPING_INFO op: pointer to array of struct drm_amdgpu_gem_vm_entry */ __u64 value; + /** For MAPPING_INFO op: number of mappings (in/out) */ + __u32 num_entries; + + __u32 padding; }; #define AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT (1 << 0) -- cgit v1.2.3 From 23a6037ce76cb44d93cfea23aec5c7f3971227d4 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 29 Aug 2025 17:08:37 -0700 Subject: bonding: Remove support for use_carrier Remove the implementation of use_carrier, the link monitoring method that utilizes ethtool or ioctl to determine the link state of an interface in a bond. Bonding will always behave as if use_carrier=1, which relies on netif_carrier_ok() to determine the link state of interfaces. To avoid acquiring RTNL many times per second, bonding inspects link state under RCU, but not under RTNL. However, ethtool implementations in drivers may sleep, and therefore this strategy is unsuitable for use with calls into driver ethtool functions. The use_carrier option was introduced in 2003, to provide backwards compatibility for network device drivers that did not support the then-new netif_carrier_ok/on/off system. Device drivers are now expected to support netif_carrier_*, and the use_carrier backwards compatibility logic is no longer necessary. The option itself remains, but when queried always returns 1, and may only be set to 1.
Link: https://lore.kernel.org/000000000000eb54bf061cfd666a@google.com Link: https://lore.kernel.org/20240718122017.d2e33aaac43a.I10ab9c9ded97163aef4e4de10985cd8f7de60d28@changeid Signed-off-by: Jay Vosburgh Reported-by: syzbot+b8c48ea38ca27d150063@syzkaller.appspotmail.com Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/2029487.1756512517@famine Signed-off-by: Jakub Kicinski --- include/net/bonding.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index e06f0d63b2c1..37335f62f579 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -126,7 +126,6 @@ struct bond_params { int arp_interval; int arp_validate; int arp_all_targets; - int use_carrier; int fail_over_mac; int updelay; int downdelay; -- cgit v1.2.3 From 65128868bb3b0621d2d8e71f19852675a064b373 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:04 -0700 Subject: mm/memory_hotplug: Update comment for hotplug memory callback priorities Add clarification to comment for memory hotplug callback ordering as the current comment does not provide clear language on which callback happens first. Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-2-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/memory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 40eb70ccb09d..1305102688d0 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -115,8 +115,8 @@ struct notifier_block; struct mem_section; /* - * Priorities for the hotplug memory callback routines (stored in decreasing - * order in the callback chain) + * Priorities for the hotplug memory callback routines. Invoked from + * high to low. Higher priorities correspond to higher numbers. 
*/ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -- cgit v1.2.3 From b57fc652ca24ada3b0c888327f9944ed21559286 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:05 -0700 Subject: drivers/base/node: Add a helper function node_update_perf_attrs() Add helper function node_update_perf_attrs() to allow update of node access coordinates computed by an external agent such as CXL. The helper allows updating of coordinates after the attribute being created by HMAT. Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/node.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index 2c7529335b21..866e3323f1fd 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -85,6 +85,8 @@ struct node_cache_attrs { void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, enum access_coordinate_class access); +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid, enum access_coordinate_class access) { } + +static inline void node_update_perf_attrs(unsigned int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ +} #endif struct node { -- cgit v1.2.3 From 2e454fb8056df6da4bba7d89a57bf60e217463c0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:06 -0700 Subject: cxl, acpi/hmat: Update CXL access coordinates directly instead of through HMAT The current implementation of CXL memory hotplug notifier gets called before the HMAT memory hotplug notifier. 
The CXL driver calculates the access coordinates (bandwidth and latency values) for the CXL end to end path (i.e. CPU to endpoint). When the CXL region is onlined, the CXL memory hotplug notifier writes the access coordinates to the HMAT target structs. Then the HMAT memory hotplug notifier is called and it creates the access coordinates for the node sysfs attributes. During testing on an Intel platform, it was found that although the newly calculated coordinates were pushed to sysfs, the sysfs attributes for the access coordinates showed up with the wrong initiator. The system has 4 nodes (0, 1, 2, 3) where node 0 and 1 are CPU nodes and node 2 and 3 are CXL nodes. The expectation is that node 2 would show up as a target to node 0: /sys/devices/system/node/node2/access0/initiators/node0 However it was observed that node 2 showed up as a target under node 1: /sys/devices/system/node/node2/access0/initiators/node1 The original intent of the 'ext_updated' flag in HMAT handling code was to stop HMAT memory hotplug callback from clobbering the access coordinates after CXL has injected its calculated coordinates and replaced the generic target access coordinates provided by the HMAT table in the HMAT target structs. However the flag is hacky at best and blocks the updates from other CXL regions that are onlined in the same node later on. Remove the 'ext_updated' flag usage and just update the access coordinates for the nodes directly without touching HMAT target data. The hotplug memory callback ordering is changed. Instead of changing CXL, move HMAT back so there's room for the levels rather than have CXL share the same level as SLAB_CALLBACK_PRI. The change will result in the CXL callback being executed after the HMAT callback. With the change, the CXL hotplug memory notifier runs after the HMAT callback. The HMAT callback will create the node sysfs attributes for access coordinates.
The CXL callback will write the access coordinates to the now created node sysfs attributes directly and will not pollute the HMAT target values. A nodemask is introduced to keep track if a node has been updated and prevents further updates. Fixes: 067353a46d8c ("cxl/region: Add memory hotplug notifier for cxl region") Cc: stable@vger.kernel.org Tested-by: Marc Herbert Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-4-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 1305102688d0..0b755d1ef1ec 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -120,8 +120,8 @@ struct mem_section; */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -#define HMAT_CALLBACK_PRI 2 #define CXL_CALLBACK_PRI 5 +#define HMAT_CALLBACK_PRI 6 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 -- cgit v1.2.3 From e99ecbc4c89adf551cccbbc00b5cb08c50969af6 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:07 -0700 Subject: acpi/hmat: Remove now unused hmat_update_target_coordinates() Remove deadcode since CXL no longer calls hmat_update_target_coordinates(). 
Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-5-dave.jiang@intel.com Signed-off-by: Dave Jiang --- include/linux/acpi.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1c5bb1e887cd..5ff5d99f6ead 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1595,18 +1595,6 @@ static inline void acpi_use_parent_companion(struct device *dev) ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } -#ifdef CONFIG_ACPI_HMAT -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access); -#else -static inline int hmat_update_target_coordinates(int nid, - struct access_coordinate *coord, - enum access_coordinate_class access) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else -- cgit v1.2.3 From 5d14bbf9d1d90cb7ca3e46fe2c8a4277572eab94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Sep 2025 09:31:41 +0000 Subject: net_sched: act: remove tcfa_qstats tcfa_qstats is currently only used to hold drops and overlimits counters. tcf_action_inc_drop_qstats() and tcf_action_inc_overlimit_qstats() currently acquire a->tcfa_lock to increment these counters. Switch to two atomic_t to get lock-free accounting. 
Signed-off-by: Eric Dumazet Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20250901093141.2093176-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/act_api.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 2894cfff2da3..91a24b5e0b93 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -33,7 +33,10 @@ struct tc_action { struct tcf_t tcfa_tm; struct gnet_stats_basic_sync tcfa_bstats; struct gnet_stats_basic_sync tcfa_bstats_hw; - struct gnet_stats_queue tcfa_qstats; + + atomic_t tcfa_drops; + atomic_t tcfa_overlimits; + struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; struct gnet_stats_basic_sync __percpu *cpu_bstats; @@ -53,7 +56,6 @@ struct tc_action { #define tcf_action common.tcfa_action #define tcf_tm common.tcfa_tm #define tcf_bstats common.tcfa_bstats -#define tcf_qstats common.tcfa_qstats #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock @@ -241,9 +243,7 @@ static inline void tcf_action_inc_drop_qstats(struct tc_action *a) qstats_drop_inc(this_cpu_ptr(a->cpu_qstats)); return; } - spin_lock(&a->tcfa_lock); - qstats_drop_inc(&a->tcfa_qstats); - spin_unlock(&a->tcfa_lock); + atomic_inc(&a->tcfa_drops); } static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a) @@ -252,9 +252,7 @@ static inline void tcf_action_inc_overlimit_qstats(struct tc_action *a) qstats_overlimit_inc(this_cpu_ptr(a->cpu_qstats)); return; } - spin_lock(&a->tcfa_lock); - qstats_overlimit_inc(&a->tcfa_qstats); - spin_unlock(&a->tcfa_lock); + atomic_inc(&a->tcfa_overlimits); } void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, -- cgit v1.2.3 From 4beb44a2d62dddfe450f310aa1a950901731cb3a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 31 Aug 2025 18:34:33 +0100 Subject: net: phy: add phy_interface_weight() Signed-off-by: Russell King (Oracle) 
Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1uslwn-00000001SOx-0a7H@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4c2b8b6e7187..bb45787d8684 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf) return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX); } +static inline unsigned int phy_interface_weight(const unsigned long *intf) +{ + return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX); +} + static inline void phy_interface_and(unsigned long *dst, const unsigned long *a, const unsigned long *b) { -- cgit v1.2.3 From 21368fcbb124d51b5d8bd8fa0a286a23c34a0888 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Mon, 25 Aug 2025 10:28:21 +0200 Subject: bitmap: introduce hardware-specific bitfield operations Hardware of various vendors, but very notably Rockchip, often uses 32-bit registers where the upper 16-bit half of the register is a write-enable mask for the lower half. This type of hardware setup allows for more granular concurrent register write access. Over the years, many drivers have hand-rolled their own version of this macro, usually without any checks, often called something like HIWORD_UPDATE or FIELD_PREP_HIWORD, commonly with slightly different semantics between them. Clearly there is a demand for such a macro, and thus the demand should be satisfied in a common header file. As this is a convention that spans across multiple vendors, and similar conventions may also have cross-vendor adoption, it's best if it lives in a vendor-agnostic header file that can be expanded over time. Add hw_bitfield.h with two macros: FIELD_PREP_WM16, and FIELD_PREP_WM16_CONST. The latter is a version that can be used in initializers, like FIELD_PREP_CONST. 
Suggested-by: Yury Norov (NVIDIA) Signed-off-by: Nicolas Frattaroli Acked-by: Jakub Kicinski Acked-by: Heiko Stuebner Signed-off-by: Yury Norov (NVIDIA) --- include/linux/hw_bitfield.h | 62 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/linux/hw_bitfield.h (limited to 'include') diff --git a/include/linux/hw_bitfield.h b/include/linux/hw_bitfield.h new file mode 100644 index 000000000000..df202e167ce4 --- /dev/null +++ b/include/linux/hw_bitfield.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2025, Collabora Ltd. + */ + +#ifndef _LINUX_HW_BITFIELD_H +#define _LINUX_HW_BITFIELD_H + +#include <linux/bitfield.h> +#include <linux/build_bug.h> +#include <linux/limits.h> + +/** + * FIELD_PREP_WM16() - prepare a bitfield element with a mask in the upper half + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP_WM16() masks and shifts up the value, as well as bitwise ORs the + * result with the mask shifted up by 16. + * + * This is useful for a common design of hardware registers where the upper + * 16-bit half of a 32-bit register is used as a write-enable mask. In such a + * register, a bit in the lower half is only updated if the corresponding bit + * in the upper half is high. + */ +#define FIELD_PREP_WM16(_mask, _val) \ + ({ \ + typeof(_val) __val = _val; \ + typeof(_mask) __mask = _mask; \ + __BF_FIELD_CHECK(__mask, ((u16)0U), __val, \ + "HWORD_UPDATE: "); \ + (((typeof(__mask))(__val) << __bf_shf(__mask)) & (__mask)) | \ + ((__mask) << 16); \ + }) + +/** + * FIELD_PREP_WM16_CONST() - prepare a constant bitfield element with a mask in + * the upper half + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP_WM16_CONST() masks and shifts up the value, as well as bitwise ORs + * the result with the mask shifted up by 16.
+ * + * This is useful for a common design of hardware registers where the upper + * 16-bit half of a 32-bit register is used as a write-enable mask. In such a + * register, a bit in the lower half is only updated if the corresponding bit + * in the upper half is high. + * + * Unlike FIELD_PREP_WM16(), this is a constant expression and can therefore + * be used in initializers. Error checking is less comfortable for this + * version. + */ +#define FIELD_PREP_WM16_CONST(_mask, _val) \ + ( \ + FIELD_PREP_CONST(_mask, _val) | \ + (BUILD_BUG_ON_ZERO(const_true((u64)(_mask) > U16_MAX)) + \ + ((_mask) << 16)) \ + ) + + +#endif /* _LINUX_HW_BITFIELD_H */ -- cgit v1.2.3 From df3a7762ee24ba6a33d4215244e329ca300f4819 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 2 Sep 2025 10:06:56 -0600 Subject: io_uring/uring_cmd: add io_uring_cmd_tw_t type alias Introduce a function pointer type alias io_uring_cmd_tw_t for the uring_cmd task work callback. This avoids repeating the signature in several places. Also name both arguments to the callback to clarify what they represent. 
Signed-off-by: Caleb Sander Mateos Reviewed-by: Keith Busch Link: https://lore.kernel.org/r/20250902160657.1726828-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 4bd3a7339243..7211157edfe9 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -11,11 +11,14 @@ /* io_uring_cmd is being issued again */ #define IORING_URING_CMD_REISSUE (1U << 31) +typedef void (*io_uring_cmd_tw_t)(struct io_uring_cmd *cmd, + unsigned issue_flags); + struct io_uring_cmd { struct file *file; const struct io_uring_sqe *sqe; /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); + io_uring_cmd_tw_t task_work_cb; u32 cmd_op; u32 flags; u8 pdu[32]; /* available inline for free use */ @@ -57,7 +60,7 @@ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, u64 res2, unsigned issue_flags); void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned), + io_uring_cmd_tw_t task_work_cb, unsigned flags); /* @@ -106,7 +109,7 @@ static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, { } static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned), + io_uring_cmd_tw_t task_work_cb, unsigned flags) { } @@ -143,13 +146,13 @@ static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd, /* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */ static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *, unsigned)) + io_uring_cmd_tw_t task_work_cb) { __io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE); } static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, -
void (*task_work_cb)(struct io_uring_cmd *, unsigned)) + io_uring_cmd_tw_t task_work_cb) { __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0); } -- cgit v1.2.3 From 04a3134f88a4bd03001a3093144819523cfca99e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 2 Sep 2025 22:45:24 -0700 Subject: net/mlx5: Add PSP capabilities structures and bits Add mlx5_ifc PSP related capabilities structures and HW definitions needed for PSP support in mlx5. Link: https://lore.kernel.org/netdev/20250828162953.2707727-1-daniel.zahka@gmail.com/ Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++ include/linux/mlx5/mlx5_ifc.h | 95 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 95 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9d2467f982ad..72a83666e67f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1248,6 +1248,7 @@ enum mlx5_cap_type { MLX5_CAP_IPSEC, MLX5_CAP_CRYPTO = 0x1a, MLX5_CAP_SHAMPO = 0x1d, + MLX5_CAP_PSP = 0x1e, MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, @@ -1487,6 +1488,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_SHAMPO(mdev, cap) \ MLX5_GET(shampo_cap, mdev->caps.hca[MLX5_CAP_SHAMPO]->cur, cap) +#define MLX5_CAP_PSP(mdev, cap)\ + MLX5_GET(psp_cap, (mdev)->caps.hca[MLX5_CAP_PSP]->cur, cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 44d497272162..e9f14a0c7f4f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -314,6 +314,8 @@ enum { MLX5_CMD_OP_CREATE_UMEM = 0xa08, MLX5_CMD_OP_DESTROY_UMEM = 0xa0a, MLX5_CMD_OP_SYNC_STEERING = 0xb00, + MLX5_CMD_OP_PSP_GEN_SPI = 0xb10, + MLX5_CMD_OP_PSP_ROTATE_KEY = 0xb11, MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, @@ -489,12 +491,14 @@ struct 
mlx5_ifc_flow_table_prop_layout_bits { u8 execute_aso[0x1]; u8 reserved_at_47[0x19]; - u8 reserved_at_60[0x2]; + u8 reformat_l2_to_l3_psp_tunnel[0x1]; + u8 reformat_l3_psp_tunnel_to_l2[0x1]; u8 reformat_insert[0x1]; u8 reformat_remove[0x1]; u8 macsec_encrypt[0x1]; u8 macsec_decrypt[0x1]; - u8 reserved_at_66[0x2]; + u8 psp_encrypt[0x1]; + u8 psp_decrypt[0x1]; u8 reformat_add_macsec[0x1]; u8 reformat_remove_macsec[0x1]; u8 reparse[0x1]; @@ -703,7 +707,7 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 metadata_reg_a[0x20]; - u8 reserved_at_1a0[0x8]; + u8 psp_syndrome[0x8]; u8 macsec_syndrome[0x8]; u8 ipsec_syndrome[0x8]; u8 ipsec_next_header[0x8]; @@ -1511,6 +1515,21 @@ struct mlx5_ifc_macsec_cap_bits { u8 reserved_at_40[0x7c0]; }; +struct mlx5_ifc_psp_cap_bits { + u8 reserved_at_0[0x1]; + u8 psp_crypto_offload[0x1]; + u8 reserved_at_2[0x1]; + u8 psp_crypto_esp_aes_gcm_256_encrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_128_encrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_256_decrypt[0x1]; + u8 psp_crypto_esp_aes_gcm_128_decrypt[0x1]; + u8 reserved_at_7[0x4]; + u8 log_max_num_of_psp_spi[0x5]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x7e0]; +}; + enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, @@ -1876,7 +1895,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2a0[0x7]; u8 mkey_pcie_tph[0x1]; - u8 reserved_at_2a8[0x3]; + u8 reserved_at_2a8[0x2]; + + u8 psp[0x1]; u8 shampo[0x1]; u8 reserved_at_2ac[0x4]; u8 max_wqe_sz_rq[0x10]; @@ -3803,6 +3824,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_macsec_cap_bits macsec_cap; struct mlx5_ifc_crypto_cap_bits crypto_cap; struct mlx5_ifc_ipsec_cap_bits ipsec_cap; + struct mlx5_ifc_psp_cap_bits psp_cap; u8 reserved_at_0[0x8000]; }; @@ -3832,6 +3854,7 @@ enum { enum { MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC = 0x0, MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC = 0x1, + MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_PSP = 0x2, }; struct mlx5_ifc_vlan_bits { @@ -7159,6 +7182,8 @@ enum mlx5_reformat_ctx_type { 
MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP = 0xa, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb, MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV6 = 0xc, + MLX5_REFORMAT_TYPE_ADD_PSP_TUNNEL = 0xd, + MLX5_REFORMAT_TYPE_DEL_PSP_TUNNEL = 0xe, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11, @@ -7285,6 +7310,7 @@ enum { MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME = 0x5D, MLX5_ACTION_IN_FIELD_OUT_EMD_47_32 = 0x6F, MLX5_ACTION_IN_FIELD_OUT_EMD_31_0 = 0x70, + MLX5_ACTION_IN_FIELD_PSP_SYNDROME = 0x71, }; struct mlx5_ifc_alloc_modify_header_context_out_bits { @@ -13079,6 +13105,7 @@ enum { MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS = 0x1, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC = 0x2, MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC = 0x4, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_PSP = 0x6, }; struct mlx5_ifc_tls_static_params_bits { @@ -13496,4 +13523,64 @@ enum mlx5e_pcie_cong_event_mod_field { MLX5_PCIE_CONG_EVENT_MOD_THRESH = BIT(2), }; +struct mlx5_ifc_psp_rotate_key_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_psp_rotate_key_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +enum mlx5_psp_gen_spi_in_key_size { + MLX5_PSP_GEN_SPI_IN_KEY_SIZE_128 = 0x0, + MLX5_PSP_GEN_SPI_IN_KEY_SIZE_256 = 0x1, +}; + +struct mlx5_ifc_key_spi_bits { + u8 spi[0x20]; + + u8 reserved_at_20[0x60]; + + u8 key[8][0x20]; +}; + +struct mlx5_ifc_psp_gen_spi_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 key_size[0x2]; + u8 reserved_at_62[0xe]; + u8 num_of_spi[0x10]; +}; + +struct mlx5_ifc_psp_gen_spi_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x10]; + u8 num_of_spi[0x10]; + + u8 reserved_at_60[0x20]; 
+ + struct mlx5_ifc_key_spi_bits key_spi[]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From ddeb66d2cb10f03a43d97a0ff2c3869d1951c87d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 26 Aug 2025 11:54:36 +0200 Subject: gpio: nomadik: don't print out global GPIO numbers in debugfs callbacks In order to further limit the number of references to the GPIO base number stored in struct gpio_chip, replace the global GPIO numbers in the output of debugfs callbacks by hardware offsets. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250826-gpio-dbg-show-base-v1-2-7f27cd7f2256@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/gpio-nomadik.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h index b5a84864650d..7ba53b499e16 100644 --- a/include/linux/gpio/gpio-nomadik.h +++ b/include/linux/gpio/gpio-nomadik.h @@ -261,8 +261,7 @@ struct platform_device; * true. */ void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, - struct gpio_chip *chip, unsigned int offset, - unsigned int gpio); + struct gpio_chip *chip, unsigned int offset); #else -- cgit v1.2.3 From 661f951e371cc134ea31c84238dbdc9a898b8403 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Aug 2025 12:02:44 +0000 Subject: sched/fair: Get rid of sched_domains_curr_level hack for tl->cpumask() Leon [1] and Vinicius [2] noted a topology_span_sane() warning during their testing starting from v6.16-rc1. Debug that followed pointed to the tl->mask() for the NODE domain being incorrectly resolved to that of the highest NUMA domain. tl->mask() for NODE is set to the sd_numa_mask() which depends on the global "sched_domains_curr_level" hack. "sched_domains_curr_level" is set to the "tl->numa_level" during tl traversal in build_sched_domains() calling sd_init() but was not reset before topology_span_sane(). 
Since "tl->numa_level" still reflected the old value from build_sched_domains(), topology_span_sane() for the NODE domain trips when the span of the last NUMA domain overlaps. Instead of replicating the "sched_domains_curr_level" hack, get rid of it entirely and instead, pass the entire "sched_domain_topology_level" object to tl->cpumask() function to prevent such mishap in the future. sd_numa_mask() now directly references "tl->numa_level" instead of relying on the global "sched_domains_curr_level" hack to index into sched_domains_numa_masks[]. The original warning was reproducible on the following NUMA topology reported by Leon: $ sudo numactl -H available: 5 nodes (0-4) node 0 cpus: 0 1 node 0 size: 2927 MB node 0 free: 1603 MB node 1 cpus: 2 3 node 1 size: 3023 MB node 1 free: 3008 MB node 2 cpus: 4 5 node 2 size: 3023 MB node 2 free: 3007 MB node 3 cpus: 6 7 node 3 size: 3023 MB node 3 free: 3002 MB node 4 cpus: 8 9 node 4 size: 3022 MB node 4 free: 2718 MB node distances: node 0 1 2 3 4 0: 10 39 38 37 36 1: 39 10 38 37 36 2: 38 38 10 37 36 3: 37 37 37 10 36 4: 36 36 36 36 10 The above topology can be mimicked using the following QEMU cmd that was used to reproduce the warning and test the fix: sudo qemu-system-x86_64 -enable-kvm -cpu host \ -m 20G -smp cpus=10,sockets=10 -machine q35 \ -object memory-backend-ram,size=4G,id=m0 \ -object memory-backend-ram,size=4G,id=m1 \ -object memory-backend-ram,size=4G,id=m2 \ -object memory-backend-ram,size=4G,id=m3 \ -object memory-backend-ram,size=4G,id=m4 \ -numa node,cpus=0-1,memdev=m0,nodeid=0 \ -numa node,cpus=2-3,memdev=m1,nodeid=1 \ -numa node,cpus=4-5,memdev=m2,nodeid=2 \ -numa node,cpus=6-7,memdev=m3,nodeid=3 \ -numa node,cpus=8-9,memdev=m4,nodeid=4 \ -numa dist,src=0,dst=1,val=39 \ -numa dist,src=0,dst=2,val=38 \ -numa dist,src=0,dst=3,val=37 \ -numa dist,src=0,dst=4,val=36 \ -numa dist,src=1,dst=0,val=39 \ -numa dist,src=1,dst=2,val=38 \ -numa dist,src=1,dst=3,val=37 \ -numa dist,src=1,dst=4,val=36 \ -numa 
dist,src=2,dst=0,val=38 \ -numa dist,src=2,dst=1,val=38 \ -numa dist,src=2,dst=3,val=37 \ -numa dist,src=2,dst=4,val=36 \ -numa dist,src=3,dst=0,val=37 \ -numa dist,src=3,dst=1,val=37 \ -numa dist,src=3,dst=2,val=37 \ -numa dist,src=3,dst=4,val=36 \ -numa dist,src=4,dst=0,val=36 \ -numa dist,src=4,dst=1,val=36 \ -numa dist,src=4,dst=2,val=36 \ -numa dist,src=4,dst=3,val=36 \ ... [ prateek: Moved common functions to include/linux/sched/topology.h, reuse the common bits for s390 and ppc, commit message ] Closes: https://lore.kernel.org/lkml/20250610110701.GA256154@unreal/ [1] Fixes: ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap") # ce29a7da84cd, f55dac1dafb3 Signed-off-by: Peter Zijlstra (Intel) Reported-by: Leon Romanovsky Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Shrikanth Hegde Tested-by: Valentin Schneider # x86 Tested-by: Shrikanth Hegde # powerpc Link: https://lore.kernel.org/lkml/a3de98387abad28592e6ab591f3ff6107fe01dc1.1755893468.git.tim.c.chen@linux.intel.com/ [2] --- include/linux/sched/topology.h | 28 +++++++++++++++++++++++++++- include/linux/topology.h | 2 +- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 5263746b63e8..a3a24e115d44 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -30,11 +30,19 @@ struct sd_flag_debug { }; extern const struct sd_flag_debug sd_flag_debug[]; +struct sched_domain_topology_level; + #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; } + +static inline const +struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return cpu_smt_mask(cpu); +} #endif #ifdef CONFIG_SCHED_CLUSTER @@ -42,6 +50,12 @@ static inline int cpu_cluster_flags(void) { return SD_CLUSTER | SD_SHARE_LLC; } + +static inline 
const +struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return cpu_clustergroup_mask(cpu); +} #endif #ifdef CONFIG_SCHED_MC @@ -49,8 +63,20 @@ static inline int cpu_core_flags(void) { return SD_SHARE_LLC; } + +static inline const +struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return cpu_coregroup_mask(cpu); +} #endif +static inline const +struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu) +{ + return cpu_node_mask(cpu); +} + #ifdef CONFIG_NUMA static inline int cpu_numa_flags(void) { @@ -172,7 +198,7 @@ bool cpus_equal_capacity(int this_cpu, int that_cpu); bool cpus_share_cache(int this_cpu, int that_cpu); bool cpus_share_resources(int this_cpu, int that_cpu); -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef const struct cpumask *(*sched_domain_mask_f)(struct sched_domain_topology_level *tl, int cpu); typedef int (*sched_domain_flags_f)(void); struct sd_data { diff --git a/include/linux/topology.h b/include/linux/topology.h index 33b7fda97d39..6575af39fd10 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -260,7 +260,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu) #endif -static inline const struct cpumask *cpu_cpu_mask(int cpu) +static inline const struct cpumask *cpu_node_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); } -- cgit v1.2.3 From 91c614f09abf1d45aac6b475d82a36c704b527ee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Aug 2025 10:55:55 +0200 Subject: sched: Move STDL_INIT() functions out-of-line Since all these functions are address-taken in SDTL_INIT() and called indirectly, it doesn't really make sense for them to be inline. 
Suggested-by: Christophe Leroy Signed-off-by: Peter Zijlstra (Intel) --- include/linux/sched/topology.h | 49 ++++++------------------------------------ 1 file changed, 7 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index a3a24e115d44..bbcfdf12aa6e 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -33,56 +33,21 @@ extern const struct sd_flag_debug sd_flag_debug[]; struct sched_domain_topology_level; #ifdef CONFIG_SCHED_SMT -static inline int cpu_smt_flags(void) -{ - return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; -} - -static inline const -struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu) -{ - return cpu_smt_mask(cpu); -} +extern int cpu_smt_flags(void); +extern const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu); #endif #ifdef CONFIG_SCHED_CLUSTER -static inline int cpu_cluster_flags(void) -{ - return SD_CLUSTER | SD_SHARE_LLC; -} - -static inline const -struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu) -{ - return cpu_clustergroup_mask(cpu); -} +extern int cpu_cluster_flags(void); +extern const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu); #endif #ifdef CONFIG_SCHED_MC -static inline int cpu_core_flags(void) -{ - return SD_SHARE_LLC; -} - -static inline const -struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu) -{ - return cpu_coregroup_mask(cpu); -} +extern int cpu_core_flags(void); +extern const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu); #endif -static inline const -struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu) -{ - return cpu_node_mask(cpu); -} - -#ifdef CONFIG_NUMA -static inline int cpu_numa_flags(void) -{ - return SD_NUMA; -} -#endif +extern const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu); extern int 
arch_asym_cpu_priority(int cpu); -- cgit v1.2.3 From 2cd571245b43492867bf1b4252485f3e6647b643 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Fri, 29 Aug 2025 16:11:16 +0800 Subject: sched/fair: Add related data structure for task based throttle Add related data structures for this new throttle functionality. Tested-by: K Prateek Nayak Signed-off-by: Valentin Schneider Signed-off-by: Aaron Lu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Chengming Zhou Tested-by: Valentin Schneider Tested-by: Matteo Martelli Link: https://lore.kernel.org/r/20250829081120.806-2-ziqianlu@bytedance.com --- include/linux/sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8188b833350..644a01bdae70 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -883,6 +883,11 @@ struct task_struct { #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; +#ifdef CONFIG_CFS_BANDWIDTH + struct callback_head sched_throttle_work; + struct list_head throttle_node; + bool throttled; +#endif #endif -- cgit v1.2.3 From 74e2ef72bd4b25ce21c8f309d4f5b91b5df9ff5b Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Thu, 24 Jul 2025 15:41:36 +0530 Subject: wifi: brcmfmac: fix 43752 SDIO FWVID incorrectly labelled as Cypress (CYW) Cypress(Infineon) is not the vendor for this 43752 SDIO WLAN chip, and so has not officially released any firmware binary for it. It is incorrect to maintain this WLAN chip with firmware vendor ID as "CYW". So relabel the chip's firmware Vendor ID as "WCC" as suggested by the maintainer.
Fixes: d2587c57ffd8 ("brcmfmac: add 43752 SDIO ids and initialization") Fixes: f74f1ec22dc2 ("wifi: brcmfmac: add support for Cypress firmware api") Signed-off-by: Gokul Sivakumar Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250724101136.6691-1-gokulkumar.sivakumar@infineon.com Signed-off-by: Johannes Berg --- include/linux/mmc/sdio_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index fe3d6d98f8da..673cbdf43453 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -77,7 +77,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43439 0xa9af #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf #define SDIO_DEVICE_ID_BROADCOM_43751 0xaae7 -#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752 0xaae8 +#define SDIO_DEVICE_ID_BROADCOM_43752 0xaae8 #define SDIO_VENDOR_ID_CYPRESS 0x04b4 #define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43439 0xbd3d -- cgit v1.2.3 From 762af5a2aa0ad18da1316666dae30d369268d44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 25 Aug 2025 15:26:35 +0200 Subject: vdso/vsyscall: Avoid slow division loop in auxiliary clock update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to __iter_div_u64_rem() in vdso_time_update_aux() is a wrapper around subtraction. It cannot be used to divide large numbers, as that introduces long, computationally expensive delays. A regular u64 division is also not possible in the timekeeper update path as it can be too slow. Instead of splitting the ktime_t offset into second and subsecond components during the timekeeper update fast-path, do it together with the adjustment of tk->offs_aux in the slow-path. Equivalent to the handling of offs_boot and monotonic_to_boot. Reuse the storage of monotonic_to_boot for the new field, as it is not used by auxiliary timekeepers.
Fixes: 380b84e168e5 ("vdso/vsyscall: Update auxiliary clock data in the datapage") Reported-by: Miroslav Lichvar Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250825-vdso-auxclock-division-v1-1-a1d32a16a313@linutronix.de Closes: https://lore.kernel.org/lkml/aKwsNNWsHJg8IKzj@localhost/ --- include/linux/timekeeper_internal.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index c27aac67cb3f..b8ae89ea28ab 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -76,6 +76,7 @@ struct tk_read_base { * @cs_was_changed_seq: The sequence number of clocksource change events * @clock_valid: Indicator for valid clock * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset + * @monotonic_to_aux: CLOCK_MONOTONIC to CLOCK_AUX offset * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP * interval. @@ -117,6 +118,9 @@ struct tk_read_base { * @offs_aux is used by the auxiliary timekeepers which do not utilize any * of the regular timekeeper offset fields. * + * @monotonic_to_aux is a timespec64 representation of @offs_aux to + * accelerate the VDSO update for CLOCK_AUX. + * * The cacheline ordering of the structure is optimized for in kernel usage of * the ktime_get() and ktime_get_ts64() family of time accessors. 
Struct * timekeeper is prepended in the core timekeeping code with a sequence count, @@ -159,7 +163,10 @@ struct timekeeper { u8 cs_was_changed_seq; u8 clock_valid; - struct timespec64 monotonic_to_boot; + union { + struct timespec64 monotonic_to_boot; + struct timespec64 monotonic_to_aux; + }; u64 cycle_interval; u64 xtime_interval; -- cgit v1.2.3 From 0a26e5eb78fb1627beb8e3eb172737f8492d2799 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Aug 2025 15:34:23 -0500 Subject: jiffies: Remove obsolete SHIFTED_HZ comment b3c869d35b9b ("jiffies: Remove compile time assumptions about CLOCK_TICK_RATE") removed the last definition of SHIFTED_HZ but left behind comments about it. Remove the comments as well. Signed-off-by: Bjorn Helgaas Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250825203425.796034-1-helgaas@kernel.org --- include/linux/jiffies.h | 2 +- include/vdso/jiffies.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 91b20788273d..0d1927da8055 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -61,7 +61,7 @@ extern void register_refined_jiffies(long clock_tick_rate); -/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ +/* TICK_USEC is the time between ticks in usec */ #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) /* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ diff --git a/include/vdso/jiffies.h b/include/vdso/jiffies.h index 2f9d596c8b29..8ca04a141412 100644 --- a/include/vdso/jiffies.h +++ b/include/vdso/jiffies.h @@ -5,7 +5,7 @@ #include /* for HZ */ #include -/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */ +/* TICK_NSEC is the time between ticks in nsec */ #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) #endif /* __VDSO_JIFFIES_H */ -- cgit v1.2.3 From 4a76a0a889cef284327f265f97edc4ff2f3e11cc Mon Sep 17 00:00:00 2001 From: WeiHao Li Date: Sun, 31 
Aug 2025 18:48:51 +0800 Subject: dt-bindings: clock: rk3368: Add SCLK_MIPIDSI_24M Add a clock id for mipi dsi reference clock, mipi dsi node used it. Signed-off-by: WeiHao Li Acked-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250831104855.45883-4-cn.liweihao@gmail.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rk3368-cru.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3368-cru.h b/include/dt-bindings/clock/rk3368-cru.h index ebae3cbf8192..b951e2906948 100644 --- a/include/dt-bindings/clock/rk3368-cru.h +++ b/include/dt-bindings/clock/rk3368-cru.h @@ -72,6 +72,7 @@ #define SCLK_SFC 126 #define SCLK_MAC 127 #define SCLK_MACREF_OUT 128 +#define SCLK_MIPIDSI_24M 129 #define SCLK_TIMER10 133 #define SCLK_TIMER11 134 #define SCLK_TIMER12 135 -- cgit v1.2.3 From a576a849d5f33356e0d8fd3eae4fbaf8869417e5 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 5 Aug 2025 15:34:43 +0200 Subject: of/irq: Convert of_msi_map_id() callers to of_msi_xlate() With the introduction of the of_msi_xlate() function, the OF layer provides an API to map a device ID and retrieve the MSI controller node the ID is mapped to with a single call. of_msi_map_id() is currently used to map a deviceID to a specific MSI controller node; of_msi_xlate() can be used for that purpose too, there is no need to keep the two functions. Convert of_msi_map_id() to of_msi_xlate() calls and update the of_msi_xlate() documentation to describe how the struct device_node pointer passed in should be set-up to either provide the MSI controller node target or receive its pointer upon mapping completion. 
Signed-off-by: Lorenzo Pieralisi Cc: Thomas Gleixner Cc: Rob Herring Cc: Marc Zyngier Acked-by: Thomas Gleixner Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250805133443.936955-1-lpieralisi@kernel.org Signed-off-by: Rob Herring (Arm) --- include/linux/of_irq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index a480063c9cb1..1db8543dfc8a 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -55,7 +55,6 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 bus_token); extern void of_msi_configure(struct device *dev, const struct device_node *np); extern u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in); -u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline void of_irq_init(const struct of_device_id *matches) { @@ -105,11 +104,6 @@ static inline u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, { return id_in; } -static inline u32 of_msi_map_id(struct device *dev, - struct device_node *msi_np, u32 id_in) -{ - return id_in; -} #endif #if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC) -- cgit v1.2.3 From 6eee1ef9e59853a49e926d116a004c53a9819dfd Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Fri, 29 Aug 2025 16:55:37 +0200 Subject: drm: panel-backlight-quirks: Convert brightness quirk to generic structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the brightness quirk is limited to minimum brightness only. Refactor it to a structure, so that more quirks can be added in the future. Reserve 0 value for "no quirk", and use u16 to allow minimum brightness up to 255. 
Tested-by: Philip Müller Reviewed-by: Mario Limonciello Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250829145541.512671-3-lkml@antheas.dev Acked-by: Alex Deucher Signed-off-by: Mario Limonciello (AMD) --- include/drm/drm_utils.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_utils.h b/include/drm/drm_utils.h index 15fa9b6865f4..82eeee4a58ab 100644 --- a/include/drm/drm_utils.h +++ b/include/drm/drm_utils.h @@ -16,7 +16,12 @@ struct drm_edid; int drm_get_panel_orientation_quirk(int width, int height); -int drm_get_panel_min_brightness_quirk(const struct drm_edid *edid); +struct drm_panel_backlight_quirk { + u16 min_brightness; +}; + +const struct drm_panel_backlight_quirk * +drm_get_panel_backlight_quirk(const struct drm_edid *edid); signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec); -- cgit v1.2.3 From aef10b1138e995ba9aa4357ed78cd05686cabbe1 Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis Date: Fri, 29 Aug 2025 16:55:39 +0200 Subject: drm: panel-backlight-quirks: Add brightness mask quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain OLED devices malfunction on specific brightness levels. Specifically, when DP_SOURCE_BACKLIGHT_LEVEL is written to with the first byte being 0x00 and sometimes 0x01, the panel forcibly turns off until the device sleeps again. Below are some examples. This was found by iterating over brightness ranges while printing DP_SOURCE_BACKLIGHT_LEVEL. It was found that the screen would malfunction on specific values, and some of them were collected. Therefore, introduce a quirk where the minor byte of brightness is OR'd with 0x03 to avoid the range of invalid values. This quirk was tested by removing the workarounds and iterating from 0 to 50_000 value ranges with a cadence of 0.2s/it. The range of the panel is 1000...400_000, so the values were slightly interpolated during testing.
The custom brightness curve added on 6.15 was disabled. 86016: 10101000000000000 86272: 10101000100000000 87808: 10101011100000000 251648: 111101011100000000 251649: 111101011100000001 86144: 10101000010000000 87809: 10101011100000001 251650: 111101011100000010 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3803 Tested-by: Philip Müller Reviewed-by: Mario Limonciello Signed-off-by: Antheas Kapenekakis Link: https://lore.kernel.org/r/20250829145541.512671-5-lkml@antheas.dev Acked-by: Alex Deucher Signed-off-by: Mario Limonciello (AMD) --- include/drm/drm_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_utils.h b/include/drm/drm_utils.h index 82eeee4a58ab..6a46f755daba 100644 --- a/include/drm/drm_utils.h +++ b/include/drm/drm_utils.h @@ -18,6 +18,7 @@ int drm_get_panel_orientation_quirk(int width, int height); struct drm_panel_backlight_quirk { u16 min_brightness; + u32 brightness_mask; }; const struct drm_panel_backlight_quirk * -- cgit v1.2.3 From 07bab7b81d8a4de604ae4175978adf37137c35d6 Mon Sep 17 00:00:00 2001 From: Andrea della Porta Date: Mon, 23 Jun 2025 23:46:27 +0200 Subject: dt-bindings: clock: rp1: Add missing MIPI DSI defines Declare the positional index for the RP1 MIPI clocks. 
Signed-off-by: Andrea della Porta Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/c20066500908db854aa4816b40e956296bab526a.1750714412.git.andrea.porta@suse.com Signed-off-by: Florian Fainelli --- include/dt-bindings/clock/raspberrypi,rp1-clocks.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/raspberrypi,rp1-clocks.h b/include/dt-bindings/clock/raspberrypi,rp1-clocks.h index 248efb895f35..7915fb8197bf 100644 --- a/include/dt-bindings/clock/raspberrypi,rp1-clocks.h +++ b/include/dt-bindings/clock/raspberrypi,rp1-clocks.h @@ -58,4 +58,8 @@ #define RP1_PLL_VIDEO_PRI_PH 43 #define RP1_PLL_AUDIO_TERN 44 +/* MIPI clocks managed by the DSI driver */ +#define RP1_CLK_MIPI0_DSI_BYTECLOCK 45 +#define RP1_CLK_MIPI1_DSI_BYTECLOCK 46 + #endif -- cgit v1.2.3 From 54dbd2a8e974b900b18639e75f62702a4334ddc0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Sep 2025 14:52:56 +0300 Subject: PCI/P2PDMA: Reduce scope of pci_has_p2pmem() pci_has_p2pmem() is not used outside of p2pdma.c, and there is no need to export it for use by modules. 
Signed-off-by: Leon Romanovsky Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe Link: https://patch.msgid.link/d40f3f1decf54c9236bc38b48a6aae612a5c182f.1756900291.git.leon@kernel.org --- include/linux/pci-p2pdma.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 075c20b161d9..951f81a38f3a 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -21,7 +21,6 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset); int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, int num_clients, bool verbose); -bool pci_has_p2pmem(struct pci_dev *pdev); struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients); void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size); void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size); @@ -45,10 +44,6 @@ static inline int pci_p2pdma_distance_many(struct pci_dev *provider, { return -1; } -static inline bool pci_has_p2pmem(struct pci_dev *pdev) -{ - return false; -} static inline struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients) { -- cgit v1.2.3 From 929324913e0caabea91b50fa71e41d70b766f7dc Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 2 Sep 2025 23:11:35 +0200 Subject: net: Add rfs_needed() helper Add a helper to check if RFS is needed or not. Allows to make the code a bit cleaner and the next patch to have MPTCP use this helper to decide whether or not to iterate over the subflows. tun_flow_update() was calling sock_rps_record_flow_hash() regardless of the state of rfs_needed. This was not really a bug as sock_flow_table simply ends up being NULL and thus everything will be fine. This commit here thus also implicitly makes tun_flow_update() respect the state of rfs_needed. 
Suggested-by: Matthieu Baerts Signed-off-by: Christoph Paasch Acked-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250902-net-next-mptcp-misc-feat-6-18-v2-3-fa02bb3188b1@kernel.org Signed-off-by: Jakub Kicinski --- include/net/rps.h | 85 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/net/rps.h b/include/net/rps.h index 9917dce42ca4..f1794cd2e7fb 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -85,11 +85,8 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, WRITE_ONCE(table->ents[index], val); } -#endif /* CONFIG_RPS */ - -static inline void sock_rps_record_flow_hash(__u32 hash) +static inline void _sock_rps_record_flow_hash(__u32 hash) { -#ifdef CONFIG_RPS struct rps_sock_flow_table *sock_flow_table; if (!hash) @@ -99,42 +96,33 @@ static inline void sock_rps_record_flow_hash(__u32 hash) if (sock_flow_table) rps_record_sock_flow(sock_flow_table, hash); rcu_read_unlock(); -#endif } -static inline void sock_rps_record_flow(const struct sock *sk) +static inline void _sock_rps_record_flow(const struct sock *sk) { -#ifdef CONFIG_RPS - if (static_branch_unlikely(&rfs_needed)) { - /* Reading sk->sk_rxhash might incur an expensive cache line - * miss. - * - * TCP_ESTABLISHED does cover almost all states where RFS - * might be useful, and is cheaper [1] than testing : - * IPv4: inet_sk(sk)->inet_daddr - * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) - * OR an additional socket flag - * [1] : sk_state and sk_prot are in the same cache line. + /* Reading sk->sk_rxhash might incur an expensive cache line + * miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. 
+ */ + if (sk->sk_state == TCP_ESTABLISHED) { + /* This READ_ONCE() is paired with the WRITE_ONCE() + * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). */ - if (sk->sk_state == TCP_ESTABLISHED) { - /* This READ_ONCE() is paired with the WRITE_ONCE() - * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). - */ - sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); - } + _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); } -#endif } -static inline void sock_rps_delete_flow(const struct sock *sk) +static inline void _sock_rps_delete_flow(const struct sock *sk) { -#ifdef CONFIG_RPS struct rps_sock_flow_table *table; u32 hash, index; - if (!static_branch_unlikely(&rfs_needed)) - return; - hash = READ_ONCE(sk->sk_rxhash); if (!hash) return; @@ -147,6 +135,45 @@ static inline void sock_rps_delete_flow(const struct sock *sk) WRITE_ONCE(table->ents[index], RPS_NO_CPU); } rcu_read_unlock(); +} +#endif /* CONFIG_RPS */ + +static inline bool rfs_is_needed(void) +{ +#ifdef CONFIG_RPS + return static_branch_unlikely(&rfs_needed); +#else + return false; +#endif +} + +static inline void sock_rps_record_flow_hash(__u32 hash) +{ +#ifdef CONFIG_RPS + if (!rfs_is_needed()) + return; + + _sock_rps_record_flow_hash(hash); +#endif +} + +static inline void sock_rps_record_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (!rfs_is_needed()) + return; + + _sock_rps_record_flow(sk); +#endif +} + +static inline void sock_rps_delete_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (!rfs_is_needed()) + return; + + _sock_rps_delete_flow(sk); #endif } -- cgit v1.2.3 From 781c118c3ece2b546c6c41f617cccb36cb9534f1 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 29 Aug 2025 13:28:03 +0530 Subject: dt-bindings: clock: Add DISPCC and reset controller for GLYMUR SoC Add the device tree bindings for the display clock controller which are required on Qualcomm Glymur SoC. 
Signed-off-by: Taniya Das Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250829-glymur-disp-clock-controllers-v1-1-0ce6fabd837c@oss.qualcomm.com [bjorn: Dropped unnecessary include in DT example] Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,glymur-dispcc.h | 114 +++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,glymur-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,glymur-dispcc.h b/include/dt-bindings/clock/qcom,glymur-dispcc.h new file mode 100644 index 000000000000..a845d76defe2 --- /dev/null +++ b/include/dt-bindings/clock/qcom,glymur-dispcc.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2025, Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISP_CC_GLYMUR_H +#define _DT_BINDINGS_CLK_QCOM_DISP_CC_GLYMUR_H + +/* DISP_CC clocks */ +#define DISP_CC_ESYNC0_CLK 0 +#define DISP_CC_ESYNC0_CLK_SRC 1 +#define DISP_CC_ESYNC1_CLK 2 +#define DISP_CC_ESYNC1_CLK_SRC 3 +#define DISP_CC_MDSS_ACCU_SHIFT_CLK 4 +#define DISP_CC_MDSS_AHB1_CLK 5 +#define DISP_CC_MDSS_AHB_CLK 6 +#define DISP_CC_MDSS_AHB_CLK_SRC 7 +#define DISP_CC_MDSS_BYTE0_CLK 8 +#define DISP_CC_MDSS_BYTE0_CLK_SRC 9 +#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC 10 +#define DISP_CC_MDSS_BYTE0_INTF_CLK 11 +#define DISP_CC_MDSS_BYTE1_CLK 12 +#define DISP_CC_MDSS_BYTE1_CLK_SRC 13 +#define DISP_CC_MDSS_BYTE1_DIV_CLK_SRC 14 +#define DISP_CC_MDSS_BYTE1_INTF_CLK 15 +#define DISP_CC_MDSS_DPTX0_AUX_CLK 16 +#define DISP_CC_MDSS_DPTX0_AUX_CLK_SRC 17 +#define DISP_CC_MDSS_DPTX0_LINK_CLK 18 +#define DISP_CC_MDSS_DPTX0_LINK_CLK_SRC 19 +#define DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC 20 +#define DISP_CC_MDSS_DPTX0_LINK_DPIN_CLK 21 +#define DISP_CC_MDSS_DPTX0_LINK_DPIN_DIV_CLK_SRC 22 +#define DISP_CC_MDSS_DPTX0_LINK_INTF_CLK 23 +#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK 24 +#define DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC 25 
+#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK 26 +#define DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC 27 +#define DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK 28 +#define DISP_CC_MDSS_DPTX1_AUX_CLK 29 +#define DISP_CC_MDSS_DPTX1_AUX_CLK_SRC 30 +#define DISP_CC_MDSS_DPTX1_LINK_CLK 31 +#define DISP_CC_MDSS_DPTX1_LINK_CLK_SRC 32 +#define DISP_CC_MDSS_DPTX1_LINK_DIV_CLK_SRC 33 +#define DISP_CC_MDSS_DPTX1_LINK_DPIN_CLK 34 +#define DISP_CC_MDSS_DPTX1_LINK_DPIN_DIV_CLK_SRC 35 +#define DISP_CC_MDSS_DPTX1_LINK_INTF_CLK 36 +#define DISP_CC_MDSS_DPTX1_PIXEL0_CLK 37 +#define DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC 38 +#define DISP_CC_MDSS_DPTX1_PIXEL1_CLK 39 +#define DISP_CC_MDSS_DPTX1_PIXEL1_CLK_SRC 40 +#define DISP_CC_MDSS_DPTX1_USB_ROUTER_LINK_INTF_CLK 41 +#define DISP_CC_MDSS_DPTX2_AUX_CLK 42 +#define DISP_CC_MDSS_DPTX2_AUX_CLK_SRC 43 +#define DISP_CC_MDSS_DPTX2_LINK_CLK 44 +#define DISP_CC_MDSS_DPTX2_LINK_CLK_SRC 45 +#define DISP_CC_MDSS_DPTX2_LINK_DIV_CLK_SRC 46 +#define DISP_CC_MDSS_DPTX2_LINK_DPIN_CLK 47 +#define DISP_CC_MDSS_DPTX2_LINK_DPIN_DIV_CLK_SRC 48 +#define DISP_CC_MDSS_DPTX2_LINK_INTF_CLK 49 +#define DISP_CC_MDSS_DPTX2_PIXEL0_CLK 50 +#define DISP_CC_MDSS_DPTX2_PIXEL0_CLK_SRC 51 +#define DISP_CC_MDSS_DPTX2_PIXEL1_CLK 52 +#define DISP_CC_MDSS_DPTX2_PIXEL1_CLK_SRC 53 +#define DISP_CC_MDSS_DPTX2_USB_ROUTER_LINK_INTF_CLK 54 +#define DISP_CC_MDSS_DPTX3_AUX_CLK 55 +#define DISP_CC_MDSS_DPTX3_AUX_CLK_SRC 56 +#define DISP_CC_MDSS_DPTX3_LINK_CLK 57 +#define DISP_CC_MDSS_DPTX3_LINK_CLK_SRC 58 +#define DISP_CC_MDSS_DPTX3_LINK_DIV_CLK_SRC 59 +#define DISP_CC_MDSS_DPTX3_LINK_DPIN_CLK 60 +#define DISP_CC_MDSS_DPTX3_LINK_DPIN_DIV_CLK_SRC 61 +#define DISP_CC_MDSS_DPTX3_LINK_INTF_CLK 62 +#define DISP_CC_MDSS_DPTX3_PIXEL0_CLK 63 +#define DISP_CC_MDSS_DPTX3_PIXEL0_CLK_SRC 64 +#define DISP_CC_MDSS_ESC0_CLK 65 +#define DISP_CC_MDSS_ESC0_CLK_SRC 66 +#define DISP_CC_MDSS_ESC1_CLK 67 +#define DISP_CC_MDSS_ESC1_CLK_SRC 68 +#define DISP_CC_MDSS_MDP1_CLK 69 +#define DISP_CC_MDSS_MDP_CLK 70 +#define 
DISP_CC_MDSS_MDP_CLK_SRC 71 +#define DISP_CC_MDSS_MDP_LUT1_CLK 72 +#define DISP_CC_MDSS_MDP_LUT_CLK 73 +#define DISP_CC_MDSS_NON_GDSC_AHB_CLK 74 +#define DISP_CC_MDSS_PCLK0_CLK 75 +#define DISP_CC_MDSS_PCLK0_CLK_SRC 76 +#define DISP_CC_MDSS_PCLK1_CLK 77 +#define DISP_CC_MDSS_PCLK1_CLK_SRC 78 +#define DISP_CC_MDSS_PCLK2_CLK 79 +#define DISP_CC_MDSS_PCLK2_CLK_SRC 80 +#define DISP_CC_MDSS_RSCC_AHB_CLK 81 +#define DISP_CC_MDSS_RSCC_VSYNC_CLK 82 +#define DISP_CC_MDSS_VSYNC1_CLK 83 +#define DISP_CC_MDSS_VSYNC_CLK 84 +#define DISP_CC_MDSS_VSYNC_CLK_SRC 85 +#define DISP_CC_OSC_CLK 86 +#define DISP_CC_OSC_CLK_SRC 87 +#define DISP_CC_PLL0 88 +#define DISP_CC_PLL1 89 +#define DISP_CC_SLEEP_CLK 90 +#define DISP_CC_SLEEP_CLK_SRC 91 +#define DISP_CC_XO_CLK 92 +#define DISP_CC_XO_CLK_SRC 93 + +/* DISP_CC power domains */ +#define DISP_CC_MDSS_CORE_GDSC 0 +#define DISP_CC_MDSS_CORE_INT2_GDSC 1 + +/* DISP_CC resets */ +#define DISP_CC_MDSS_CORE_BCR 0 +#define DISP_CC_MDSS_CORE_INT2_BCR 1 +#define DISP_CC_MDSS_RSCC_BCR 2 + +#endif -- cgit v1.2.3 From 017bda80fd0ddd24ea8cf932c3bd970491e0abc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Tue, 2 Sep 2025 15:46:37 +0000 Subject: genetlink: fix typo in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this context "not that ..." should properly be "note that ...". 
Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250902154640.759815-4-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/genetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index a03d56765832..7b84f2cef8b1 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -62,7 +62,7 @@ struct genl_info; * @small_ops: the small-struct operations supported by this family * @n_small_ops: number of small-struct operations supported by this family * @split_ops: the split do/dump form of operation definition - * @n_split_ops: number of entries in @split_ops, not that with split do/dump + * @n_split_ops: number of entries in @split_ops, note that with split do/dump * ops the number of entries is not the same as number of commands * @sock_priv_size: the size of per-socket private memory * @sock_priv_init: the per-socket private memory initializer -- cgit v1.2.3 From 5d6b58c932ec451a5c41482790eb5b1ecf165a94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Sep 2025 18:36:03 +0000 Subject: net: lockless sock_i_ino() Followup of commit c51da3f7a161 ("net: remove sock_i_uid()") A recent syzbot report was the trigger for this change. Over the years, we had many problems caused by the read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid(). We could fix smc_diag_dump_proto() or make a more radical move: Instead of waiting for new syzbot reports, cache the socket inode number in sk->sk_ino, so that we no longer need to acquire sk->sk_callback_lock in sock_i_ino(). This makes socket dumps faster (one less cache line miss, and two atomic ops avoided). 
Prior art: commit 25a9c8a4431c ("netlink: Add __sock_i_ino() for __netlink_diag_dump().") commit 4f9bf2a2f5aa ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.") commit efc3dbc37412 ("rds: Make rds_sock_lock BH rather than IRQ safe.") Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.") Reported-by: syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250902183603.740428-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index c8a4b283df6f..fb13322a11fc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -285,6 +285,7 @@ struct sk_filter; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner + * @sk_ino: inode number (zero if orphaned) * @sk_prefer_busy_poll: prefer busypolling over softirq processing * @sk_busy_poll_budget: napi processing budget when busypolling * @sk_priority: %SO_PRIORITY setting @@ -518,6 +519,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + unsigned long sk_ino; spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; @@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { sk->sk_socket = sock; + if (sock) { + WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); + WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); + } } static inline wait_queue_head_t *sk_sleep(struct sock *sk) @@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk) sk_set_socket(sk, NULL); sk->sk_wq = NULL; /* Note: sk_uid is unchanged. 
*/ + WRITE_ONCE(sk->sk_ino, 0); write_unlock_bh(&sk->sk_callback_lock); } @@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) rcu_assign_pointer(sk->sk_wq, &parent->wq); parent->sk = sk; sk_set_socket(sk, parent); - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } +static inline unsigned long sock_i_ino(const struct sock *sk) +{ + /* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */ + return READ_ONCE(sk->sk_ino); +} + static inline kuid_t sk_uid(const struct sock *sk) { /* Paired with WRITE_ONCE() in sockfs_setattr() */ return READ_ONCE(sk->sk_uid); } -unsigned long __sock_i_ino(struct sock *sk); -unsigned long sock_i_ino(struct sock *sk); - static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) { return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0); -- cgit v1.2.3 From ae5b84788e5a7876a67f64761b7265529cb5a39a Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Mon, 25 Aug 2025 23:49:09 +0530 Subject: dt-bindings: clock: qcom: Document the Glymur SoC TCSR Clock Controller The Glymur SoC TCSR block provides CLKREF clocks for EDP, PCIe and USB. Add this to the TCSR clock controller binding together with identifiers for the clocks. 
Reviewed-by: Krzysztof Kozlowski Reviewed-by: Bjorn Andersson Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20250825-glymur-clock-controller-v5-v5-2-01b8c8681bcd@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,glymur-tcsr.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,glymur-tcsr.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,glymur-tcsr.h b/include/dt-bindings/clock/qcom,glymur-tcsr.h new file mode 100644 index 000000000000..72614226b113 --- /dev/null +++ b/include/dt-bindings/clock/qcom,glymur-tcsr.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_TCSR_CC_GLYMUR_H +#define _DT_BINDINGS_CLK_QCOM_TCSR_CC_GLYMUR_H + +/* TCSR_CC clocks */ +#define TCSR_EDP_CLKREF_EN 0 +#define TCSR_PCIE_1_CLKREF_EN 1 +#define TCSR_PCIE_2_CLKREF_EN 2 +#define TCSR_PCIE_3_CLKREF_EN 3 +#define TCSR_PCIE_4_CLKREF_EN 4 +#define TCSR_USB2_1_CLKREF_EN 5 +#define TCSR_USB2_2_CLKREF_EN 6 +#define TCSR_USB2_3_CLKREF_EN 7 +#define TCSR_USB2_4_CLKREF_EN 8 +#define TCSR_USB3_0_CLKREF_EN 9 +#define TCSR_USB3_1_CLKREF_EN 10 +#define TCSR_USB4_1_CLKREF_EN 11 +#define TCSR_USB4_2_CLKREF_EN 12 + +#endif -- cgit v1.2.3 From ee2d967030fee156ceb2de80ef63ddeb80d60779 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Mon, 25 Aug 2025 23:49:13 +0530 Subject: dt-bindings: clock: qcom: document the Glymur Global Clock Controller Add device tree bindings for global clock controller on Glymur SoC. 
Signed-off-by: Taniya Das Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250825-glymur-clock-controller-v5-v5-6-01b8c8681bcd@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,glymur-gcc.h | 578 ++++++++++++++++++++++++++++ 1 file changed, 578 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,glymur-gcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,glymur-gcc.h b/include/dt-bindings/clock/qcom,glymur-gcc.h new file mode 100644 index 000000000000..10c12b8c51c3 --- /dev/null +++ b/include/dt-bindings/clock/qcom,glymur-gcc.h @@ -0,0 +1,578 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GCC_GLYMUR_H +#define _DT_BINDINGS_CLK_QCOM_GCC_GLYMUR_H + +/* GCC clocks */ +#define GCC_GPLL0 0 +#define GCC_GPLL0_OUT_EVEN 1 +#define GCC_GPLL1 2 +#define GCC_GPLL14 3 +#define GCC_GPLL14_OUT_EVEN 4 +#define GCC_GPLL4 5 +#define GCC_GPLL5 6 +#define GCC_GPLL7 7 +#define GCC_GPLL8 8 +#define GCC_GPLL9 9 +#define GCC_AGGRE_NOC_PCIE_3A_WEST_SF_AXI_CLK 10 +#define GCC_AGGRE_NOC_PCIE_3B_WEST_SF_AXI_CLK 11 +#define GCC_AGGRE_NOC_PCIE_4_WEST_SF_AXI_CLK 12 +#define GCC_AGGRE_NOC_PCIE_5_EAST_SF_AXI_CLK 13 +#define GCC_AGGRE_NOC_PCIE_6_WEST_SF_AXI_CLK 14 +#define GCC_AGGRE_UFS_PHY_AXI_CLK 15 +#define GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK 16 +#define GCC_AGGRE_USB2_PRIM_AXI_CLK 17 +#define GCC_AGGRE_USB3_MP_AXI_CLK 18 +#define GCC_AGGRE_USB3_PRIM_AXI_CLK 19 +#define GCC_AGGRE_USB3_SEC_AXI_CLK 20 +#define GCC_AGGRE_USB3_TERT_AXI_CLK 21 +#define GCC_AGGRE_USB4_0_AXI_CLK 22 +#define GCC_AGGRE_USB4_1_AXI_CLK 23 +#define GCC_AGGRE_USB4_2_AXI_CLK 24 +#define GCC_AV1E_AHB_CLK 25 +#define GCC_AV1E_AXI_CLK 26 +#define GCC_AV1E_XO_CLK 27 +#define GCC_BOOT_ROM_AHB_CLK 28 +#define GCC_CAMERA_AHB_CLK 29 +#define GCC_CAMERA_HF_AXI_CLK 30 +#define GCC_CAMERA_SF_AXI_CLK 31 +#define 
GCC_CAMERA_XO_CLK 32 +#define GCC_CFG_NOC_PCIE_ANOC_AHB_CLK 33 +#define GCC_CFG_NOC_PCIE_ANOC_SOUTH_AHB_CLK 34 +#define GCC_CFG_NOC_USB2_PRIM_AXI_CLK 35 +#define GCC_CFG_NOC_USB3_MP_AXI_CLK 36 +#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK 37 +#define GCC_CFG_NOC_USB3_SEC_AXI_CLK 38 +#define GCC_CFG_NOC_USB3_TERT_AXI_CLK 39 +#define GCC_CFG_NOC_USB_ANOC_AHB_CLK 40 +#define GCC_CFG_NOC_USB_ANOC_SOUTH_AHB_CLK 41 +#define GCC_DISP_AHB_CLK 42 +#define GCC_DISP_HF_AXI_CLK 43 +#define GCC_EVA_AHB_CLK 44 +#define GCC_EVA_AXI0_CLK 45 +#define GCC_EVA_AXI0C_CLK 46 +#define GCC_EVA_XO_CLK 47 +#define GCC_GP1_CLK 48 +#define GCC_GP1_CLK_SRC 49 +#define GCC_GP2_CLK 50 +#define GCC_GP2_CLK_SRC 51 +#define GCC_GP3_CLK 52 +#define GCC_GP3_CLK_SRC 53 +#define GCC_GPU_CFG_AHB_CLK 54 +#define GCC_GPU_GEMNOC_GFX_CLK 55 +#define GCC_GPU_GPLL0_CLK_SRC 56 +#define GCC_GPU_GPLL0_DIV_CLK_SRC 57 +#define GCC_PCIE_0_AUX_CLK 58 +#define GCC_PCIE_0_AUX_CLK_SRC 59 +#define GCC_PCIE_0_CFG_AHB_CLK 60 +#define GCC_PCIE_0_MSTR_AXI_CLK 61 +#define GCC_PCIE_0_PHY_RCHNG_CLK 62 +#define GCC_PCIE_0_PHY_RCHNG_CLK_SRC 63 +#define GCC_PCIE_0_PIPE_CLK 64 +#define GCC_PCIE_0_SLV_AXI_CLK 65 +#define GCC_PCIE_0_SLV_Q2A_AXI_CLK 66 +#define GCC_PCIE_1_AUX_CLK 67 +#define GCC_PCIE_1_AUX_CLK_SRC 68 +#define GCC_PCIE_1_CFG_AHB_CLK 69 +#define GCC_PCIE_1_MSTR_AXI_CLK 70 +#define GCC_PCIE_1_PHY_RCHNG_CLK 71 +#define GCC_PCIE_1_PHY_RCHNG_CLK_SRC 72 +#define GCC_PCIE_1_PIPE_CLK 73 +#define GCC_PCIE_1_SLV_AXI_CLK 74 +#define GCC_PCIE_1_SLV_Q2A_AXI_CLK 75 +#define GCC_PCIE_2_AUX_CLK 76 +#define GCC_PCIE_2_AUX_CLK_SRC 77 +#define GCC_PCIE_2_CFG_AHB_CLK 78 +#define GCC_PCIE_2_MSTR_AXI_CLK 79 +#define GCC_PCIE_2_PHY_RCHNG_CLK 80 +#define GCC_PCIE_2_PHY_RCHNG_CLK_SRC 81 +#define GCC_PCIE_2_PIPE_CLK 82 +#define GCC_PCIE_2_SLV_AXI_CLK 83 +#define GCC_PCIE_2_SLV_Q2A_AXI_CLK 84 +#define GCC_PCIE_3A_AUX_CLK 85 +#define GCC_PCIE_3A_AUX_CLK_SRC 86 +#define GCC_PCIE_3A_CFG_AHB_CLK 87 +#define GCC_PCIE_3A_MSTR_AXI_CLK 88 +#define 
GCC_PCIE_3A_PHY_RCHNG_CLK 89 +#define GCC_PCIE_3A_PHY_RCHNG_CLK_SRC 90 +#define GCC_PCIE_3A_PIPE_CLK 91 +#define GCC_PCIE_3A_PIPE_CLK_SRC 92 +#define GCC_PCIE_3A_SLV_AXI_CLK 93 +#define GCC_PCIE_3A_SLV_Q2A_AXI_CLK 94 +#define GCC_PCIE_3B_AUX_CLK 95 +#define GCC_PCIE_3B_AUX_CLK_SRC 96 +#define GCC_PCIE_3B_CFG_AHB_CLK 97 +#define GCC_PCIE_3B_MSTR_AXI_CLK 98 +#define GCC_PCIE_3B_PHY_RCHNG_CLK 99 +#define GCC_PCIE_3B_PHY_RCHNG_CLK_SRC 100 +#define GCC_PCIE_3B_PIPE_CLK 101 +#define GCC_PCIE_3B_PIPE_CLK_SRC 102 +#define GCC_PCIE_3B_PIPE_DIV2_CLK 103 +#define GCC_PCIE_3B_PIPE_DIV_CLK_SRC 104 +#define GCC_PCIE_3B_SLV_AXI_CLK 105 +#define GCC_PCIE_3B_SLV_Q2A_AXI_CLK 106 +#define GCC_PCIE_4_AUX_CLK 107 +#define GCC_PCIE_4_AUX_CLK_SRC 108 +#define GCC_PCIE_4_CFG_AHB_CLK 109 +#define GCC_PCIE_4_MSTR_AXI_CLK 110 +#define GCC_PCIE_4_PHY_RCHNG_CLK 111 +#define GCC_PCIE_4_PHY_RCHNG_CLK_SRC 112 +#define GCC_PCIE_4_PIPE_CLK 113 +#define GCC_PCIE_4_PIPE_CLK_SRC 114 +#define GCC_PCIE_4_PIPE_DIV2_CLK 115 +#define GCC_PCIE_4_PIPE_DIV_CLK_SRC 116 +#define GCC_PCIE_4_SLV_AXI_CLK 117 +#define GCC_PCIE_4_SLV_Q2A_AXI_CLK 118 +#define GCC_PCIE_5_AUX_CLK 119 +#define GCC_PCIE_5_AUX_CLK_SRC 120 +#define GCC_PCIE_5_CFG_AHB_CLK 121 +#define GCC_PCIE_5_MSTR_AXI_CLK 122 +#define GCC_PCIE_5_PHY_RCHNG_CLK 123 +#define GCC_PCIE_5_PHY_RCHNG_CLK_SRC 124 +#define GCC_PCIE_5_PIPE_CLK 125 +#define GCC_PCIE_5_PIPE_CLK_SRC 126 +#define GCC_PCIE_5_PIPE_DIV2_CLK 127 +#define GCC_PCIE_5_PIPE_DIV_CLK_SRC 128 +#define GCC_PCIE_5_SLV_AXI_CLK 129 +#define GCC_PCIE_5_SLV_Q2A_AXI_CLK 130 +#define GCC_PCIE_6_AUX_CLK 131 +#define GCC_PCIE_6_AUX_CLK_SRC 132 +#define GCC_PCIE_6_CFG_AHB_CLK 133 +#define GCC_PCIE_6_MSTR_AXI_CLK 134 +#define GCC_PCIE_6_PHY_RCHNG_CLK 135 +#define GCC_PCIE_6_PHY_RCHNG_CLK_SRC 136 +#define GCC_PCIE_6_PIPE_CLK 137 +#define GCC_PCIE_6_PIPE_CLK_SRC 138 +#define GCC_PCIE_6_PIPE_DIV2_CLK 139 +#define GCC_PCIE_6_PIPE_DIV_CLK_SRC 140 +#define GCC_PCIE_6_SLV_AXI_CLK 141 +#define 
GCC_PCIE_6_SLV_Q2A_AXI_CLK 142 +#define GCC_PCIE_NOC_PWRCTL_CLK 143 +#define GCC_PCIE_NOC_QOSGEN_EXTREF_CLK 144 +#define GCC_PCIE_NOC_SF_CENTER_CLK 145 +#define GCC_PCIE_NOC_SLAVE_SF_EAST_CLK 146 +#define GCC_PCIE_NOC_SLAVE_SF_WEST_CLK 147 +#define GCC_PCIE_NOC_TSCTR_CLK 148 +#define GCC_PCIE_PHY_3A_AUX_CLK 149 +#define GCC_PCIE_PHY_3A_AUX_CLK_SRC 150 +#define GCC_PCIE_PHY_3B_AUX_CLK 151 +#define GCC_PCIE_PHY_3B_AUX_CLK_SRC 152 +#define GCC_PCIE_PHY_4_AUX_CLK 153 +#define GCC_PCIE_PHY_4_AUX_CLK_SRC 154 +#define GCC_PCIE_PHY_5_AUX_CLK 155 +#define GCC_PCIE_PHY_5_AUX_CLK_SRC 156 +#define GCC_PCIE_PHY_6_AUX_CLK 157 +#define GCC_PCIE_PHY_6_AUX_CLK_SRC 158 +#define GCC_PCIE_RSCC_CFG_AHB_CLK 159 +#define GCC_PCIE_RSCC_XO_CLK 160 +#define GCC_PDM2_CLK 161 +#define GCC_PDM2_CLK_SRC 162 +#define GCC_PDM_AHB_CLK 163 +#define GCC_PDM_XO4_CLK 164 +#define GCC_QMIP_AV1E_AHB_CLK 165 +#define GCC_QMIP_CAMERA_CMD_AHB_CLK 166 +#define GCC_QMIP_CAMERA_NRT_AHB_CLK 167 +#define GCC_QMIP_CAMERA_RT_AHB_CLK 168 +#define GCC_QMIP_GPU_AHB_CLK 169 +#define GCC_QMIP_PCIE_3A_AHB_CLK 170 +#define GCC_QMIP_PCIE_3B_AHB_CLK 171 +#define GCC_QMIP_PCIE_4_AHB_CLK 172 +#define GCC_QMIP_PCIE_5_AHB_CLK 173 +#define GCC_QMIP_PCIE_6_AHB_CLK 174 +#define GCC_QMIP_VIDEO_CV_CPU_AHB_CLK 175 +#define GCC_QMIP_VIDEO_CVP_AHB_CLK 176 +#define GCC_QMIP_VIDEO_V_CPU_AHB_CLK 177 +#define GCC_QMIP_VIDEO_VCODEC1_AHB_CLK 178 +#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK 179 +#define GCC_QUPV3_OOB_CORE_2X_CLK 180 +#define GCC_QUPV3_OOB_CORE_CLK 181 +#define GCC_QUPV3_OOB_M_AHB_CLK 182 +#define GCC_QUPV3_OOB_QSPI_S0_CLK 183 +#define GCC_QUPV3_OOB_QSPI_S0_CLK_SRC 184 +#define GCC_QUPV3_OOB_QSPI_S1_CLK 185 +#define GCC_QUPV3_OOB_QSPI_S1_CLK_SRC 186 +#define GCC_QUPV3_OOB_S0_CLK 187 +#define GCC_QUPV3_OOB_S0_CLK_SRC 188 +#define GCC_QUPV3_OOB_S1_CLK 189 +#define GCC_QUPV3_OOB_S1_CLK_SRC 190 +#define GCC_QUPV3_OOB_S_AHB_CLK 191 +#define GCC_QUPV3_OOB_TCXO_CLK 192 +#define GCC_QUPV3_WRAP0_CORE_2X_CLK 193 +#define 
GCC_QUPV3_WRAP0_CORE_CLK 194 +#define GCC_QUPV3_WRAP0_QSPI_S2_CLK 195 +#define GCC_QUPV3_WRAP0_QSPI_S2_CLK_SRC 196 +#define GCC_QUPV3_WRAP0_QSPI_S3_CLK 197 +#define GCC_QUPV3_WRAP0_QSPI_S3_CLK_SRC 198 +#define GCC_QUPV3_WRAP0_QSPI_S6_CLK 199 +#define GCC_QUPV3_WRAP0_QSPI_S6_CLK_SRC 200 +#define GCC_QUPV3_WRAP0_S0_CLK 201 +#define GCC_QUPV3_WRAP0_S0_CLK_SRC 202 +#define GCC_QUPV3_WRAP0_S1_CLK 203 +#define GCC_QUPV3_WRAP0_S1_CLK_SRC 204 +#define GCC_QUPV3_WRAP0_S2_CLK 205 +#define GCC_QUPV3_WRAP0_S2_CLK_SRC 206 +#define GCC_QUPV3_WRAP0_S3_CLK 207 +#define GCC_QUPV3_WRAP0_S3_CLK_SRC 208 +#define GCC_QUPV3_WRAP0_S4_CLK 209 +#define GCC_QUPV3_WRAP0_S4_CLK_SRC 210 +#define GCC_QUPV3_WRAP0_S5_CLK 211 +#define GCC_QUPV3_WRAP0_S5_CLK_SRC 212 +#define GCC_QUPV3_WRAP0_S6_CLK 213 +#define GCC_QUPV3_WRAP0_S6_CLK_SRC 214 +#define GCC_QUPV3_WRAP0_S7_CLK 215 +#define GCC_QUPV3_WRAP0_S7_CLK_SRC 216 +#define GCC_QUPV3_WRAP1_CORE_2X_CLK 217 +#define GCC_QUPV3_WRAP1_CORE_CLK 218 +#define GCC_QUPV3_WRAP1_QSPI_S2_CLK 219 +#define GCC_QUPV3_WRAP1_QSPI_S2_CLK_SRC 220 +#define GCC_QUPV3_WRAP1_QSPI_S3_CLK 221 +#define GCC_QUPV3_WRAP1_QSPI_S3_CLK_SRC 222 +#define GCC_QUPV3_WRAP1_QSPI_S6_CLK 223 +#define GCC_QUPV3_WRAP1_QSPI_S6_CLK_SRC 224 +#define GCC_QUPV3_WRAP1_S0_CLK 225 +#define GCC_QUPV3_WRAP1_S0_CLK_SRC 226 +#define GCC_QUPV3_WRAP1_S1_CLK 227 +#define GCC_QUPV3_WRAP1_S1_CLK_SRC 228 +#define GCC_QUPV3_WRAP1_S2_CLK 229 +#define GCC_QUPV3_WRAP1_S2_CLK_SRC 230 +#define GCC_QUPV3_WRAP1_S3_CLK 231 +#define GCC_QUPV3_WRAP1_S3_CLK_SRC 232 +#define GCC_QUPV3_WRAP1_S4_CLK 233 +#define GCC_QUPV3_WRAP1_S4_CLK_SRC 234 +#define GCC_QUPV3_WRAP1_S5_CLK 235 +#define GCC_QUPV3_WRAP1_S5_CLK_SRC 236 +#define GCC_QUPV3_WRAP1_S6_CLK 237 +#define GCC_QUPV3_WRAP1_S6_CLK_SRC 238 +#define GCC_QUPV3_WRAP1_S7_CLK 239 +#define GCC_QUPV3_WRAP1_S7_CLK_SRC 240 +#define GCC_QUPV3_WRAP2_CORE_2X_CLK 241 +#define GCC_QUPV3_WRAP2_CORE_CLK 242 +#define GCC_QUPV3_WRAP2_QSPI_S2_CLK 243 +#define 
GCC_QUPV3_WRAP2_QSPI_S2_CLK_SRC 244 +#define GCC_QUPV3_WRAP2_QSPI_S3_CLK 245 +#define GCC_QUPV3_WRAP2_QSPI_S3_CLK_SRC 246 +#define GCC_QUPV3_WRAP2_QSPI_S6_CLK 247 +#define GCC_QUPV3_WRAP2_QSPI_S6_CLK_SRC 248 +#define GCC_QUPV3_WRAP2_S0_CLK 249 +#define GCC_QUPV3_WRAP2_S0_CLK_SRC 250 +#define GCC_QUPV3_WRAP2_S1_CLK 251 +#define GCC_QUPV3_WRAP2_S1_CLK_SRC 252 +#define GCC_QUPV3_WRAP2_S2_CLK 253 +#define GCC_QUPV3_WRAP2_S2_CLK_SRC 254 +#define GCC_QUPV3_WRAP2_S3_CLK 255 +#define GCC_QUPV3_WRAP2_S3_CLK_SRC 256 +#define GCC_QUPV3_WRAP2_S4_CLK 257 +#define GCC_QUPV3_WRAP2_S4_CLK_SRC 258 +#define GCC_QUPV3_WRAP2_S5_CLK 259 +#define GCC_QUPV3_WRAP2_S5_CLK_SRC 260 +#define GCC_QUPV3_WRAP2_S6_CLK 261 +#define GCC_QUPV3_WRAP2_S6_CLK_SRC 262 +#define GCC_QUPV3_WRAP2_S7_CLK 263 +#define GCC_QUPV3_WRAP2_S7_CLK_SRC 264 +#define GCC_QUPV3_WRAP_0_M_AHB_CLK 265 +#define GCC_QUPV3_WRAP_0_S_AHB_CLK 266 +#define GCC_QUPV3_WRAP_1_M_AHB_CLK 267 +#define GCC_QUPV3_WRAP_1_S_AHB_CLK 268 +#define GCC_QUPV3_WRAP_2_M_AHB_CLK 269 +#define GCC_QUPV3_WRAP_2_S_AHB_CLK 270 +#define GCC_SDCC2_AHB_CLK 271 +#define GCC_SDCC2_APPS_CLK 272 +#define GCC_SDCC2_APPS_CLK_SRC 273 +#define GCC_SDCC4_AHB_CLK 274 +#define GCC_SDCC4_APPS_CLK 275 +#define GCC_SDCC4_APPS_CLK_SRC 276 +#define GCC_UFS_PHY_AHB_CLK 277 +#define GCC_UFS_PHY_AXI_CLK 278 +#define GCC_UFS_PHY_AXI_CLK_SRC 279 +#define GCC_UFS_PHY_AXI_HW_CTL_CLK 280 +#define GCC_UFS_PHY_ICE_CORE_CLK 281 +#define GCC_UFS_PHY_ICE_CORE_CLK_SRC 282 +#define GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK 283 +#define GCC_UFS_PHY_PHY_AUX_CLK 284 +#define GCC_UFS_PHY_PHY_AUX_CLK_SRC 285 +#define GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK 286 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK 287 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK_SRC 288 +#define GCC_UFS_PHY_RX_SYMBOL_1_CLK 289 +#define GCC_UFS_PHY_RX_SYMBOL_1_CLK_SRC 290 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK 291 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK_SRC 292 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK 293 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC 294 +#define 
GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK 295 +#define GCC_USB20_MASTER_CLK 296 +#define GCC_USB20_MASTER_CLK_SRC 297 +#define GCC_USB20_MOCK_UTMI_CLK 298 +#define GCC_USB20_MOCK_UTMI_CLK_SRC 299 +#define GCC_USB20_MOCK_UTMI_POSTDIV_CLK_SRC 300 +#define GCC_USB20_SLEEP_CLK 301 +#define GCC_USB30_MP_MASTER_CLK 302 +#define GCC_USB30_MP_MASTER_CLK_SRC 303 +#define GCC_USB30_MP_MOCK_UTMI_CLK 304 +#define GCC_USB30_MP_MOCK_UTMI_CLK_SRC 305 +#define GCC_USB30_MP_MOCK_UTMI_POSTDIV_CLK_SRC 306 +#define GCC_USB30_MP_SLEEP_CLK 307 +#define GCC_USB30_PRIM_MASTER_CLK 308 +#define GCC_USB30_PRIM_MASTER_CLK_SRC 309 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK 310 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC 311 +#define GCC_USB30_PRIM_MOCK_UTMI_POSTDIV_CLK_SRC 312 +#define GCC_USB30_PRIM_SLEEP_CLK 313 +#define GCC_USB30_SEC_MASTER_CLK 314 +#define GCC_USB30_SEC_MASTER_CLK_SRC 315 +#define GCC_USB30_SEC_MOCK_UTMI_CLK 316 +#define GCC_USB30_SEC_MOCK_UTMI_CLK_SRC 317 +#define GCC_USB30_SEC_MOCK_UTMI_POSTDIV_CLK_SRC 318 +#define GCC_USB30_SEC_SLEEP_CLK 319 +#define GCC_USB30_TERT_MASTER_CLK 320 +#define GCC_USB30_TERT_MASTER_CLK_SRC 321 +#define GCC_USB30_TERT_MOCK_UTMI_CLK 322 +#define GCC_USB30_TERT_MOCK_UTMI_CLK_SRC 323 +#define GCC_USB30_TERT_MOCK_UTMI_POSTDIV_CLK_SRC 324 +#define GCC_USB30_TERT_SLEEP_CLK 325 +#define GCC_USB34_PRIM_PHY_PIPE_CLK_SRC 326 +#define GCC_USB34_SEC_PHY_PIPE_CLK_SRC 327 +#define GCC_USB34_TERT_PHY_PIPE_CLK_SRC 328 +#define GCC_USB3_MP_PHY_AUX_CLK 329 +#define GCC_USB3_MP_PHY_AUX_CLK_SRC 330 +#define GCC_USB3_MP_PHY_COM_AUX_CLK 331 +#define GCC_USB3_MP_PHY_PIPE_0_CLK 332 +#define GCC_USB3_MP_PHY_PIPE_0_CLK_SRC 333 +#define GCC_USB3_MP_PHY_PIPE_1_CLK 334 +#define GCC_USB3_MP_PHY_PIPE_1_CLK_SRC 335 +#define GCC_USB3_PRIM_PHY_AUX_CLK 336 +#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC 337 +#define GCC_USB3_PRIM_PHY_COM_AUX_CLK 338 +#define GCC_USB3_PRIM_PHY_PIPE_CLK 339 +#define GCC_USB3_PRIM_PHY_PIPE_CLK_SRC 340 +#define GCC_USB3_SEC_PHY_AUX_CLK 341 +#define 
GCC_USB3_SEC_PHY_AUX_CLK_SRC 342 +#define GCC_USB3_SEC_PHY_COM_AUX_CLK 343 +#define GCC_USB3_SEC_PHY_PIPE_CLK 344 +#define GCC_USB3_SEC_PHY_PIPE_CLK_SRC 345 +#define GCC_USB3_TERT_PHY_AUX_CLK 346 +#define GCC_USB3_TERT_PHY_AUX_CLK_SRC 347 +#define GCC_USB3_TERT_PHY_COM_AUX_CLK 348 +#define GCC_USB3_TERT_PHY_PIPE_CLK 349 +#define GCC_USB3_TERT_PHY_PIPE_CLK_SRC 350 +#define GCC_USB4_0_CFG_AHB_CLK 351 +#define GCC_USB4_0_DP0_CLK 352 +#define GCC_USB4_0_DP1_CLK 353 +#define GCC_USB4_0_MASTER_CLK 354 +#define GCC_USB4_0_MASTER_CLK_SRC 355 +#define GCC_USB4_0_PHY_DP0_CLK_SRC 356 +#define GCC_USB4_0_PHY_DP0_GMUX_CLK_SRC 357 +#define GCC_USB4_0_PHY_DP1_CLK_SRC 358 +#define GCC_USB4_0_PHY_DP1_GMUX_CLK_SRC 359 +#define GCC_USB4_0_PHY_P2RR2P_PIPE_CLK 360 +#define GCC_USB4_0_PHY_P2RR2P_PIPE_CLK_SRC 361 +#define GCC_USB4_0_PHY_PCIE_PIPE_CLK 362 +#define GCC_USB4_0_PHY_PCIE_PIPE_CLK_SRC 363 +#define GCC_USB4_0_PHY_PCIE_PIPE_MUX_CLK_SRC 364 +#define GCC_USB4_0_PHY_PCIE_PIPEGMUX_CLK_SRC 365 +#define GCC_USB4_0_PHY_PIPEGMUX_CLK_SRC 366 +#define GCC_USB4_0_PHY_RX0_CLK 367 +#define GCC_USB4_0_PHY_RX0_CLK_SRC 368 +#define GCC_USB4_0_PHY_RX1_CLK 369 +#define GCC_USB4_0_PHY_RX1_CLK_SRC 370 +#define GCC_USB4_0_PHY_SYS_CLK_SRC 371 +#define GCC_USB4_0_PHY_SYS_PIPEGMUX_CLK_SRC 372 +#define GCC_USB4_0_PHY_USB_PIPE_CLK 373 +#define GCC_USB4_0_SB_IF_CLK 374 +#define GCC_USB4_0_SB_IF_CLK_SRC 375 +#define GCC_USB4_0_SYS_CLK 376 +#define GCC_USB4_0_TMU_CLK 377 +#define GCC_USB4_0_TMU_CLK_SRC 378 +#define GCC_USB4_0_UC_HRR_CLK 379 +#define GCC_USB4_1_CFG_AHB_CLK 380 +#define GCC_USB4_1_DP0_CLK 381 +#define GCC_USB4_1_DP1_CLK 382 +#define GCC_USB4_1_MASTER_CLK 383 +#define GCC_USB4_1_MASTER_CLK_SRC 384 +#define GCC_USB4_1_PHY_DP0_CLK_SRC 385 +#define GCC_USB4_1_PHY_DP0_GMUX_2_CLK_SRC 386 +#define GCC_USB4_1_PHY_DP1_CLK_SRC 387 +#define GCC_USB4_1_PHY_DP1_GMUX_2_CLK_SRC 388 +#define GCC_USB4_1_PHY_P2RR2P_PIPE_CLK 389 +#define GCC_USB4_1_PHY_P2RR2P_PIPE_CLK_SRC 390 +#define 
GCC_USB4_1_PHY_PCIE_PIPE_CLK 391 +#define GCC_USB4_1_PHY_PCIE_PIPE_CLK_SRC 392 +#define GCC_USB4_1_PHY_PCIE_PIPE_MUX_CLK_SRC 393 +#define GCC_USB4_1_PHY_PCIE_PIPEGMUX_CLK_SRC 394 +#define GCC_USB4_1_PHY_PIPEGMUX_CLK_SRC 395 +#define GCC_USB4_1_PHY_PLL_PIPE_CLK_SRC 396 +#define GCC_USB4_1_PHY_RX0_CLK 397 +#define GCC_USB4_1_PHY_RX0_CLK_SRC 398 +#define GCC_USB4_1_PHY_RX1_CLK 399 +#define GCC_USB4_1_PHY_RX1_CLK_SRC 400 +#define GCC_USB4_1_PHY_SYS_CLK_SRC 401 +#define GCC_USB4_1_PHY_SYS_PIPEGMUX_CLK_SRC 402 +#define GCC_USB4_1_PHY_USB_PIPE_CLK 403 +#define GCC_USB4_1_SB_IF_CLK 404 +#define GCC_USB4_1_SB_IF_CLK_SRC 405 +#define GCC_USB4_1_SYS_CLK 406 +#define GCC_USB4_1_TMU_CLK 407 +#define GCC_USB4_1_TMU_CLK_SRC 408 +#define GCC_USB4_1_UC_HRR_CLK 409 +#define GCC_USB4_2_CFG_AHB_CLK 410 +#define GCC_USB4_2_DP0_CLK 411 +#define GCC_USB4_2_DP1_CLK 412 +#define GCC_USB4_2_MASTER_CLK 413 +#define GCC_USB4_2_MASTER_CLK_SRC 414 +#define GCC_USB4_2_PHY_DP0_CLK_SRC 415 +#define GCC_USB4_2_PHY_DP0_GMUX_CLK_SRC 416 +#define GCC_USB4_2_PHY_DP1_CLK_SRC 417 +#define GCC_USB4_2_PHY_DP1_GMUX_CLK_SRC 418 +#define GCC_USB4_2_PHY_P2RR2P_PIPE_CLK 419 +#define GCC_USB4_2_PHY_P2RR2P_PIPE_CLK_SRC 420 +#define GCC_USB4_2_PHY_PCIE_PIPE_CLK 421 +#define GCC_USB4_2_PHY_PCIE_PIPE_CLK_SRC 422 +#define GCC_USB4_2_PHY_PCIE_PIPE_MUX_CLK_SRC 423 +#define GCC_USB4_2_PHY_PCIE_PIPEGMUX_CLK_SRC 424 +#define GCC_USB4_2_PHY_PIPEGMUX_CLK_SRC 425 +#define GCC_USB4_2_PHY_RX0_CLK 426 +#define GCC_USB4_2_PHY_RX0_CLK_SRC 427 +#define GCC_USB4_2_PHY_RX1_CLK 428 +#define GCC_USB4_2_PHY_RX1_CLK_SRC 429 +#define GCC_USB4_2_PHY_SYS_CLK_SRC 430 +#define GCC_USB4_2_PHY_SYS_PIPEGMUX_CLK_SRC 431 +#define GCC_USB4_2_PHY_USB_PIPE_CLK 432 +#define GCC_USB4_2_SB_IF_CLK 433 +#define GCC_USB4_2_SB_IF_CLK_SRC 434 +#define GCC_USB4_2_SYS_CLK 435 +#define GCC_USB4_2_TMU_CLK 436 +#define GCC_USB4_2_TMU_CLK_SRC 437 +#define GCC_USB4_2_UC_HRR_CLK 438 +#define GCC_VIDEO_AHB_CLK 439 +#define GCC_VIDEO_AXI0_CLK 440 +#define 
GCC_VIDEO_AXI0C_CLK 441 +#define GCC_VIDEO_AXI1_CLK 442 +#define GCC_VIDEO_XO_CLK 443 + +/* GCC power domains */ +#define GCC_PCIE_0_TUNNEL_GDSC 0 +#define GCC_PCIE_1_TUNNEL_GDSC 1 +#define GCC_PCIE_2_TUNNEL_GDSC 2 +#define GCC_PCIE_3A_GDSC 3 +#define GCC_PCIE_3A_PHY_GDSC 4 +#define GCC_PCIE_3B_GDSC 5 +#define GCC_PCIE_3B_PHY_GDSC 6 +#define GCC_PCIE_4_GDSC 7 +#define GCC_PCIE_4_PHY_GDSC 8 +#define GCC_PCIE_5_GDSC 9 +#define GCC_PCIE_5_PHY_GDSC 10 +#define GCC_PCIE_6_GDSC 11 +#define GCC_PCIE_6_PHY_GDSC 12 +#define GCC_UFS_PHY_GDSC 13 +#define GCC_USB20_PRIM_GDSC 14 +#define GCC_USB30_MP_GDSC 15 +#define GCC_USB30_PRIM_GDSC 16 +#define GCC_USB30_SEC_GDSC 17 +#define GCC_USB30_TERT_GDSC 18 +#define GCC_USB3_MP_SS0_PHY_GDSC 19 +#define GCC_USB3_MP_SS1_PHY_GDSC 20 +#define GCC_USB4_0_GDSC 21 +#define GCC_USB4_1_GDSC 22 +#define GCC_USB4_2_GDSC 23 +#define GCC_USB_0_PHY_GDSC 24 +#define GCC_USB_1_PHY_GDSC 25 +#define GCC_USB_2_PHY_GDSC 26 + +/* GCC resets */ +#define GCC_AV1E_BCR 0 +#define GCC_CAMERA_BCR 1 +#define GCC_DISPLAY_BCR 2 +#define GCC_EVA_BCR 3 +#define GCC_GPU_BCR 4 +#define GCC_PCIE_0_LINK_DOWN_BCR 5 +#define GCC_PCIE_0_NOCSR_COM_PHY_BCR 6 +#define GCC_PCIE_0_PHY_BCR 7 +#define GCC_PCIE_0_PHY_NOCSR_COM_PHY_BCR 8 +#define GCC_PCIE_0_TUNNEL_BCR 9 +#define GCC_PCIE_1_LINK_DOWN_BCR 10 +#define GCC_PCIE_1_NOCSR_COM_PHY_BCR 11 +#define GCC_PCIE_1_PHY_BCR 12 +#define GCC_PCIE_1_PHY_NOCSR_COM_PHY_BCR 13 +#define GCC_PCIE_1_TUNNEL_BCR 14 +#define GCC_PCIE_2_LINK_DOWN_BCR 15 +#define GCC_PCIE_2_NOCSR_COM_PHY_BCR 16 +#define GCC_PCIE_2_PHY_BCR 17 +#define GCC_PCIE_2_PHY_NOCSR_COM_PHY_BCR 18 +#define GCC_PCIE_2_TUNNEL_BCR 19 +#define GCC_PCIE_3A_BCR 20 +#define GCC_PCIE_3A_LINK_DOWN_BCR 21 +#define GCC_PCIE_3A_NOCSR_COM_PHY_BCR 22 +#define GCC_PCIE_3A_PHY_BCR 23 +#define GCC_PCIE_3A_PHY_NOCSR_COM_PHY_BCR 24 +#define GCC_PCIE_3B_BCR 25 +#define GCC_PCIE_3B_LINK_DOWN_BCR 26 +#define GCC_PCIE_3B_NOCSR_COM_PHY_BCR 27 +#define GCC_PCIE_3B_PHY_BCR 28 +#define 
GCC_PCIE_3B_PHY_NOCSR_COM_PHY_BCR 29 +#define GCC_PCIE_4_BCR 30 +#define GCC_PCIE_4_LINK_DOWN_BCR 31 +#define GCC_PCIE_4_NOCSR_COM_PHY_BCR 32 +#define GCC_PCIE_4_PHY_BCR 33 +#define GCC_PCIE_4_PHY_NOCSR_COM_PHY_BCR 34 +#define GCC_PCIE_5_BCR 35 +#define GCC_PCIE_5_LINK_DOWN_BCR 36 +#define GCC_PCIE_5_NOCSR_COM_PHY_BCR 37 +#define GCC_PCIE_5_PHY_BCR 38 +#define GCC_PCIE_5_PHY_NOCSR_COM_PHY_BCR 39 +#define GCC_PCIE_6_BCR 40 +#define GCC_PCIE_6_LINK_DOWN_BCR 41 +#define GCC_PCIE_6_NOCSR_COM_PHY_BCR 42 +#define GCC_PCIE_6_PHY_BCR 43 +#define GCC_PCIE_6_PHY_NOCSR_COM_PHY_BCR 44 +#define GCC_PCIE_NOC_BCR 45 +#define GCC_PCIE_PHY_BCR 46 +#define GCC_PCIE_PHY_CFG_AHB_BCR 47 +#define GCC_PCIE_PHY_COM_BCR 48 +#define GCC_PCIE_RSCC_BCR 49 +#define GCC_PDM_BCR 50 +#define GCC_QUPV3_WRAPPER_0_BCR 51 +#define GCC_QUPV3_WRAPPER_1_BCR 52 +#define GCC_QUPV3_WRAPPER_2_BCR 53 +#define GCC_QUPV3_WRAPPER_OOB_BCR 54 +#define GCC_QUSB2PHY_HS0_MP_BCR 55 +#define GCC_QUSB2PHY_HS1_MP_BCR 56 +#define GCC_QUSB2PHY_PRIM_BCR 57 +#define GCC_QUSB2PHY_SEC_BCR 58 +#define GCC_QUSB2PHY_TERT_BCR 59 +#define GCC_QUSB2PHY_USB20_HS_BCR 60 +#define GCC_SDCC2_BCR 61 +#define GCC_SDCC4_BCR 62 +#define GCC_TCSR_PCIE_BCR 63 +#define GCC_UFS_PHY_BCR 64 +#define GCC_USB20_PRIM_BCR 65 +#define GCC_USB30_MP_BCR 66 +#define GCC_USB30_PRIM_BCR 67 +#define GCC_USB30_SEC_BCR 68 +#define GCC_USB30_TERT_BCR 69 +#define GCC_USB3_MP_SS0_PHY_BCR 70 +#define GCC_USB3_MP_SS1_PHY_BCR 71 +#define GCC_USB3_PHY_PRIM_BCR 72 +#define GCC_USB3_PHY_SEC_BCR 73 +#define GCC_USB3_PHY_TERT_BCR 74 +#define GCC_USB3_UNIPHY_MP0_BCR 75 +#define GCC_USB3_UNIPHY_MP1_BCR 76 +#define GCC_USB3PHY_PHY_PRIM_BCR 77 +#define GCC_USB3PHY_PHY_SEC_BCR 78 +#define GCC_USB3PHY_PHY_TERT_BCR 79 +#define GCC_USB3UNIPHY_PHY_MP0_BCR 80 +#define GCC_USB3UNIPHY_PHY_MP1_BCR 81 +#define GCC_USB4_0_BCR 82 +#define GCC_USB4_0_DP0_PHY_PRIM_BCR 83 +#define GCC_USB4_1_BCR 84 +#define GCC_USB4_2_BCR 85 +#define GCC_USB_0_PHY_BCR 86 +#define GCC_USB_1_PHY_BCR 87 
+#define GCC_USB_2_PHY_BCR 88 +#define GCC_VIDEO_AXI0_CLK_ARES 89 +#define GCC_VIDEO_AXI1_CLK_ARES 90 +#define GCC_VIDEO_BCR 91 + +#endif -- cgit v1.2.3 From dd386b0d5e61556927189cd7b59a628d22cb6851 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Mon, 1 Sep 2025 19:26:07 -0600 Subject: io_uring/uring_cmd: correct io_uring_cmd_done() ret type io_uring_cmd_done() takes the result code for the CQE as a ssize_t ret argument. However, the CQE res field is a s32 value, as is the argument to io_req_set_res(). To clarify that only s32 values can be faithfully represented without truncation, change io_uring_cmd_done()'s ret argument type to s32. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250902012609.1513123-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 7211157edfe9..c4d7874016bb 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -56,7 +56,7 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, * Note: the caller should never hard code @issue_flags and is only allowed * to pass the mask provided by the core io_uring code. 
*/ -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, u64 res2, +void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2, unsigned issue_flags); void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, @@ -104,7 +104,7 @@ static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, { return -EOPNOTSUPP; } -static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, +static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 ret2, unsigned issue_flags) { } -- cgit v1.2.3 From 9f8608fce90fbcd2a98ceefad0bc762423927629 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Mon, 1 Sep 2025 19:33:27 -0600 Subject: io_uring/cmd: remove unused io_uring_cmd_iopoll_done() io_uring_cmd_iopoll_done()'s only caller was removed in commit 9ce6c9875f3e ("nvme: always punt polled uring_cmd end_io work to task_work"). So remove the unused function too. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250902013328.1517686-1-csander@purestorage.com Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index c4d7874016bb..50dd6a53cb5e 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -133,17 +133,6 @@ static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, } #endif -/* - * Polled completions must ensure they are coming from a poll queue, and - * hence are completed inside the usual poll handling loops. 
- */ -static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd, - ssize_t ret, ssize_t res2) -{ - lockdep_assert(in_task()); - io_uring_cmd_done(ioucmd, ret, res2, 0); -} - /* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */ static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, io_uring_cmd_tw_t task_work_cb) -- cgit v1.2.3 From 21f82062d0f241e55dd59eb630e8710862cc90b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juraj=20=C5=A0arinay?= Date: Tue, 2 Sep 2025 13:36:28 +0200 Subject: net: nfc: nci: Increase NCI_DATA_TIMEOUT to 3000 ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An exchange with a NFC target must complete within NCI_DATA_TIMEOUT. A delay of 700 ms is not sufficient for cryptographic operations on smart cards. CardOS 6.0 may need up to 1.3 seconds to perform 256-bit ECDH or 3072-bit RSA. To prevent brute-force attacks, passports and similar documents introduce even longer delays into access control protocols (BAC/PACE). The timeout should be higher, but not too much. The expiration allows us to detect that a NFC target has disappeared. 
Signed-off-by: Juraj Šarinay Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250902113630.62393-1-juraj@sarinay.com Signed-off-by: Jakub Kicinski --- include/net/nfc/nci_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index e180bdf2f82b..664d5058e66e 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -52,7 +52,7 @@ enum nci_state { #define NCI_RF_DISC_SELECT_TIMEOUT 5000 #define NCI_RF_DEACTIVATE_TIMEOUT 30000 #define NCI_CMD_TIMEOUT 5000 -#define NCI_DATA_TIMEOUT 700 +#define NCI_DATA_TIMEOUT 3000 struct nci_dev; -- cgit v1.2.3 From 8c94db0ae97c72c253a615f990bd466b456e94f6 Mon Sep 17 00:00:00 2001 From: Svetlana Parfenova Date: Mon, 1 Sep 2025 20:53:50 +0700 Subject: binfmt_elf: preserve original ELF e_flags for core dumps Some architectures, such as RISC-V, use the ELF e_flags field to encode ABI-specific information (e.g., ISA extensions, fpu support). Debuggers like GDB rely on these flags in core dumps to correctly interpret optional register sets. If the flags are missing or incorrect, GDB may warn and ignore valid data, for example: warning: Unexpected size of section '.reg2/213' in core file. This can prevent access to fpu or other architecture-specific registers even when they were dumped. Save the e_flags field during ELF binary loading (in load_elf_binary()) into the mm_struct, and later retrieve it during core dump generation (in fill_note_info()). Kconfig option CONFIG_ARCH_HAS_ELF_CORE_EFLAGS is introduced for architectures that require this behaviour. 
Signed-off-by: Svetlana Parfenova Link: https://lore.kernel.org/r/20250901135350.619485-1-svetlana.parfenova@syntacore.com Signed-off-by: Kees Cook --- include/linux/mm_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 08bc2442db93..04a2857f12f2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1102,6 +1102,11 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ +#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS + /* the ABI-related flags from the ELF header. Used for core dump */ + unsigned long saved_e_flags; +#endif + struct percpu_counter rss_stat[NR_MM_COUNTERS]; struct linux_binfmt *binfmt; -- cgit v1.2.3 From 4039ce7ef40474d5ba46f414c50cc7020b9cf8ae Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 7 Aug 2025 15:49:59 +0200 Subject: netfilter: nf_tables: Introduce NFTA_DEVICE_PREFIX This new attribute is supposed to be used instead of NFTA_DEVICE_NAME for simple wildcard interface specs. It holds a NUL-terminated string representing an interface name prefix to match on. While kernel code to distinguish full names from prefixes in NFTA_DEVICE_NAME is simpler than this solution, reusing the existing attribute with different semantics leads to confusion between different versions of kernel and user space though: * With old kernels, wildcards submitted by user space are accepted yet silently treated as regular names. * With old user space, wildcards submitted by kernel may cause crashes since libnftnl expects NUL-termination when there is none. Using a distinct attribute type sanitizes these situations as the receiving part detects and rejects the unexpected attribute nested in *_HOOK_DEVS attributes. 
Fixes: 6d07a289504a ("netfilter: nf_tables: Support wildcard netdev hook specs") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2beb30be2c5f..8e0eb832bc01 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1784,10 +1784,12 @@ enum nft_synproxy_attributes { * enum nft_device_attributes - nf_tables device netlink attributes * * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) + * @NFTA_DEVICE_PREFIX: device name prefix, a simple wildcard (NLA_STRING) */ enum nft_devices_attributes { NFTA_DEVICE_UNSPEC, NFTA_DEVICE_NAME, + NFTA_DEVICE_PREFIX, __NFTA_DEVICE_MAX }; #define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) -- cgit v1.2.3 From 3ceb08838b576b20108d7facf6baa3dbf792afe9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Sep 2025 14:12:10 -0700 Subject: net: add helper to pre-check if PP for an Rx queue will be unreadable mlx5 pokes into the rxq state to check if the queue has a memory provider, and therefore whether it may produce unreadable mem. Add a helper for doing this in the page pool API. fbnic will want a similar thing (tho, for a slightly different reason). 
Reviewed-by: Mina Almasry Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250901211214.1027927-11-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/net/netdev_queues.h | 2 ++ include/net/page_pool/helpers.h | 12 ++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index b9d02bc65c97..cd00e0406cf4 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -151,6 +151,8 @@ struct netdev_queue_mgmt_ops { int idx); }; +bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); + /** * DOC: Lockless queue stopping / waking helpers. * diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index aa3719f28216..3247026e096a 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -505,6 +505,18 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) page_pool_update_nid(pool, new_nid); } +/** + * page_pool_is_unreadable() - will allocated buffers be unreadable for the CPU + * @pool: queried page pool + * + * Check if page pool will return buffers which are unreadable to the CPU / + * kernel. This will only be the case if user space bound a memory provider (mp) + * which returns unreadable memory to the queue served by the page pool. + * If %PP_FLAG_ALLOW_UNREADABLE_NETMEM was set but there is no mp bound + * this helper will return false. See also netif_rxq_has_unreadable_mp(). + * + * Return: true if memory allocated by the page pool may be unreadable + */ static inline bool page_pool_is_unreadable(struct page_pool *pool) { return !!pool->mp_ops; -- cgit v1.2.3 From 34837c444cd42236b2b43ce871f30d83776a3431 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Sun, 24 Aug 2025 20:07:34 +0200 Subject: media: uapi: v4l2-controls: Cleanup codec definitions Move some fields closer to where they are used, add missing tabs and remove an extra newline. 
Signed-off-by: Paul Kocialkowski Reviewed-by: Nicolas Dufresne Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 7aef88465d04..2d30107e047e 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1537,15 +1537,6 @@ struct v4l2_ctrl_h264_pred_weights { struct v4l2_h264_weight_factors weight_factors[2]; }; -#define V4L2_H264_SLICE_TYPE_P 0 -#define V4L2_H264_SLICE_TYPE_B 1 -#define V4L2_H264_SLICE_TYPE_I 2 -#define V4L2_H264_SLICE_TYPE_SP 3 -#define V4L2_H264_SLICE_TYPE_SI 4 - -#define V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED 0x01 -#define V4L2_H264_SLICE_FLAG_SP_FOR_SWITCH 0x02 - #define V4L2_H264_TOP_FIELD_REF 0x1 #define V4L2_H264_BOTTOM_FIELD_REF 0x2 #define V4L2_H264_FRAME_REF 0x3 @@ -1566,8 +1557,17 @@ struct v4l2_h264_reference { * Maximum DPB size, as specified by section 'A.3.1 Level limits * common to the Baseline, Main, and Extended profiles'. 
*/ -#define V4L2_H264_NUM_DPB_ENTRIES 16 -#define V4L2_H264_REF_LIST_LEN (2 * V4L2_H264_NUM_DPB_ENTRIES) +#define V4L2_H264_NUM_DPB_ENTRIES 16 +#define V4L2_H264_REF_LIST_LEN (2 * V4L2_H264_NUM_DPB_ENTRIES) + +#define V4L2_H264_SLICE_TYPE_P 0 +#define V4L2_H264_SLICE_TYPE_B 1 +#define V4L2_H264_SLICE_TYPE_I 2 +#define V4L2_H264_SLICE_TYPE_SP 3 +#define V4L2_H264_SLICE_TYPE_SI 4 + +#define V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED 0x01 +#define V4L2_H264_SLICE_FLAG_SP_FOR_SWITCH 0x02 #define V4L2_CID_STATELESS_H264_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 6) /** @@ -1707,7 +1707,6 @@ struct v4l2_ctrl_h264_decode_params { __u32 flags; }; - /* Stateless FWHT control, used by the vicodec driver */ /* Current FWHT version */ -- cgit v1.2.3 From ee63609454838ea2b108f96f74a287be72d281ee Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Fri, 25 Jul 2025 23:22:19 +1000 Subject: wifi: mac80211: support block bitmap S1G TIM encoding An S1G TIM PVB is encoded differently compared to a non-s1g TIM PVB. As the AP dictates which encoding mode it uses, here we only implement block bitmap encoding. This is the default encoding mode used by all current vendor implementations. Additionally, S1G has a maximum AID count of 8192, however we are limiting the current implementation to 1600. This has no resemblance to the standard and is purely an implementation detail. The reason for this is due to the TIM element's maximum length of 255. This allows for, at most, 25 encoded blocks for a PVB encoded with block bitmap. Support for the maximum of 8192 AIDs will require an implementation of page slicing to be added to mac80211. As a result, we perform extra validation on both the STA and AP side when receiving an AID as an S1G interface. Add support for block bitmap encoding for an S1G AP and limit the maximum AID count to 1600 for the current mac80211 implementations.
Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250725132221.258217-2-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d1a14f2892d9..a4bc0c2729f6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2283,7 +2283,8 @@ enum nl80211_commands { * @NL80211_ATTR_PEER_AID: Association ID for the peer TDLS station (u16). * This is similar to @NL80211_ATTR_STA_AID but with a difference of being * allowed to be used with the first @NL80211_CMD_SET_STATION command to - * update a TDLS peer STA entry. + * update a TDLS peer STA entry. For S1G interfaces, this is limited to + * 1600 for the current mac80211 implementation. * * @NL80211_ATTR_COALESCE_RULE: Coalesce rule information. * -- cgit v1.2.3 From e0c47c6229c25b54440fe1f84a0ff533942290b1 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Fri, 25 Jul 2025 23:22:20 +1000 Subject: wifi: mac80211: support parsing S1G TIM PVB An S1G TIM PVB has 3 mandatory encoding modes, those being block bitmap, single AID and OLB alongside the ability for each encoding mode to be inverted. Introduce the ability to parse the 3 encoding formats. The implementation specification for the encoding formats can be found in IEEE80211-2024 9.4.2.5.
Signed-off-by: Arien Judge Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250725132221.258217-3-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 265 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 256 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e5a2096e022e..d350263f23f3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -220,6 +220,12 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_MAX_AID_S1G 8191 #define IEEE80211_MAX_TIM_LEN 251 #define IEEE80211_MAX_MESH_PEERINGS 63 + +/* S1G encoding types */ +#define IEEE80211_S1G_TIM_ENC_MODE_BLOCK 0 +#define IEEE80211_S1G_TIM_ENC_MODE_SINGLE 1 +#define IEEE80211_S1G_TIM_ENC_MODE_OLB 2 + /* Maximum size for the MA-UNITDATA primitive, 802.11 standard section 6.2.1.1.2. @@ -4757,15 +4763,8 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) return 1024 * tu; } -/** - * ieee80211_check_tim - check if AID bit is set in TIM - * @tim: the TIM IE - * @tim_len: length of the TIM IE - * @aid: the AID to look for - * Return: whether or not traffic is indicated in the TIM for the given AID - */ -static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, - u8 tim_len, u16 aid) +static inline bool __ieee80211_check_tim(const struct ieee80211_tim_ie *tim, + u8 tim_len, u16 aid) { u8 mask; u8 index, indexn1, indexn2; @@ -4788,6 +4787,254 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, return !!(tim->virtual_map[index] & mask); } +struct s1g_tim_aid { + u16 aid; + u8 target_blk; /* Target block index */ + u8 target_subblk; /* Target subblock index */ + u8 target_subblk_bit; /* Target subblock bit */ +}; + +struct s1g_tim_enc_block { + u8 enc_mode; + bool inverse; + const u8 *ptr; + u8 len; + + /* + * For an OLB encoded block that spans multiple blocks, this + * is the offset 
into the span described by that encoded block. + */ + u8 olb_blk_offset; +}; + +/* + * Helper routines to quickly extract the length of an encoded block. Validation + * is also performed to ensure the length extracted lies within the TIM. + */ + +static inline int ieee80211_s1g_len_bitmap(const u8 *ptr, const u8 *end) +{ + u8 blkmap; + u8 n_subblks; + + if (ptr >= end) + return -EINVAL; + + blkmap = *ptr; + n_subblks = hweight8(blkmap); + + if (ptr + 1 + n_subblks > end) + return -EINVAL; + + return 1 + n_subblks; +} + +static inline int ieee80211_s1g_len_single(const u8 *ptr, const u8 *end) +{ + return (ptr + 1 > end) ? -EINVAL : 1; +} + +static inline int ieee80211_s1g_len_olb(const u8 *ptr, const u8 *end) +{ + if (ptr >= end) + return -EINVAL; + + return (ptr + 1 + *ptr > end) ? -EINVAL : 1 + *ptr; +} + +/* + * Enumerate all encoded blocks until we find the encoded block that describes + * our target AID. OLB is a special case as a single encoded block can describe + * multiple blocks as a single encoded block. + */ +static inline int ieee80211_s1g_find_target_block(struct s1g_tim_enc_block *enc, + const struct s1g_tim_aid *aid, + const u8 *ptr, const u8 *end) +{ + /* need at least block-control octet */ + while (ptr + 1 <= end) { + u8 ctrl = *ptr++; + u8 mode = ctrl & 0x03; + bool contains, inverse = ctrl & BIT(2); + u8 span, blk_off = ctrl >> 3; + int len; + + switch (mode) { + case IEEE80211_S1G_TIM_ENC_MODE_BLOCK: + len = ieee80211_s1g_len_bitmap(ptr, end); + contains = blk_off == aid->target_blk; + break; + case IEEE80211_S1G_TIM_ENC_MODE_SINGLE: + len = ieee80211_s1g_len_single(ptr, end); + contains = blk_off == aid->target_blk; + break; + case IEEE80211_S1G_TIM_ENC_MODE_OLB: + len = ieee80211_s1g_len_olb(ptr, end); + /* + * An OLB encoded block can describe more then one + * block, meaning an encoded OLB block can span more + * then a single block. 
+ */ + if (len > 0) { + /* Minus one for the length octet */ + span = DIV_ROUND_UP(len - 1, 8); + /* + * Check if our target block lies within the + * block span described by this encoded block. + */ + contains = (aid->target_blk >= blk_off) && + (aid->target_blk < blk_off + span); + } + break; + default: + return -EOPNOTSUPP; + } + + if (len < 0) + return len; + + if (contains) { + enc->enc_mode = mode; + enc->inverse = inverse; + enc->ptr = ptr; + enc->len = (u8)len; + enc->olb_blk_offset = blk_off; + return 0; + } + + ptr += len; + } + + return -ENOENT; +} + +static inline bool ieee80211_s1g_parse_bitmap(struct s1g_tim_enc_block *enc, + struct s1g_tim_aid *aid) +{ + const u8 *ptr = enc->ptr; + u8 blkmap = *ptr++; + + /* + * If our block bitmap does not contain a set bit that corresponds + * to our AID, it could mean a variety of things depending on if + * the encoding mode is inverted or not. + * + * 1. If inverted, it means the entire subblock is present and hence + * our AID has been set. + * 2. If not inverted, it means our subblock is not present and hence + * it is all zero meaning our AID is not set. + */ + if (!(blkmap & BIT(aid->target_subblk))) + return enc->inverse; + + /* + * Increment ptr by the number of set subblocks that appear before our + * target subblock. If our target subblock is 0, do nothing as ptr + * already points to our target subblock. + */ + if (aid->target_subblk) + ptr += hweight8(blkmap & GENMASK(aid->target_subblk - 1, 0)); + + return !!(*ptr & BIT(aid->target_subblk_bit)) ^ enc->inverse; +} + +static inline bool ieee80211_s1g_parse_single(struct s1g_tim_enc_block *enc, + struct s1g_tim_aid *aid) +{ + /* + * Single AID mode describes, as the name suggests, a single AID + * within the block described by the encoded block. The octet + * contains the 6 LSBs of the AID described in the block. The other + * 2 bits are reserved. 
When inversed, every single AID described + * by the current block have buffered traffic except for the AID + * described in the single AID octet. + */ + return ((*enc->ptr & 0x3f) == (aid->aid & 0x3f)) ^ enc->inverse; +} + +static inline bool ieee80211_s1g_parse_olb(struct s1g_tim_enc_block *enc, + struct s1g_tim_aid *aid) +{ + const u8 *ptr = enc->ptr; + u8 blk_len = *ptr++; + /* + * Given an OLB encoded block that describes multiple blocks, + * calculate the offset into the span. Then calculate the + * subblock location normally. + */ + u16 span_offset = aid->target_blk - enc->olb_blk_offset; + u16 subblk_idx = span_offset * 8 + aid->target_subblk; + + if (subblk_idx >= blk_len) + return enc->inverse; + + return !!(ptr[subblk_idx] & BIT(aid->target_subblk_bit)) ^ enc->inverse; +} + +/* + * An S1G PVB has 3 non optional encoding types, each that can be inverted. + * An S1G PVB is constructed with zero or more encoded block subfields. Each + * encoded block represents a single "block" of AIDs (64), and each encoded + * block can contain one of the 3 encoding types alongside a single bit for + * whether the bits should be inverted. + * + * As the standard makes no guarantee about the ordering of encoded blocks, + * we must parse every encoded block in the worst case scenario given an + * AID that lies within the last block. + */ +static inline bool ieee80211_s1g_check_tim(const struct ieee80211_tim_ie *tim, + u8 tim_len, u16 aid) +{ + int err; + struct s1g_tim_aid target_aid; + struct s1g_tim_enc_block enc_blk; + + if (tim_len < 3) + return false; + + target_aid.aid = aid; + target_aid.target_blk = (aid >> 6) & 0x1f; + target_aid.target_subblk = (aid >> 3) & 0x7; + target_aid.target_subblk_bit = aid & 0x7; + + /* + * Find our AIDs target encoded block and fill &enc_blk with the + * encoded blocks information. If no entry is found or an error + * occurs return false. 
+ */ + err = ieee80211_s1g_find_target_block(&enc_blk, &target_aid, + tim->virtual_map, + (const u8 *)tim + tim_len + 2); + if (err) + return false; + + switch (enc_blk.enc_mode) { + case IEEE80211_S1G_TIM_ENC_MODE_BLOCK: + return ieee80211_s1g_parse_bitmap(&enc_blk, &target_aid); + case IEEE80211_S1G_TIM_ENC_MODE_SINGLE: + return ieee80211_s1g_parse_single(&enc_blk, &target_aid); + case IEEE80211_S1G_TIM_ENC_MODE_OLB: + return ieee80211_s1g_parse_olb(&enc_blk, &target_aid); + default: + return false; + } +} + +/** + * ieee80211_check_tim - check if AID bit is set in TIM + * @tim: the TIM IE + * @tim_len: length of the TIM IE + * @aid: the AID to look for + * @s1g: whether the TIM is from an S1G PPDU + * Return: whether or not traffic is indicated in the TIM for the given AID + */ +static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, + u8 tim_len, u16 aid, bool s1g) +{ + return s1g ? ieee80211_s1g_check_tim(tim, tim_len, aid) : + __ieee80211_check_tim(tim, tim_len, aid); +} + /** * ieee80211_get_tdls_action - get TDLS action code * @skb: the skb containing the frame, length will not be checked -- cgit v1.2.3 From 5f9d5fd8e08968e66d0212f782fc24d76e52800f Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 12 Aug 2025 12:53:28 +0530 Subject: wifi: cfg80211: fix return value in cfg80211_get_radio_idx_by_chan() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a valid radio index is not found, the function returns -ENOENT. If the channel argument itself is invalid, it returns -EINVAL. However, since the caller only checks for < 0, the distinction between these error codes is not utilized much. Also, handling these two distinct error codes throughout the codebase adds complexity, as both cases must be addressed separately. A subsequent change aims to simplify this by using a single error code for all invalid cases, making error handling more consistent and streamlined. 
To support this change, update the return value to -EINVAL when a valid radio index is not found. This is still appropriate because, even if the channel argument is structurally valid, the absence of a corresponding radio index implies that the argument is effectively invalid—otherwise, a valid index would have been found. Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20250812-fix_scan_ap_flag_requirement_during_mlo-v4-1-383ffb6da213@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 406626ff6cc8..cb1c36be2749 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -9548,7 +9548,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, * @wiphy: the wiphy * @chan: channel for which the supported radio index is required * - * Return: radio index on success or a negative error code + * Return: radio index on success or -EINVAL otherwise */ int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy, const struct ieee80211_channel *chan); -- cgit v1.2.3 From d0bf06158c39e7129524dd8b43b82aed84d68faa Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Fri, 15 Aug 2025 14:30:11 -0700 Subject: wifi: nl80211: Add EHT fixed Tx rate support Add new attributes to support EHT MCS/NSS Tx rates and EHT GI/LTF. Parse EHT fixed MCS/NSS Tx rates and EHT GI/LTF values passed by the userspace, validate and add as part of cfg80211_bitrate_mask. MCS mask is constructed by new function, eht_build_mcs_mask(). Max NSS supported for MCS rates of 7, 9, 11 and 13 is utilized to set MCS bitmask for each NSS. MCS rates 14, and 15 if supported, are set only for NSS = 0. 
Co-developed-by: Aloka Dixit Signed-off-by: Aloka Dixit Signed-off-by: Muna Sinada Link: https://patch.msgid.link/20250815213011.2704803-1-muna.sinada@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cb1c36be2749..7d881aa7e48b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -841,9 +841,12 @@ struct cfg80211_bitrate_mask { u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; u16 vht_mcs[NL80211_VHT_NSS_MAX]; u16 he_mcs[NL80211_HE_NSS_MAX]; + u16 eht_mcs[NL80211_EHT_NSS_MAX]; enum nl80211_txrate_gi gi; enum nl80211_he_gi he_gi; + enum nl80211_eht_gi eht_gi; enum nl80211_he_ltf he_ltf; + enum nl80211_eht_ltf eht_ltf; } control[NUM_NL80211_BANDS]; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a4bc0c2729f6..4f08264bbc8e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1943,8 +1943,9 @@ enum nl80211_commands { * The driver must also specify support for this with the extended * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, * NL80211_EXT_FEATURE_BEACON_RATE_HT, - * NL80211_EXT_FEATURE_BEACON_RATE_VHT and - * NL80211_EXT_FEATURE_BEACON_RATE_HE. + * NL80211_EXT_FEATURE_BEACON_RATE_VHT, + * NL80211_EXT_FEATURE_BEACON_RATE_HE and + * NL80211_EXT_FEATURE_BEACON_RATE_EHT. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. 
@@ -3736,6 +3737,22 @@ enum nl80211_eht_gi { NL80211_RATE_INFO_EHT_GI_3_2, }; +/** + * enum nl80211_eht_ltf - EHT long training field + * @NL80211_RATE_INFO_EHT_1XLTF: 3.2 usec + * @NL80211_RATE_INFO_EHT_2XLTF: 6.4 usec + * @NL80211_RATE_INFO_EHT_4XLTF: 12.8 usec + * @NL80211_RATE_INFO_EHT_6XLTF: 19.2 usec + * @NL80211_RATE_INFO_EHT_8XLTF: 25.6 usec + */ +enum nl80211_eht_ltf { + NL80211_RATE_INFO_EHT_1XLTF, + NL80211_RATE_INFO_EHT_2XLTF, + NL80211_RATE_INFO_EHT_4XLTF, + NL80211_RATE_INFO_EHT_6XLTF, + NL80211_RATE_INFO_EHT_8XLTF, +}; + /** * enum nl80211_eht_ru_alloc - EHT RU allocation values * @NL80211_RATE_INFO_EHT_RU_ALLOC_26: 26-tone RU allocation @@ -5482,6 +5499,10 @@ enum nl80211_key_attributes { * see &struct nl80211_txrate_he * @NL80211_TXRATE_HE_GI: configure HE GI, 0.8us, 1.6us and 3.2us. * @NL80211_TXRATE_HE_LTF: configure HE LTF, 1XLTF, 2XLTF and 4XLTF. + * @NL80211_TXRATE_EHT: EHT rates allowed for TX rate selection, + * see &struct nl80211_txrate_eht + * @NL80211_TXRATE_EHT_GI: configure EHT GI, (u8, see &enum nl80211_eht_gi) + * @NL80211_TXRATE_EHT_LTF: configure EHT LTF, (u8, see &enum nl80211_eht_ltf) * @__NL80211_TXRATE_AFTER_LAST: internal * @NL80211_TXRATE_MAX: highest TX rate attribute */ @@ -5494,6 +5515,9 @@ enum nl80211_tx_rate_attributes { NL80211_TXRATE_HE, NL80211_TXRATE_HE_GI, NL80211_TXRATE_HE_LTF, + NL80211_TXRATE_EHT, + NL80211_TXRATE_EHT_GI, + NL80211_TXRATE_EHT_LTF, /* keep last */ __NL80211_TXRATE_AFTER_LAST, @@ -5526,6 +5550,15 @@ enum nl80211_txrate_gi { NL80211_TXRATE_FORCE_LGI, }; +#define NL80211_EHT_NSS_MAX 16 +/** + * struct nl80211_txrate_eht - EHT MCS/NSS txrate bitmap + * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.) 
+ */ +struct nl80211_txrate_eht { + __u16 mcs[NL80211_EHT_NSS_MAX]; +}; + /** * enum nl80211_band - Frequency band * @NL80211_BAND_2GHZ: 2.4 GHz ISM band @@ -6650,6 +6683,9 @@ enum nl80211_feature_flags { * (signaling and payload protected) A-MSDUs and this shall be advertised * in the RSNXE. * + * @NL80211_EXT_FEATURE_BEACON_RATE_EHT: Driver supports beacon rate + * configuration (AP/mesh) with EHT rates. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6725,6 +6761,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_OWE_OFFLOAD_AP, NL80211_EXT_FEATURE_DFS_CONCURRENT, NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT, + NL80211_EXT_FEATURE_BEACON_RATE_EHT, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 24185534915b5d926ded098336f47bdcca333aec Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 17 Aug 2025 21:04:32 +0200 Subject: wifi: nl80211: allow drivers to support subset of NL80211_CMD_SET_BSS The so-called fullmac devices rely on firmware functionality and/or API to change BSS parameters. Today there are limited drivers supporting the nl80211 primitive, but they only handle a subset of the bss parameters passed if any. The mac80211 driver does handle all parameters and stores their configured values. Some of the BSS parameters were already conditional by wiphy->features. For these the wiphy->bss_param_support and wiphy->features fields are silently aligned in wiphy_register(). Maybe better to issue a warning instead when they are misaligned. 
Signed-off-by: Arend van Spriel Link: https://patch.msgid.link/20250817190435.1495094-2-arend.vanspriel@broadcom.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 29 +++++++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 4 ++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7d881aa7e48b..4072a67c9cc9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2459,6 +2459,29 @@ struct mpath_info { int generation; }; +/** + * enum wiphy_bss_param_flags - bit positions for supported bss parameters. + * + * @WIPHY_BSS_PARAM_CTS_PROT: support changing CTS protection. + * @WIPHY_BSS_PARAM_SHORT_PREAMBLE: support changing short preamble usage. + * @WIPHY_BSS_PARAM_SHORT_SLOT_TIME: support changing short slot time usage. + * @WIPHY_BSS_PARAM_BASIC_RATES: support reconfiguring basic rates. + * @WIPHY_BSS_PARAM_AP_ISOLATE: support changing AP isolation. + * @WIPHY_BSS_PARAM_HT_OPMODE: support changing HT operating mode. + * @WIPHY_BSS_PARAM_P2P_CTWINDOW: support reconfiguring ctwindow. + * @WIPHY_BSS_PARAM_P2P_OPPPS: support changing P2P opportunistic power-save. + */ +enum wiphy_bss_param_flags { + WIPHY_BSS_PARAM_CTS_PROT = BIT(0), + WIPHY_BSS_PARAM_SHORT_PREAMBLE = BIT(1), + WIPHY_BSS_PARAM_SHORT_SLOT_TIME = BIT(2), + WIPHY_BSS_PARAM_BASIC_RATES = BIT(3), + WIPHY_BSS_PARAM_AP_ISOLATE = BIT(4), + WIPHY_BSS_PARAM_HT_OPMODE = BIT(5), + WIPHY_BSS_PARAM_P2P_CTWINDOW = BIT(6), + WIPHY_BSS_PARAM_P2P_OPPPS = BIT(7), +}; + /** * struct bss_parameters - BSS parameters * @@ -5785,6 +5808,11 @@ struct wiphy_radio { * and probe responses. This value should be set if the driver * wishes to limit the number of csa counters. Default (0) means * infinite. + * @bss_param_support: bitmask indicating which bss_parameters as defined in + * &struct bss_parameters the driver can actually handle in the + * .change_bss() callback. 
The bit positions are defined in &enum + * wiphy_bss_param_flags. + * * @bss_select_support: bitmask indicating the BSS selection criteria supported * by the driver in the .connect() callback. The bit position maps to the * attribute indices defined in &enum nl80211_bss_select_attr. @@ -5970,6 +5998,7 @@ struct wiphy { u8 max_num_csa_counters; + u32 bss_param_support; u32 bss_select_support; u8 nan_supported_bands; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4f08264bbc8e..6c07100fc01f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2930,6 +2930,9 @@ enum nl80211_commands { * required alongside this attribute. Refer to * @enum nl80211_s1g_short_beacon_attrs for the attribute definitions. * + * @NL80211_ATTR_BSS_PARAM: nested attribute used with %NL80211_CMD_GET_WIPHY + * which indicates which BSS parameters can be modified. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3491,6 +3494,7 @@ enum nl80211_attrs { NL80211_ATTR_S1G_LONG_BEACON_PERIOD, NL80211_ATTR_S1G_SHORT_BEACON, + NL80211_ATTR_BSS_PARAM, /* add attributes here, update the policy in nl80211.c */ -- cgit v1.2.3 From 4f652a390db4246c5d3c51bf25d03ed0e4178fdc Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 17 Aug 2025 21:04:34 +0200 Subject: wifi: nl80211: strict checking attributes for NL80211_CMD_SET_BSS Assure user-space only modifies attributes for NL80211_CMD_SET_BSS that are supported by the driver. This stricter checking is only done when user-space commits to it by including NL80211_ATTR_BSS_PARAM. 
Signed-off-by: Arend van Spriel Link: https://patch.msgid.link/20250817190435.1495094-4-arend.vanspriel@broadcom.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6c07100fc01f..aed0b4c5d5e8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2931,7 +2931,10 @@ enum nl80211_commands { * @enum nl80211_s1g_short_beacon_attrs for the attribute definitions. * * @NL80211_ATTR_BSS_PARAM: nested attribute used with %NL80211_CMD_GET_WIPHY - * which indicates which BSS parameters can be modified. + * which indicates which BSS parameters can be modified. The attribute can + * also be used as flag attribute by user-space in %NL80211_CMD_SET_BSS to + * indicate that it wants strict checking on the BSS parameters to be + * modified. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined -- cgit v1.2.3 From 7d298d25ce81251068bb4ea1d92813ec764a9fec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:06 +0200 Subject: vdso: Move ENABLE_COMPAT_VDSO from core to arm64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ENABLE_COMPAT_VDSO symbol is only used by arm64 and only for the time-related functionality. There should be no new users, so it doesn't need to be in the generic vDSO code. Move the logic into arm64 architecture-specific code and replace the explicit define by the standard '#ifdef __aarch64__'.
Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-3-d9b65750e49f@linutronix.de --- include/vdso/datapage.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 02533038640e..0b1982f15de4 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -196,11 +196,7 @@ enum vdso_pages { * - clock_gettime_fallback(): fallback for clock_gettime. * - clock_getres_fallback(): fallback for clock_getres. */ -#ifdef ENABLE_COMPAT_VDSO -#include -#else #include -#endif /* ENABLE_COMPAT_VDSO */ #else /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 7b338f6d4e3d6baa057e3505592a86f6410d68ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:12 +0200 Subject: vdso: Drop Kconfig GENERIC_VDSO_DATA_STORE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All users of the generic vDSO library also use the generic vDSO datastore. Remove the now unnecessary Kconfig symbol. 
Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-9-d9b65750e49f@linutronix.de --- include/asm-generic/vdso/vsyscall.h | 4 ---- include/vdso/datapage.h | 5 +---- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index 7fc0b560007d..5c6d9799f4e7 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -4,8 +4,6 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_GENERIC_VDSO_DATA_STORE - #ifndef __arch_get_vdso_u_time_data static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) { @@ -20,8 +18,6 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo } #endif -#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ - #ifndef __arch_update_vdso_clock static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc) { diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 0b1982f15de4..23c39b96190f 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -31,7 +31,7 @@ struct arch_vdso_time_data {}; #if defined(CONFIG_ARCH_HAS_VDSO_ARCH_DATA) #include -#elif defined(CONFIG_GENERIC_VDSO_DATA_STORE) +#else struct vdso_arch_data { /* Needed for the generic code, never actually used at runtime */ char __unused; @@ -164,7 +164,6 @@ struct vdso_rng_data { * With the hidden visibility, the compiler simply generates a PC-relative * relocation, and this is what we need. 
*/ -#ifdef CONFIG_GENERIC_VDSO_DATA_STORE extern struct vdso_time_data vdso_u_time_data __attribute__((visibility("hidden"))); extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden"))); extern struct vdso_arch_data vdso_u_arch_data __attribute__((visibility("hidden"))); @@ -185,8 +184,6 @@ enum vdso_pages { VDSO_NR_PAGES }; -#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ - /* * The generic vDSO implementation requires that gettimeofday.h * provides: -- cgit v1.2.3 From 0dcfb6fcdd085bbfcdfdcf64a7d4a75c63c108af Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 4 Sep 2025 08:19:53 +0100 Subject: dt-bindings: clock: renesas,r9a09g077/87: Add Ethernet clock IDs Add clock definitions for Ethernet (ETCLK A-E) to both R9A09G077 and R9A09G087 SoCs. These definitions are required for describing Ethernet devices in DT. Signed-off-by: Lad Prabhakar Acked-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20250904071954.3176806-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h | 5 +++++ include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h index 0c2ce81a8744..2a805e06487b 100644 --- a/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h +++ b/include/dt-bindings/clock/renesas,r9a09g077-cpg-mssr.h @@ -26,5 +26,10 @@ #define R9A09G077_CLK_PCLKL 14 #define R9A09G077_SDHI_CLKHS 15 #define R9A09G077_USB_CLK 16 +#define R9A09G077_ETCLKA 17 +#define R9A09G077_ETCLKB 18 +#define R9A09G077_ETCLKC 19 +#define R9A09G077_ETCLKD 20 +#define R9A09G077_ETCLKE 21 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G077_CPG_H__ */ diff --git a/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h b/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h index 
70ee883f2386..09da0ad33be6 100644 --- a/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h +++ b/include/dt-bindings/clock/renesas,r9a09g087-cpg-mssr.h @@ -26,5 +26,10 @@ #define R9A09G087_CLK_PCLKL 14 #define R9A09G087_SDHI_CLKHS 15 #define R9A09G087_USB_CLK 16 +#define R9A09G087_ETCLKA 17 +#define R9A09G087_ETCLKB 18 +#define R9A09G087_ETCLKC 19 +#define R9A09G087_ETCLKD 20 +#define R9A09G087_ETCLKE 21 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G087_CPG_H__ */ -- cgit v1.2.3 From 9428fff44f0c5823fde733b64a344d7a6eda3873 Mon Sep 17 00:00:00 2001 From: "hongyu.chen1" Date: Fri, 22 Aug 2025 13:39:55 +0800 Subject: dt-bindings: power: add Amlogic S6 S7 S7D power domains Add devicetree binding document and related header file for Amlogic S6 S7 S7D secure power domains. Signed-off-by: hongyu.chen1 Signed-off-by: Xianwei Zhao Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250822-pm-s6-s7-s7d-v1-1-82e3f3aff327@amlogic.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/amlogic,s6-pwrc.h | 29 ++++++++++++++++++++++++++++ include/dt-bindings/power/amlogic,s7-pwrc.h | 20 +++++++++++++++++++ include/dt-bindings/power/amlogic,s7d-pwrc.h | 27 ++++++++++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 include/dt-bindings/power/amlogic,s6-pwrc.h create mode 100644 include/dt-bindings/power/amlogic,s7-pwrc.h create mode 100644 include/dt-bindings/power/amlogic,s7d-pwrc.h (limited to 'include') diff --git a/include/dt-bindings/power/amlogic,s6-pwrc.h b/include/dt-bindings/power/amlogic,s6-pwrc.h new file mode 100644 index 000000000000..2c005864ae73 --- /dev/null +++ b/include/dt-bindings/power/amlogic,s6-pwrc.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (C) 2025 Amlogic, Inc. 
All rights reserved + */ +#ifndef _DT_BINDINGS_AMLOGIC_S6_POWER_H +#define _DT_BINDINGS_AMLOGIC_S6_POWER_H + +#define PWRC_S6_DSPA_ID 0 +#define PWRC_S6_DOS_HEVC_ID 1 +#define PWRC_S6_DOS_VDEC_ID 2 +#define PWRC_S6_VPU_HDMI_ID 3 +#define PWRC_S6_U2DRD_ID 4 +#define PWRC_S6_U3DRD_ID 5 +#define PWRC_S6_SD_EMMC_C_ID 6 +#define PWRC_S6_GE2D_ID 7 +#define PWRC_S6_AMFC_ID 8 +#define PWRC_S6_VC9000E_ID 9 +#define PWRC_S6_DEWARP_ID 10 +#define PWRC_S6_VICP_ID 11 +#define PWRC_S6_SD_EMMC_A_ID 12 +#define PWRC_S6_SD_EMMC_B_ID 13 +#define PWRC_S6_ETH_ID 14 +#define PWRC_S6_PCIE_ID 15 +#define PWRC_S6_NNA_4T_ID 16 +#define PWRC_S6_AUDIO_ID 17 +#define PWRC_S6_AUCPU_ID 18 +#define PWRC_S6_ADAPT_ID 19 + +#endif diff --git a/include/dt-bindings/power/amlogic,s7-pwrc.h b/include/dt-bindings/power/amlogic,s7-pwrc.h new file mode 100644 index 000000000000..3f21d095f784 --- /dev/null +++ b/include/dt-bindings/power/amlogic,s7-pwrc.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (C) 2025 Amlogic, Inc. All rights reserved + */ +#ifndef _DT_BINDINGS_AMLOGIC_S7_POWER_H +#define _DT_BINDINGS_AMLOGIC_S7_POWER_H + +#define PWRC_S7_DOS_HEVC_ID 0 +#define PWRC_S7_DOS_VDEC_ID 1 +#define PWRC_S7_VPU_HDMI_ID 2 +#define PWRC_S7_USB_COMB_ID 3 +#define PWRC_S7_SD_EMMC_C_ID 4 +#define PWRC_S7_GE2D_ID 5 +#define PWRC_S7_SD_EMMC_A_ID 6 +#define PWRC_S7_SD_EMMC_B_ID 7 +#define PWRC_S7_ETH_ID 8 +#define PWRC_S7_AUCPU_ID 9 +#define PWRC_S7_AUDIO_ID 10 + +#endif diff --git a/include/dt-bindings/power/amlogic,s7d-pwrc.h b/include/dt-bindings/power/amlogic,s7d-pwrc.h new file mode 100644 index 000000000000..c6998553670a --- /dev/null +++ b/include/dt-bindings/power/amlogic,s7d-pwrc.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (C) 2025 Amlogic, Inc. 
All rights reserved + */ +#ifndef _DT_BINDINGS_AMLOGIC_S7D_POWER_H +#define _DT_BINDINGS_AMLOGIC_S7D_POWER_H + +#define PWRC_S7D_DOS_HCODEC_ID 0 +#define PWRC_S7D_DOS_HEVC_ID 1 +#define PWRC_S7D_DOS_VDEC_ID 2 +#define PWRC_S7D_VPU_HDMI_ID 3 +#define PWRC_S7D_USB_U2DRD_ID 4 +#define PWRC_S7D_USB_U2H_ID 5 +#define PWRC_S7D_SSD_EMMC_C_ID 6 +#define PWRC_S7D_GE2D_ID 7 +#define PWRC_S7D_AMFC_ID 8 +#define PWRC_S7D_EMMC_A_ID 9 +#define PWRC_S7D_EMMC_B_ID 10 +#define PWRC_S7D_ETH_ID 11 +#define PWRC_S7D_AUCPU_ID 12 +#define PWRC_S7D_AUDIO_ID 13 +#define PWRC_S7D_SRAMA_ID 14 +#define PWRC_S7D_DMC0_ID 15 +#define PWRC_S7D_DMC1_ID 16 +#define PWRC_S7D_DDR_ID 17 + +#endif -- cgit v1.2.3 From ec630c2c8ce215dd365b8c3644f004f645714a0f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 20 Aug 2025 17:37:17 +0100 Subject: ASoC: SDCA: Reorder members of hide struct to remove holes Remove some padding holes in the sdca_entity_hide struct by reordering the members. Signed-off-by: Charles Keepax Message-ID: <20250820163717.1095846-4-ckeepax@opensource.cirrus.com> Signed-off-by: Mark Brown --- include/sound/sdca_function.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/sound/sdca_function.h b/include/sound/sdca_function.h index 06ec126cdcc3..ea68856e4c8c 100644 --- a/include/sound/sdca_function.h +++ b/include/sound/sdca_function.h @@ -1063,27 +1063,30 @@ struct sdca_entity_ge { /** * struct sdca_entity_hide - information specific to HIDE Entities * @hid: HID device structure - * @hidtx_ids: HIDTx Report ID * @num_hidtx_ids: number of HIDTx Report ID - * @hidrx_ids: HIDRx Report ID * @num_hidrx_ids: number of HIDRx Report ID - * @hide_reside_function_num: indicating which Audio Function Numbers within this Device - * @max_delay: the maximum time in microseconds allowed for the Device to change the ownership from Device to Host - * @af_number_list: which Audio Function Numbers within this Device are 
sending/receiving the messages in this HIDE - * @hid_desc: HID descriptor for the HIDE Entity + * @hidtx_ids: HIDTx Report ID + * @hidrx_ids: HIDRx Report ID + * @af_number_list: which Audio Function Numbers within this Device are + * sending/receiving the messages in this HIDE + * @hide_reside_function_num: indicating which Audio Function Numbers + * within this Device + * @max_delay: the maximum time in microseconds allowed for the Device + * to change the ownership from Device to Host * @hid_report_desc: HID Report Descriptor for the HIDE Entity + * @hid_desc: HID descriptor for the HIDE Entity */ struct sdca_entity_hide { struct hid_device *hid; unsigned int *hidtx_ids; - int num_hidtx_ids; unsigned int *hidrx_ids; + int num_hidtx_ids; int num_hidrx_ids; + unsigned int af_number_list[SDCA_MAX_FUNCTION_COUNT]; unsigned int hide_reside_function_num; unsigned int max_delay; - unsigned int af_number_list[SDCA_MAX_FUNCTION_COUNT]; - struct hid_descriptor hid_desc; unsigned char *hid_report_desc; + struct hid_descriptor hid_desc; }; /** -- cgit v1.2.3 From 4d32c1f66a768dd281edadbd244dce1c63c0899d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20Cz=C3=A9m=C3=A1n?= Date: Wed, 3 Sep 2025 23:08:21 +0200 Subject: dt-bindings: clock: qcom: Add MSM8937 Global Clock Controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add device tree bindings for the global clock controller on Qualcomm MSM8937 platform. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Barnabás Czémán Link: https://lore.kernel.org/r/20250903-msm8937-v9-1-a097c91c5801@mainlining.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,gcc-msm8917.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-msm8917.h b/include/dt-bindings/clock/qcom,gcc-msm8917.h index 4b421e7414b5..4e3897b3669d 100644 --- a/include/dt-bindings/clock/qcom,gcc-msm8917.h +++ b/include/dt-bindings/clock/qcom,gcc-msm8917.h @@ -170,6 +170,23 @@ #define VFE1_CLK_SRC 163 #define VSYNC_CLK_SRC 164 #define GPLL0_SLEEP_CLK_SRC 165 +/* Additional MSM8937-specific clocks */ +#define MSM8937_BLSP1_QUP1_I2C_APPS_CLK_SRC 166 +#define MSM8937_BLSP1_QUP1_SPI_APPS_CLK_SRC 167 +#define MSM8937_BLSP2_QUP4_I2C_APPS_CLK_SRC 168 +#define MSM8937_BLSP2_QUP4_SPI_APPS_CLK_SRC 169 +#define MSM8937_BYTE1_CLK_SRC 170 +#define MSM8937_ESC1_CLK_SRC 171 +#define MSM8937_PCLK1_CLK_SRC 172 +#define MSM8937_GCC_BLSP1_QUP1_I2C_APPS_CLK 173 +#define MSM8937_GCC_BLSP1_QUP1_SPI_APPS_CLK 174 +#define MSM8937_GCC_BLSP2_QUP4_I2C_APPS_CLK 175 +#define MSM8937_GCC_BLSP2_QUP4_SPI_APPS_CLK 176 +#define MSM8937_GCC_MDSS_BYTE1_CLK 177 +#define MSM8937_GCC_MDSS_ESC1_CLK 178 +#define MSM8937_GCC_MDSS_PCLK1_CLK 179 +#define MSM8937_GCC_OXILI_AON_CLK 180 +#define MSM8937_GCC_OXILI_TIMER_CLK 181 /* GCC block resets */ #define GCC_CAMSS_MICRO_BCR 0 @@ -187,5 +204,7 @@ #define VENUS_GDSC 5 #define VFE0_GDSC 6 #define VFE1_GDSC 7 +/* Additional MSM8937-specific GDSCs */ +#define MSM8937_OXILI_CX_GDSC 8 #endif -- cgit v1.2.3 From eb9bc162775cabfc4cf2b37cb0d3c2c2bf4c4b54 Mon Sep 17 00:00:00 2001 From: Denzeel Oliva Date: Sun, 31 Aug 2025 12:13:14 +0000 Subject: dt-bindings: clock: exynos990: Add LHS_ACEL clock ID for HSI0 block Add the missing LHS_ACEL clock ID for the HSI0 block.
This clock is required for proper USB operation, as without it, USB connections fail with errors like device descriptor read timeouts and address response issues. Signed-off-by: Denzeel Oliva Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250831-usb-v2-1-00b9c0559733@gmail.com Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynos990.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynos990.h b/include/dt-bindings/clock/samsung,exynos990.h index c5c79e078f2f..c60f15503d5b 100644 --- a/include/dt-bindings/clock/samsung,exynos990.h +++ b/include/dt-bindings/clock/samsung,exynos990.h @@ -236,6 +236,7 @@ #define CLK_GOUT_HSI0_VGEN_LITE_HSI0_CLK 20 #define CLK_GOUT_HSI0_CMU_HSI0_PCLK 21 #define CLK_GOUT_HSI0_XIU_D_HSI0_ACLK 22 +#define CLK_GOUT_HSI0_LHS_ACEL_D_HSI0_CLK 23 /* CMU_PERIS */ #define CLK_MOUT_PERIS_BUS_USER 1 -- cgit v1.2.3 From c8ab5e888bb6721e6e084881e6e24ef2678832c3 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 1 Sep 2025 09:44:52 +0200 Subject: PCI/AER: Print TLP Log for errors introduced since PCIe r1.1 When reporting an error, the AER driver prints the TLP Header / Prefix Log only for errors enumerated in the AER_LOG_TLP_MASKS macro. The macro was never amended since its introduction in 2006 with commit 6c2b374d7485 ("PCI-Express AER implemetation: AER core and aerdriver"). At the time, PCIe r1.1 was the latest spec revision. Amend the macro with errors defined since then to avoid omitting the TLP Header / Prefix Log for newer errors. The order of the errors in AER_LOG_TLP_MASKS follows PCIe r1.1 sec 6.2.7 rather than 7.10.2, because only the former documents for which errors a TLP Header / Prefix is logged. Retain this order. The section number is still 6.2.7 in today's PCIe r7.0. 
For Completion Timeouts, the TLP Header / Prefix is only logged if the Completion Timeout Prefix / Header Log Capable bit is set in the AER Capabilities and Control register. Introduce a tlp_header_logged() helper to check whether the TLP Header / Prefix Log is populated and use it in the two places which currently match against AER_LOG_TLP_MASKS directly. For Uncorrectable Internal Errors, logging of the TLP Header / Prefix is optional per PCIe r7.0 sec 6.2.7. If needed, drivers could indicate through a flag whether devices are capable and tlp_header_logged() could then check that flag. pciutils introduced macros for newer errors with commit 144b0911cc0b ("ls-ecaps: extend decode support for more fields for AER CE and UE status"): https://git.kernel.org/pub/scm/utils/pciutils/pciutils.git/commit/?id=144b0911cc0b Unfortunately some of those macros are overly long: PCI_ERR_UNC_POISONED_TLP_EGRESS PCI_ERR_UNC_DMWR_REQ_EGRESS_BLOCKED PCI_ERR_UNC_IDE_CHECK PCI_ERR_UNC_MISR_IDE_TLP PCI_ERR_UNC_PCRC_CHECK PCI_ERR_UNC_TLP_XLAT_EGRESS_BLOCKED This seems unsuitable for <linux/pci_regs.h>, so shorten to: PCI_ERR_UNC_POISON_BLK PCI_ERR_UNC_DMWR_BLK PCI_ERR_UNC_IDE_CHECK PCI_ERR_UNC_MISR_IDE PCI_ERR_UNC_PCRC_CHECK PCI_ERR_UNC_XLAT_BLK Note that some of the existing macros in <linux/pci_regs.h> do not match exactly with pciutils (e.g. PCI_ERR_UNC_SDES versus PCI_ERR_UNC_SURPDN), so it does not seem mandatory for them to be identical.
Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/5f707caf1260bd8f15012bb032f7da9a9b898aba.1756712066.git.lukas@wunner.de --- include/uapi/linux/pci_regs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index f5b17745de60..ae1f52e8d515 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -776,6 +776,12 @@ #define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC blocked TLP */ #define PCI_ERR_UNC_ATOMEG 0x01000000 /* Atomic egress blocked */ #define PCI_ERR_UNC_TLPPRE 0x02000000 /* TLP prefix blocked */ +#define PCI_ERR_UNC_POISON_BLK 0x04000000 /* Poisoned TLP Egress Blocked */ +#define PCI_ERR_UNC_DMWR_BLK 0x08000000 /* DMWr Request Egress Blocked */ +#define PCI_ERR_UNC_IDE_CHECK 0x10000000 /* IDE Check Failed */ +#define PCI_ERR_UNC_MISR_IDE 0x20000000 /* Misrouted IDE TLP */ +#define PCI_ERR_UNC_PCRC_CHECK 0x40000000 /* PCRC Check Failed */ +#define PCI_ERR_UNC_XLAT_BLK 0x80000000 /* TLP Translation Egress Blocked */ #define PCI_ERR_UNCOR_MASK 0x08 /* Uncorrectable Error Mask */ /* Same bits as above */ #define PCI_ERR_UNCOR_SEVER 0x0c /* Uncorrectable Error Severity */ @@ -798,6 +804,7 @@ #define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ #define PCI_ERR_CAP_PREFIX_LOG_PRESENT 0x00000800 /* TLP Prefix Log Present */ +#define PCI_ERR_CAP_COMP_TIME_LOG 0x00001000 /* Completion Timeout Prefix/Header Log Capable */ #define PCI_ERR_CAP_TLP_LOG_FLIT 0x00040000 /* TLP was logged in Flit Mode */ #define PCI_ERR_CAP_TLP_LOG_SIZE 0x00f80000 /* Logged TLP Size (only in Flit mode) */ #define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */ -- cgit v1.2.3 From 2f509fe6a42cda845890273fe759fb7ba9edad97 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 2 Sep 2025 22:34:02 -0700 Subject: accel/amdxdna: Add ioctl 
DRM_IOCTL_AMDXDNA_GET_ARRAY Add interface for applications to get information array. The application provides a buffer pointer along with information type, maximum number of entries and maximum size of each entry. The buffer may also contain match conditions based on the information type. After the ioctl completes, the actual number of entries and entry size are returned. (see [1], used by driver runtime library) [1] https://github.com/amd/xdna-driver/blob/main/src/shim/host/platform_host.cpp#L337 Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Maciej Falkowski Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20250903053402.2103196-1-lizhi.hou@amd.com --- include/uapi/drm/amdxdna_accel.h | 111 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index ce523e9ccc52..a1fb9785db77 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -34,6 +34,7 @@ enum amdxdna_drm_ioctl_id { DRM_AMDXDNA_EXEC_CMD, DRM_AMDXDNA_GET_INFO, DRM_AMDXDNA_SET_STATE, + DRM_AMDXDNA_GET_ARRAY = 10, }; /** @@ -455,6 +456,112 @@ struct amdxdna_drm_get_info { __u64 buffer; /* in/out */ }; +#define AMDXDNA_HWCTX_STATE_IDLE 0 +#define AMDXDNA_HWCTX_STATE_ACTIVE 1 + +/** + * struct amdxdna_drm_hwctx_entry - The hardware context array entry + */ +struct amdxdna_drm_hwctx_entry { + /** @context_id: Context ID. */ + __u32 context_id; + /** @start_col: Start AIE array column assigned to context. */ + __u32 start_col; + /** @num_col: Number of AIE array columns assigned to context. */ + __u32 num_col; + /** @hwctx_id: The real hardware context id. */ + __u32 hwctx_id; + /** @pid: ID of process which created this context. */ + __s64 pid; + /** @command_submissions: Number of commands submitted. */ + __u64 command_submissions; + /** @command_completions: Number of commands completed. 
*/ + __u64 command_completions; + /** @migrations: Number of times been migrated. */ + __u64 migrations; + /** @preemptions: Number of times been preempted. */ + __u64 preemptions; + /** @errors: Number of errors happened. */ + __u64 errors; + /** @priority: Context priority. */ + __u64 priority; + /** @heap_usage: Usage of device heap buffer. */ + __u64 heap_usage; + /** @suspensions: Number of times been suspended. */ + __u64 suspensions; + /** + * @state: Context state. + * %AMDXDNA_HWCTX_STATE_IDLE + * %AMDXDNA_HWCTX_STATE_ACTIVE + */ + __u32 state; + /** @pasid: PASID been bound. */ + __u32 pasid; + /** @gops: Giga operations per second. */ + __u32 gops; + /** @fps: Frames per second. */ + __u32 fps; + /** @dma_bandwidth: DMA bandwidth. */ + __u32 dma_bandwidth; + /** @latency: Frame response latency. */ + __u32 latency; + /** @frame_exec_time: Frame execution time. */ + __u32 frame_exec_time; + /** @txn_op_idx: Index of last control code executed. */ + __u32 txn_op_idx; + /** @ctx_pc: Program counter. */ + __u32 ctx_pc; + /** @fatal_error_type: Fatal error type if context crashes. */ + __u32 fatal_error_type; + /** @fatal_error_exception_type: Firmware exception type. */ + __u32 fatal_error_exception_type; + /** @fatal_error_exception_pc: Firmware exception program counter. */ + __u32 fatal_error_exception_pc; + /** @fatal_error_app_module: Exception module name. */ + __u32 fatal_error_app_module; + /** @pad: Structure pad. */ + __u32 pad; +}; + +#define DRM_AMDXDNA_HW_CONTEXT_ALL 0 + +/** + * struct amdxdna_drm_get_array - Get information array. + */ +struct amdxdna_drm_get_array { + /** + * @param: + * + * Supported params: + * + * %DRM_AMDXDNA_HW_CONTEXT_ALL: + * Returns all created hardware contexts. + */ + __u32 param; + /** + * @element_size: + * + * Specifies maximum element size and returns the actual element size. 
+ */ + __u32 element_size; + /** + * @num_element: + * + * Specifies maximum number of elements and returns the actual number + * of elements. + */ + __u32 num_element; /* in/out */ + /** @pad: MBZ */ + __u32 pad; + /** + * @buffer: + * + * Specifies the match conditions and returns the matched information + * array. + */ + __u64 buffer; +}; + enum amdxdna_drm_set_param { DRM_AMDXDNA_SET_POWER_MODE, DRM_AMDXDNA_WRITE_AIE_MEM, @@ -519,6 +626,10 @@ struct amdxdna_drm_set_power_mode { DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \ struct amdxdna_drm_set_state) +#define DRM_IOCTL_AMDXDNA_GET_ARRAY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_ARRAY, \ + struct amdxdna_drm_get_array) + #if defined(__cplusplus) } /* extern c end */ #endif -- cgit v1.2.3 From d8b269e009bbc471cb2735b5f737839495efce3b Mon Sep 17 00:00:00 2001 From: Chuyi Zhou Date: Thu, 4 Sep 2025 15:45:05 +0800 Subject: cgroup: Remove unused cgroup_subsys::post_attach cgroup_subsys::post_attach callback was introduced in commit 5cf1cacb49ae ("cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback") and only cpuset would use this callback to wait for the mm migration to complete at the end of __cgroup_procs_write(). Since the previous patch defers the flush operation until returning to userspace, no one uses this callback now. Remove this callback from cgroup_subsys.
Signed-off-by: Chuyi Zhou Acked-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 539c64eeef38..92ed6d18266d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -763,7 +763,6 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); - void (*post_attach)(void); int (*can_fork)(struct task_struct *task, struct css_set *cset); void (*cancel_fork)(struct task_struct *task, struct css_set *cset); -- cgit v1.2.3 From b28f9eba12a4967eff6e8a1c0512f86f1ac7fa68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 28 Jun 2025 11:37:30 -0400 Subject: change the calling conventions for vfs_parse_fs_string() Absolute majority of callers are passing the 4th argument equal to strlen() of the 3rd one. Drop the v_size argument, add vfs_parse_fs_qstr() for the cases that want independent length. 
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs_context.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 7773eb870039..97b514a79a49 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -134,8 +134,13 @@ extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_ty extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); -extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, - const char *value, size_t v_size); +extern int vfs_parse_fs_qstr(struct fs_context *fc, const char *key, + const struct qstr *value); +static inline int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value) +{ + return vfs_parse_fs_qstr(fc, key, value ? &QSTR(value) : NULL); +} int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); -- cgit v1.2.3 From 038c7dc66e2744e5df57163b8f957745ae10d23e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Sep 2025 20:46:40 -0700 Subject: compiler_types.h: Move __nocfi out of compiler-specific header Prepare for GCC KCFI support and move the __nocfi attribute from compiler-clang.h to compiler_types.h. This was already gated by CONFIG_CFI_CLANG, so this remains safe for non-KCFI GCC builds. 
Signed-off-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250904034656.3670313-1-kees@kernel.org --- include/linux/compiler-clang.h | 5 ----- include/linux/compiler_types.h | 4 +++- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index fa4ffe037bc7..7a4568e421dc 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -96,11 +96,6 @@ # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#if __has_feature(kcfi) -/* Disable CFI checking inside a function. */ -#define __nocfi __attribute__((__no_sanitize__("kcfi"))) -#endif - /* * Turn individual warnings and errors on and off locally, depending * on version. diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 16755431fc11..a910f9fa5341 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -432,7 +432,9 @@ struct ftrace_likely_data { # define __noscs #endif -#ifndef __nocfi +#if defined(CONFIG_CFI_CLANG) +# define __nocfi __attribute__((__no_sanitize__("kcfi"))) +#else # define __nocfi #endif -- cgit v1.2.3 From 0b815825b1b0bd6762ca028e9b6631b002efb7ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Sep 2025 20:46:45 -0700 Subject: x86/cfi: Remove __noinitretpoline and __noretpoline Commit 66f793099a63 ("x86/retpoline: Avoid retpolines for built-in __init functions") disabled retpolines in __init sections (__noinitretpoline) as a precaution against potential issues with retpolines in early boot, but it has not been a problem in practice (i.e. see Clang below). Commit 87358710c1fb ("x86/retpoline: Support retpoline builds with Clang") narrowed this to only GCC, as Clang doesn't have per-function control over retpoline emission. As such, Clang has been booting with retpolines in __init since retpoline support was introduced. 
Clang KCFI has been instrumenting __init since CFI was introduced. With the introduction of KCFI for GCC, KCFI instrumentation with retpolines disabled means that objtool does not construct .retpoline_sites section entries for the non-retpoline KCFI calls. At boot, the KCFI rehashing code, via __apply_fineibt(), misses all __init KCFI calls (since they are not retpolines), resulting in immediate hash mismatches: all preambles are rehashed (via .cfi_sites) and none of the __init call sites are rehashed. Remove __noinitretpoline since it provides no meaningful utility and creates problems with CFI. Additionally remove __noretpoline since it is now unused. Alternatively, cfi_rand_callers() could walk the .kcfi_traps section which is exactly the list of KCFI instrumentation sites. But it seems better to have as few differences in common instruction sequences between compilers as possible, so better to remove the special handling of retpolines in __init for GCC. Signed-off-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250904034656.3670313-6-kees@kernel.org --- include/linux/compiler-gcc.h | 4 ---- include/linux/init.h | 8 -------- 2 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5d07c469b571..5de824a0b3d7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -35,10 +35,6 @@ (typeof(ptr)) (__ptr + (off)); \ }) -#ifdef CONFIG_MITIGATION_RETPOLINE -#define __noretpoline __attribute__((__indirect_branch__("keep"))) -#endif - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif diff --git a/include/linux/init.h b/include/linux/init.h index a60d32d227ee..17c1bc712e23 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -7,13 +7,6 @@ #include <linux/stringify.h> #include <linux/types.h> -/* Built-in __init functions needn't be compiled with retpoline */ -#if defined(__noretpoline) 
&& !defined(MODULE) -#define __noinitretpoline __noretpoline -#else -#define __noinitretpoline -#endif - /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. The kernel can take this @@ -50,7 +43,6 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ #define __init __section(".init.text") __cold __latent_entropy \ - __noinitretpoline \ __no_kstack_erase #define __initdata __section(".init.data") #define __initconst __section(".init.rodata") -- cgit v1.2.3 From 54728bd535fb3899ad51489dc1e05eb5bb53cb95 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 1 Sep 2025 15:27:43 +0200 Subject: bpf: Return an error pointer for skb metadata when CONFIG_NET=n Kernel Test Robot reported a compiler warning - a null pointer may be passed to memmove in __bpf_dynptr_{read,write} when building without networking support. The warning is correct from a static analysis standpoint, but not actually reachable. Without CONFIG_NET, creating dynptrs to skb metadata is impossible since the constructor kfunc is missing. Silence the false-positive diagnostic message by returning an error pointer from bpf_skb_meta_pointer stub when CONFIG_NET=n. 
Fixes: 6877cd392bae ("bpf: Enable read/write access to skb metadata through a dynptr") Closes: https://lore.kernel.org/oe-kbuild-all/202508212031.ir9b3B6Q-lkp@intel.com/ Reported-by: kernel test robot Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250901-dynptr-skb-meta-no-net-v2-1-ce607fcb6091@cloudflare.com --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 9ed21b65e2e9..af6d9354662c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1822,7 +1822,7 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) { - return NULL; + return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_NET */ -- cgit v1.2.3 From 0a0fdb98d16e334e259352893462030f15fb887f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 26 Aug 2025 17:08:19 +0200 Subject: fuse: remove FUSE_NOTIFY_CODE_MAX from <uapi/linux/fuse.h> Constants that change value from version to version have no place in an interface definition. Hopefully this won't break anything. 
Reviewed-by: Joanne Koong Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 6b9fb8b08768..30bf0846547f 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -680,7 +680,6 @@ enum fuse_notify_code { FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_RESEND = 7, FUSE_NOTIFY_INC_EPOCH = 8, - FUSE_NOTIFY_CODE_MAX, }; /* The read buffer is required to be at least 8k, but may be much larger */ -- cgit v1.2.3 From 3f29d59e92a96d843c2ff10ebfed92ac26878658 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 2 Sep 2025 10:22:06 +0200 Subject: fuse: add prune notification Some fuse servers need to prune their caches, which can only be done if the kernel's own dentry/inode caches are pruned first to avoid dangling references. Add FUSE_NOTIFY_PRUNE, which takes an array of node ID's to try and get rid of. Inodes with active references are skipped. A similar functionality is already provided by FUSE_NOTIFY_INVAL_ENTRY with the FUSE_EXPIRE_ONLY flag. 
Differences in the interface are FUSE_NOTIFY_INVAL_ENTRY: - can only prune one dentry - dentry is determined by parent ID and name - if inode has multiple aliases (cached hard links), then they would have to be invalidated individually to be able to get rid of the inode FUSE_NOTIFY_PRUNE: - can prune multiple inodes - inodes determined by their node ID - aliases are taken care of automatically Reviewed-by: Joanne Koong Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 30bf0846547f..c13e1f9a2f12 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -239,6 +239,7 @@ * 7.45 * - add FUSE_COPY_FILE_RANGE_64 * - add struct fuse_copy_file_range_out + * - add FUSE_NOTIFY_PRUNE */ #ifndef _LINUX_FUSE_H @@ -680,6 +681,7 @@ enum fuse_notify_code { FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_RESEND = 7, FUSE_NOTIFY_INC_EPOCH = 8, + FUSE_NOTIFY_PRUNE = 9, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -1118,6 +1120,12 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +struct fuse_notify_prune_out { + uint32_t count; + uint32_t padding; + uint64_t spare; +}; + struct fuse_backing_map { int32_t fd; uint32_t flags; -- cgit v1.2.3 From f70da6f99d4f40c5f481c92e3b65d5e36eaa6dc9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 28 Aug 2025 15:24:34 +0100 Subject: drm/gpusvm: pull out drm_gpusvm_pages substructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the pages stuff from the svm range into its own substructure, with the idea of having the main pages related routines, like get_pages(), unmap_pages() and free_pages() all operating on some lower level structures, which can then be re-used for stuff like userptr. 
v2: - Move seq into pages struct (Matt B) v3: - Small kernel-doc fixes Suggested-by: Matthew Brost Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250828142430.615826-13-matthew.auld@intel.com --- include/drm/drm_gpusvm.h | 48 +++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 0e336148309d..1ee4188c3067 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -106,16 +106,16 @@ struct drm_gpusvm_notifier { }; /** - * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags + * struct drm_gpusvm_pages_flags - Structure representing a GPU SVM pages flags * - * @migrate_devmem: Flag indicating whether the range can be migrated to device memory - * @unmapped: Flag indicating if the range has been unmapped - * @partial_unmap: Flag indicating if the range has been partially unmapped - * @has_devmem_pages: Flag indicating if the range has devmem pages - * @has_dma_mapping: Flag indicating if the range has a DMA mapping - * @__flags: Flags for range in u16 form (used for READ_ONCE) + * @migrate_devmem: Flag indicating whether the pages can be migrated to device memory + * @unmapped: Flag indicating if the pages has been unmapped + * @partial_unmap: Flag indicating if the pages has been partially unmapped + * @has_devmem_pages: Flag indicating if the pages has devmem pages + * @has_dma_mapping: Flag indicating if the pages has a DMA mapping + * @__flags: Flags for pages in u16 form (used for READ_ONCE) */ -struct drm_gpusvm_range_flags { +struct drm_gpusvm_pages_flags { union { struct { /* All flags below must be set upon creation */ @@ -130,6 +130,27 @@ struct drm_gpusvm_range_flags { }; }; +/** + * struct drm_gpusvm_pages - Structure representing a GPU SVM mapped pages + * + * @dma_addr: Device address array + * @dpagemap: The struct 
drm_pagemap of the device pages we're dma-mapping. + * Note this is assuming only one drm_pagemap per range is allowed. + * @notifier_seq: Notifier sequence number of the range's pages + * @flags: Flags for range + * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory + * @flags.unmapped: Flag indicating if the range has been unmapped + * @flags.partial_unmap: Flag indicating if the range has been partially unmapped + * @flags.has_devmem_pages: Flag indicating if the range has devmem pages + * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping + */ +struct drm_gpusvm_pages { + struct drm_pagemap_addr *dma_addr; + struct drm_pagemap *dpagemap; + unsigned long notifier_seq; + struct drm_gpusvm_pages_flags flags; +}; + /** * struct drm_gpusvm_range - Structure representing a GPU SVM range * @@ -138,11 +159,7 @@ struct drm_gpusvm_range_flags { * @refcount: Reference count for the range * @itree: Interval tree node for the range (inserted in GPU SVM notifier) * @entry: List entry to fast interval tree traversal - * @notifier_seq: Notifier sequence number of the range's pages - * @dma_addr: Device address array - * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping. - * Note this is assuming only one drm_pagemap per range is allowed. - * @flags: Flags for range + * @pages: The pages for this range. * * This structure represents a GPU SVM range used for tracking memory ranges * mapped in a DRM device. 
@@ -153,10 +170,7 @@ struct drm_gpusvm_range { struct kref refcount; struct interval_tree_node itree; struct list_head entry; - unsigned long notifier_seq; - struct drm_pagemap_addr *dma_addr; - struct drm_pagemap *dpagemap; - struct drm_gpusvm_range_flags flags; + struct drm_gpusvm_pages pages; }; /** -- cgit v1.2.3 From 83f706ecbde1dfdc377bafda773fdc57644cd479 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 28 Aug 2025 15:24:36 +0100 Subject: drm/gpusvm: export drm_gpusvm_pages API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export get/unmap/free pages API. We also need to tweak the SVM init to allow skipping much of the unneeded parts. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250828142430.615826-15-matthew.auld@intel.com --- include/drm/drm_gpusvm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 1ee4188c3067..5434048a2ca4 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -307,6 +307,22 @@ drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start, void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, const struct mmu_notifier_range *mmu_range); +int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + struct mm_struct *mm, + struct mmu_interval_notifier *notifier, + unsigned long pages_start, unsigned long pages_end, + const struct drm_gpusvm_ctx *ctx); + +void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + unsigned long npages, + const struct drm_gpusvm_ctx *ctx); + +void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + unsigned long npages); + #ifdef CONFIG_LOCKDEP /** * drm_gpusvm_driver_set_lock() - Set the lock protecting accesses to GPU SVM -- cgit 
v1.2.3 From 74a0e72f03ffd01b5d88b411f02d9b9861fdb99e Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 21 Aug 2025 12:15:59 +0200 Subject: iommu/io-pgtable-dart: Add 4-level page table support DARTs on t602x SoCs are of the t8110 variant but have an IAS of 42, which means optional support for an extra page table level. Refactor the PTE management to support an arbitrary level count, and then calculate how many levels we need for any given configuration. Signed-off-by: Hector Martin Signed-off-by: Janne Grunau Reviewed-by: Sven Peter Reviewed-by: Neal Gompa Link: https://lore.kernel.org/r/20250821-apple-dart-4levels-v2-2-e39af79daa37@jannau.net Signed-off-by: Joerg Roedel --- include/linux/io-pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 138fbd89b1e6..8a823c6f2b4a 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -180,6 +180,7 @@ struct io_pgtable_cfg { struct { u64 ttbr[4]; u32 n_ttbrs; + u32 n_levels; } apple_dart_cfg; struct { -- cgit v1.2.3 From 8f77295525825086cb43675cd1a4f3716b119d7f Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 18 Aug 2025 10:28:05 +0530 Subject: ACPI: RISC-V: Add support for RIMT RISC-V IO Mapping Table (RIMT) is a static ACPI table to communicate IOMMU information to the OS. The spec is available at [1]. The changes at high level are, a) Initialize data structures required for IOMMU/device configuration using the data from RIMT. Provide APIs required for device configuration. b) Provide an API for IOMMU drivers to register the fwnode with RIMT data structures. This API will create a fwnode for PCIe IOMMU. 
[1] - https://github.com/riscv-non-isa/riscv-acpi-rimt Signed-off-by: Sunil V L Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250818045807.763922-2-sunilvl@ventanamicro.com Signed-off-by: Joerg Roedel --- include/linux/acpi_rimt.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/acpi_rimt.h (limited to 'include') diff --git a/include/linux/acpi_rimt.h b/include/linux/acpi_rimt.h new file mode 100644 index 000000000000..fad3adc4d899 --- /dev/null +++ b/include/linux/acpi_rimt.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024-2025, Ventana Micro Systems Inc. + * Author: Sunil V L + */ + +#ifndef _ACPI_RIMT_H +#define _ACPI_RIMT_H + +#ifdef CONFIG_ACPI_RIMT +int rimt_iommu_register(struct device *dev); +#else +static inline int rimt_iommu_register(struct device *dev) +{ + return -ENODEV; +} +#endif + +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_ACPI_RIMT) +int rimt_iommu_configure_id(struct device *dev, const u32 *id_in); +#else +static inline int rimt_iommu_configure_id(struct device *dev, const u32 *id_in) +{ + return -ENODEV; +} +#endif + +#endif /* _ACPI_RIMT_H */ -- cgit v1.2.3 From c593b9d6c446510684da400833f9d632651942f0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Sep 2025 11:23:33 -0400 Subject: filelock: add FL_RECLAIM to show_fl_flags() macro Show the FL_RECLAIM flag symbolically in tracepoints. 
Fixes: bb0a55bb7148 ("nfs: don't allow reexport reclaims") Signed-off-by: Jeff Layton Link: https://lore.kernel.org/20250903-filelock-v1-1-f2926902962d@kernel.org Signed-off-by: Christian Brauner --- include/trace/events/filelock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index b8d1e00a7982..2dfeb158e848 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -27,7 +27,8 @@ { FL_SLEEP, "FL_SLEEP" }, \ { FL_DOWNGRADE_PENDING, "FL_DOWNGRADE_PENDING" }, \ { FL_UNLOCK_PENDING, "FL_UNLOCK_PENDING" }, \ - { FL_OFDLCK, "FL_OFDLCK" }) + { FL_OFDLCK, "FL_OFDLCK" }, \ + { FL_RECLAIM, "FL_RECLAIM"}) #define show_fl_type(val) \ __print_symbolic(val, \ -- cgit v1.2.3 From f6cfa602d2ba7e5ca9dc65ec4141521aca80bda2 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 5 Sep 2025 11:13:23 +0200 Subject: workqueue: replace use of system_unbound_wq with system_dfl_wq Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() uses WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. system_unbound_wq should be the default workqueue so as not to enforce locality constraints for random work whenever it's not required. Adding system_dfl_wq to encourage its use when unbound work should be used. queue_work() / queue_delayed_work() / mod_delayed_work() will now use the new unbound wq: if the user still uses the old wq, a warning will be printed along with a wq redirect to the new one. The old system_unbound_wq will be kept for a few release cycles. 
Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 45d5dd470ff6..af860e8f8481 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -783,8 +783,8 @@ extern void __warn_flushing_systemwide_wq(void) _wq == system_highpri_wq) || \ (__builtin_constant_p(_wq == system_long_wq) && \ _wq == system_long_wq) || \ - (__builtin_constant_p(_wq == system_unbound_wq) && \ - _wq == system_unbound_wq) || \ + (__builtin_constant_p(_wq == system_dfl_wq) && \ + _wq == system_dfl_wq) || \ (__builtin_constant_p(_wq == system_freezable_wq) && \ _wq == system_freezable_wq) || \ (__builtin_constant_p(_wq == system_power_efficient_wq) && \ -- cgit v1.2.3 From a2be943b46b4a7478ea8ddf9bb8e5251c59fceb7 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Fri, 5 Sep 2025 11:13:24 +0200 Subject: workqueue: replace use of system_wq with system_percpu_wq Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() uses WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. system_wq is a per-CPU workqueue, yet nothing in its name tells about that CPU affinity constraint, which is very often not required by users. Make it clear by adding a system_percpu_wq. queue_work() / queue_delayed_work() / mod_delayed_work() will now use the new per-cpu wq: if the user still sticks to the old name, a warning will be printed along with a wq redirect to the new one. This patch adds the new system_percpu_wq except for the mm, fs and net subsystems, which are handled in separate patches. 
The old wq will be kept for a few release cycles. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index af860e8f8481..b6834b7aee4b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -434,10 +434,10 @@ enum wq_consts { * short queue flush time. Don't queue works which can run for too * long. * - * system_highpri_wq is similar to system_wq but for work items which + * system_highpri_wq is similar to system_percpu_wq but for work items which * require WQ_HIGHPRI. * - * system_long_wq is similar to system_wq but may host long running + * system_long_wq is similar to system_percpu_wq but may host long running * works. Queue flushing might take relatively long. * * system_dfl_wq is unbound workqueue. Workers are not bound to @@ -445,13 +445,13 @@ enum wq_consts { * executed immediately as long as max_active limit is not reached and * resources are available. * - * system_freezable_wq is equivalent to system_wq except that it's + * system_freezable_wq is equivalent to system_percpu_wq except that it's * freezable. * * *_power_efficient_wq are inclined towards saving power and converted * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, * they are same as their non-power-efficient counterparts - e.g. - * system_power_efficient_wq is identical to system_wq if + * system_power_efficient_wq is identical to system_percpu_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. * * system_bh[_highpri]_wq are convenience interface to softirq. 
BH work items @@ -708,7 +708,7 @@ static inline bool mod_delayed_work(struct workqueue_struct *wq, */ static inline bool schedule_work_on(int cpu, struct work_struct *work) { - return queue_work_on(cpu, system_wq, work); + return queue_work_on(cpu, system_percpu_wq, work); } /** @@ -727,7 +727,7 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work) */ static inline bool schedule_work(struct work_struct *work) { - return queue_work(system_wq, work); + return queue_work(system_percpu_wq, work); } /** @@ -770,15 +770,15 @@ extern void __warn_flushing_systemwide_wq(void) #define flush_scheduled_work() \ ({ \ __warn_flushing_systemwide_wq(); \ - __flush_workqueue(system_wq); \ + __flush_workqueue(system_percpu_wq); \ }) #define flush_workqueue(wq) \ ({ \ struct workqueue_struct *_wq = (wq); \ \ - if ((__builtin_constant_p(_wq == system_wq) && \ - _wq == system_wq) || \ + if ((__builtin_constant_p(_wq == system_percpu_wq) && \ + _wq == system_percpu_wq) || \ (__builtin_constant_p(_wq == system_highpri_wq) && \ _wq == system_highpri_wq) || \ (__builtin_constant_p(_wq == system_long_wq) && \ @@ -807,7 +807,7 @@ extern void __warn_flushing_systemwide_wq(void) static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work_on(cpu, system_wq, dwork, delay); + return queue_delayed_work_on(cpu, system_percpu_wq, dwork, delay); } /** @@ -821,7 +821,7 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, static inline bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work(system_wq, dwork, delay); + return queue_delayed_work(system_percpu_wq, dwork, delay); } #ifndef CONFIG_SMP -- cgit v1.2.3 From 97248d05b70edc674f2f2fa835fed33172686b1d Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Tue, 2 Sep 2025 15:33:23 +0800 Subject: cpufreq: Drop redundant freq_table parameter Since commit e0b3165ba521 ("cpufreq: add 
'freq_table' in struct cpufreq_policy"), freq_table has been stored in struct cpufreq_policy instead of being maintained separately. However, several helpers in freq_table.c still take both policy and freq_table as parameters, even though policy->freq_table can always be used. This leads to redundant function arguments and increases the chance of inconsistencies. This patch removes the unnecessary freq_table argument from these functions and updates their callers to only pass policy. This makes the code simpler, more consistent, and avoids duplication. Signed-off-by: Zihuan Zhang Acked-by: Viresh Kumar Link: https://patch.msgid.link/20250902073323.48330-1-zhangzihuan@kylinos.cn Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 95f3807c8c55..40966512ea18 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -780,11 +780,10 @@ struct cpufreq_frequency_table { else -int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table); +int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy); + +int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy); -int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, - struct cpufreq_frequency_table *table); int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, -- cgit v1.2.3 From 9d68320b2bca876278856fdc1e8684a7494dd069 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Fri, 5 Sep 2025 16:18:58 +0800 Subject: ACPI: processor: idle: Fix function defined but not used warning If CONFIG_ACPI_PROCESSOR_IDLE=n, acpi_processor_register_idle_driver() and acpi_processor_unregister_idle_driver() are never used and the empty stubs of them are not needed. 
Moreover, they cause the compiler to complain [1], so remove them. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508300519.tZQHY6HA-lkp@intel.com/ [1] Fixes: 7a8c994cbb2d ("ACPI: processor: idle: Optimize ACPI idle driver registration") Signed-off-by: Huisong Li Link: https://patch.msgid.link/20250905081900.663869-2-lihuisong@huawei.com [ rjw: Changelog rewrite ] Signed-off-by: Rafael J. Wysocki --- include/acpi/processor.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 360b673f05e5..ff864c1cee3a 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -445,12 +445,6 @@ static inline int acpi_processor_hotplug(struct acpi_processor *pr) { return -ENODEV; } -static inline void acpi_processor_register_idle_driver(void) -{ -} -static inline void acpi_processor_unregister_idle_driver(void) -{ -} #endif /* CONFIG_ACPI_PROCESSOR_IDLE */ /* in processor_thermal.c */ -- cgit v1.2.3 From 437054b1bbe11be87ab0a522b8ccbae3f785c642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 15 Aug 2025 12:41:10 +0200 Subject: vdso: Add struct __kernel_old_timeval forward declaration to gettime.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prototype of __vdso_gettimeofday() uses this struct. However gettime.h's own includes do not provide a definition for it. Add a forward declaration, similar to other used structs. 
Fixes: 42874e4eb35b ("arch: vdso: consolidate gettime prototypes") Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250815-vdso-sparc64-generic-2-v2-1-b5ff80672347@linutronix.de --- include/vdso/gettime.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/vdso/gettime.h b/include/vdso/gettime.h index c50d152e7b3e..9ac161866653 100644 --- a/include/vdso/gettime.h +++ b/include/vdso/gettime.h @@ -5,6 +5,7 @@ #include <linux/types.h> struct __kernel_timespec; +struct __kernel_old_timeval; struct timezone; #if !defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) -- cgit v1.2.3 From 1544344563376b2a2ae2af5af1db00d6410c18e0 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 29 Aug 2025 15:28:44 +0800 Subject: rhashtable: Use __always_inline instead of inline Sometimes, the compiler is not clever enough to inline the rhashtable_lookup() for us, even if the "obj_cmpfn" and "key_len" in params are const. This can introduce more overhead. Therefore, use __always_inline for the rhashtable. 
Signed-off-by: Menglong Dong Signed-off-by: Herbert Xu --- include/linux/rhashtable.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 6c85b28ea30b..e740157f3cd7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -122,7 +122,7 @@ static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, return hash & (tbl->size - 1); } -static inline unsigned int rht_key_get_hash(struct rhashtable *ht, +static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht, const void *key, const struct rhashtable_params params, unsigned int hash_rnd) { @@ -152,7 +152,7 @@ static inline unsigned int rht_key_get_hash(struct rhashtable *ht, return hash; } -static inline unsigned int rht_key_hashfn( +static __always_inline unsigned int rht_key_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const void *key, const struct rhashtable_params params) { @@ -161,7 +161,7 @@ static inline unsigned int rht_key_hashfn( return rht_bucket_index(tbl, hash); } -static inline unsigned int rht_head_hashfn( +static __always_inline unsigned int rht_head_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he, const struct rhashtable_params params) { @@ -586,7 +586,7 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, } /* Internal function, do not use. */ -static inline struct rhash_head *__rhashtable_lookup( +static __always_inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -639,7 +639,7 @@ restart: * * Returns the first entry on which the compare function returned true. 
*/ -static inline void *rhashtable_lookup( +static __always_inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -662,7 +662,7 @@ static inline void *rhashtable_lookup( * * Returns the first entry on which the compare function returned true. */ -static inline void *rhashtable_lookup_fast( +static __always_inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -689,7 +689,7 @@ static inline void *rhashtable_lookup_fast( * * Returns the list of entries that match the given key. */ -static inline struct rhlist_head *rhltable_lookup( +static __always_inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) { @@ -702,7 +702,7 @@ static inline struct rhlist_head *rhltable_lookup( * function returns the existing element already in hashes if there is a clash, * otherwise it returns an error via ERR_PTR(). */ -static inline void *__rhashtable_insert_fast( +static __always_inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -825,7 +825,7 @@ out_unlock: * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhashtable_insert_fast( +static __always_inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -854,7 +854,7 @@ static inline int rhashtable_insert_fast( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. 
*/ -static inline int rhltable_insert_key( +static __always_inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, const struct rhashtable_params params) { @@ -877,7 +877,7 @@ static inline int rhltable_insert_key( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhltable_insert( +static __always_inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -902,7 +902,7 @@ static inline int rhltable_insert( * Will trigger an automatic deferred table resizing if residency in the * table grows beyond 70%. */ -static inline int rhashtable_lookup_insert_fast( +static __always_inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -929,7 +929,7 @@ static inline int rhashtable_lookup_insert_fast( * object if it exists, NULL if it did not and the insertion was successful, * and an ERR_PTR otherwise. */ -static inline void *rhashtable_lookup_get_insert_fast( +static __always_inline void *rhashtable_lookup_get_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -956,7 +956,7 @@ static inline void *rhashtable_lookup_get_insert_fast( * * Returns zero on success. */ -static inline int rhashtable_lookup_insert_key( +static __always_inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -982,7 +982,7 @@ static inline int rhashtable_lookup_insert_key( * object if it exists, NULL if it does not and the insertion was successful, * and an ERR_PTR otherwise. 
*/ -static inline void *rhashtable_lookup_get_insert_key( +static __always_inline void *rhashtable_lookup_get_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -992,7 +992,7 @@ static inline void *rhashtable_lookup_get_insert_key( } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast_one( +static __always_inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) @@ -1074,7 +1074,7 @@ unlocked: } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static __always_inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -1115,7 +1115,7 @@ static inline int __rhashtable_remove_fast( * * Returns zero on success, -ENOENT if the entry could not be found. */ -static inline int rhashtable_remove_fast( +static __always_inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -1137,7 +1137,7 @@ static inline int rhashtable_remove_fast( * * Returns zero on success, -ENOENT if the entry could not be found. 
*/ -static inline int rhltable_remove( +static __always_inline int rhltable_remove( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -1145,7 +1145,7 @@ static inline int rhltable_remove( } /* Internal function, please use rhashtable_replace_fast() instead */ -static inline int __rhashtable_replace_fast( +static __always_inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) @@ -1208,7 +1208,7 @@ unlocked: * Returns zero on success, -ENOENT if the entry could not be found, * -EINVAL if hash is not the same for the old and new objects. */ -static inline int rhashtable_replace_fast( +static __always_inline int rhashtable_replace_fast( struct rhashtable *ht, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) -- cgit v1.2.3 From 886d6981208263b55a1eb8b39c5d00db1544b9bb Mon Sep 17 00:00:00 2001 From: Zhushuai Yin Date: Sat, 30 Aug 2025 18:27:57 +0800 Subject: crypto: hisilicon/zip - add hashjoin, gather, and UDMA data move features The new version of the hisilicon zip driver supports the hash join and gather features, as well as the data move feature (UDMA), including data copying and memory initialization functions. These features are registered to the uacce subsystem.
Signed-off-by: Zhushuai Yin Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 1 + include/uapi/misc/uacce/hisi_qm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 0c4c84b8c3be..f2254ddc327c 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -125,6 +125,7 @@ enum qm_hw_ver { QM_HW_V2 = 0x21, QM_HW_V3 = 0x30, QM_HW_V4 = 0x50, + QM_HW_V5 = 0x51, }; enum qm_fun_type { diff --git a/include/uapi/misc/uacce/hisi_qm.h b/include/uapi/misc/uacce/hisi_qm.h index 3e66dbc2f323..10504b48eabf 100644 --- a/include/uapi/misc/uacce/hisi_qm.h +++ b/include/uapi/misc/uacce/hisi_qm.h @@ -31,6 +31,7 @@ struct hisi_qp_info { #define HISI_QM_API_VER_BASE "hisi_qm_v1" #define HISI_QM_API_VER2_BASE "hisi_qm_v2" #define HISI_QM_API_VER3_BASE "hisi_qm_v3" +#define HISI_QM_API_VER5_BASE "hisi_qm_v5" /* UACCE_CMD_QM_SET_QP_CTX: Set qp algorithm type */ #define UACCE_CMD_QM_SET_QP_CTX _IOWR('H', 10, struct hisi_qp_ctx) -- cgit v1.2.3 From c2ce2453413d429e302659abc5ace634e873f6f5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Aug 2025 12:59:24 +0200 Subject: driver core/PM: Set power.no_callbacks along with power.no_pm Devices with power.no_pm set are not expected to need any power management at all, so modify device_set_pm_not_required() to set power.no_callbacks for them too in case runtime PM will be enabled for any of them (which in principle may be done for convenience if such a device participates in a dependency chain). Since device_set_pm_not_required() must be called before device_add() or it would not have any effect, it can update power.no_callbacks without locking, unlike pm_runtime_no_callbacks() that can be called after registering the target device. Signed-off-by: Rafael J. 
Wysocki Cc: stable Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/1950054.tdWV9SEqCh@rafael.j.wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 0470d19da7f2..b031ff71a5bd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -851,6 +851,9 @@ static inline bool device_pm_not_required(struct device *dev) static inline void device_set_pm_not_required(struct device *dev) { dev->power.no_pm = true; +#ifdef CONFIG_PM + dev->power.no_callbacks = true; +#endif } static inline void dev_pm_syscore_device(struct device *dev, bool val) -- cgit v1.2.3 From 20f988320d2718ef28b1f0635acc88c12a216d29 Mon Sep 17 00:00:00 2001 From: "Rai, Amardeep" Date: Wed, 20 Aug 2025 17:38:19 +0300 Subject: usb: core: Add a function to get USB version independent periodic payload Add usb_endpoint_max_periodic_payload() to obtain maximum payload bytes in a service interval for isochronous and interrupt endpoints in a USB version independent way. 
Signed-off-by: Rai, Amardeep Signed-off-by: Mathias Nyman Co-developed-by: Sakari Ailus Signed-off-by: Sakari Ailus Reviewed-by: Hans de Goede Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250820143824.551777-5-sakari.ailus@linux.intel.com --- include/linux/usb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 9d662c6abb4d..e9cf2786d8bd 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -2039,6 +2039,9 @@ static inline u16 usb_maxpacket(struct usb_device *udev, int pipe) return usb_endpoint_maxp(&ep->desc); } +u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, + const struct usb_host_endpoint *ep); + /* translate USB error codes to codes user space understands */ static inline int usb_translate_errors(int error_code) { -- cgit v1.2.3 From d6725169a9bbcb5bd1dd14b2891b874614c59f52 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 20 Aug 2025 17:38:21 +0300 Subject: usb: core: Introduce usb_endpoint_is_hs_isoc_double() Introduce usb_endpoint_is_hs_isoc_double() to tell whether an endpoint conforms to USB 2.0 Isochronous Double IN Bandwidth ECN.
Signed-off-by: Sakari Ailus Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250820143824.551777-7-sakari.ailus@linux.intel.com --- include/linux/usb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index e9cf2786d8bd..70ef00c42d22 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -2042,6 +2042,9 @@ static inline u16 usb_maxpacket(struct usb_device *udev, int pipe) u32 usb_endpoint_max_periodic_payload(struct usb_device *udev, const struct usb_host_endpoint *ep); +bool usb_endpoint_is_hs_isoc_double(struct usb_device *udev, + const struct usb_host_endpoint *ep); + /* translate USB error codes to codes user space understands */ static inline int usb_translate_errors(int error_code) { -- cgit v1.2.3 From 23743ba64709a9c137c1b928f8b8e00d846af9cc Mon Sep 17 00:00:00 2001 From: Calixte Pernot Date: Mon, 25 Aug 2025 14:56:09 +0200 Subject: vt: add support for smput/rmput escape codes Support "\e[?1049h" and "\e[?1049l" escape codes. This patch allows programs to enter and leave alternate screens. This feature is widely available in graphical terminal emulators and mostly used by fullscreen terminal-based user interfaces such as text editors. Most editors such as vim and nano assume this escape code is not supported and will not try to print the escape sequence if TERM=linux. To try out this patch, run `TERM=xterm-256color vim` inside a VT.
Signed-off-by: Calixte Pernot Link: https://lore.kernel.org/r/20250825125607.2478-3-calixte.pernot@grenoble-inp.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console_struct.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 59b4fec5f254..13b35637bd5a 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -159,6 +159,9 @@ struct vc_data { struct uni_pagedict *uni_pagedict; struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ u32 **vc_uni_lines; /* unicode screen content */ + u16 *vc_saved_screen; + unsigned int vc_saved_cols; + unsigned int vc_saved_rows; /* additional information is in vt_kern.h */ }; -- cgit v1.2.3 From afc4e4a5f122183b38095daba2264123cc86d8ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:18 -0700 Subject: lib/crypto: tests: Migrate Curve25519 self-test to KUnit Move the Curve25519 test from an ad-hoc self-test to a KUnit test. Generally keep the same test logic for now, just translated to KUnit. There's one exception, which is that I dropped the incomplete test of curve25519_generic(). The approach I'm taking to cover the different implementations with the KUnit tests is to just rely on booting kernels in QEMU with different '-cpu' options, rather than try to make the tests (incompletely) test multiple implementations on one CPU. This way, both the test and the library API are simpler. This commit makes the file lib/crypto/curve25519.c no longer needed, as its only purpose was to call the self-test. However, keep it for now, since a later commit will add code to it again. Temporarily omit the default value of CRYPTO_SELFTESTS that the other lib/crypto/ KUnit tests have. It would cause a recursive kconfig dependency, since the Curve25519 code is still entangled with CRYPTO. A later commit will fix that. 
Link: https://lore.kernel.org/r/20250906213523.84915-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/curve25519.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h index ece6a9b5fafc..4e6dc840b159 100644 --- a/include/crypto/curve25519.h +++ b/include/crypto/curve25519.h @@ -28,8 +28,6 @@ void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]); -bool curve25519_selftest(void); - static inline bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], -- cgit v1.2.3 From 8c06b330e8f79834924305362227e38e4e2469ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:20 -0700 Subject: lib/crypto: curve25519: Move a couple functions out-of-line Move curve25519() and curve25519_generate_public() from curve25519.h to curve25519.c. There's no good reason for them to be inline. Link: https://lore.kernel.org/r/20250906213523.84915-10-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/curve25519.h | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h index 4e6dc840b159..78aa5f28c847 100644 --- a/include/crypto/curve25519.h +++ b/include/crypto/curve25519.h @@ -6,7 +6,6 @@ #ifndef CURVE25519_H #define CURVE25519_H -#include // For crypto_memneq. 
#include #include @@ -28,33 +27,12 @@ void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]); -static inline bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], - const u8 basepoint[CURVE25519_KEY_SIZE]) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) - curve25519_arch(mypublic, secret, basepoint); - else - curve25519_generic(mypublic, secret, basepoint); - return crypto_memneq(mypublic, curve25519_null_point, - CURVE25519_KEY_SIZE); -} + const u8 basepoint[CURVE25519_KEY_SIZE]); -static inline bool -__must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], - const u8 secret[CURVE25519_KEY_SIZE]) -{ - if (unlikely(!crypto_memneq(secret, curve25519_null_point, - CURVE25519_KEY_SIZE))) - return false; - - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) - curve25519_base_arch(pub, secret); - else - curve25519_generic(pub, secret, curve25519_base_point); - return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); -} +bool __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]); static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE]) { -- cgit v1.2.3 From 68546e5632c0b982663af575ae12cc5d81facc91 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:21 -0700 Subject: lib/crypto: curve25519: Consolidate into single module Reorganize the Curve25519 library code: - Build a single libcurve25519 module, instead of up to three modules: libcurve25519, libcurve25519-generic, and an arch-specific module. - Move the arch-specific Curve25519 code from arch/$(SRCARCH)/crypto/ to lib/crypto/$(SRCARCH)/. Centralize the build rules into lib/crypto/Makefile and lib/crypto/Kconfig. - Include the arch-specific code directly in lib/crypto/curve25519.c via a header, rather than using a separate .c file. 
- Eliminate the entanglement with CRYPTO. CRYPTO_LIB_CURVE25519 no longer selects CRYPTO, and the arch-specific Curve25519 code no longer depends on CRYPTO. This brings Curve25519 in line with the latest conventions for lib/crypto/, used by other algorithms. The exception is that I kept the generic code in separate translation units for now. (Some of the function names collide between the x86 and generic Curve25519 code. And the Curve25519 functions are very long anyway, so inlining doesn't matter as much for Curve25519 as it does for some other algorithms.) Link: https://lore.kernel.org/r/20250906213523.84915-11-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/curve25519.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h index 78aa5f28c847..db63a5577c00 100644 --- a/include/crypto/curve25519.h +++ b/include/crypto/curve25519.h @@ -13,20 +13,10 @@ enum curve25519_lengths { CURVE25519_KEY_SIZE = 32 }; -extern const u8 curve25519_null_point[]; -extern const u8 curve25519_base_point[]; - void curve25519_generic(u8 out[CURVE25519_KEY_SIZE], const u8 scalar[CURVE25519_KEY_SIZE], const u8 point[CURVE25519_KEY_SIZE]); -void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], - const u8 scalar[CURVE25519_KEY_SIZE], - const u8 point[CURVE25519_KEY_SIZE]); - -void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], - const u8 secret[CURVE25519_KEY_SIZE]); - bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]); -- cgit v1.2.3 From 0b6cb344829b4b8605a3f6e930b207d47dee1d12 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 22 Jul 2025 09:34:35 +0200 Subject: media: v4l2-core: v4l2-dv-timings: support DRM IFs Add support for DRM (Dynamic Range and Mastering) InfoFrames. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-dv-timings.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/v4l2-dv-timings.h b/include/media/v4l2-dv-timings.h index 714075c72f77..2b42e5d81f9e 100644 --- a/include/media/v4l2-dv-timings.h +++ b/include/media/v4l2-dv-timings.h @@ -275,6 +275,7 @@ int v4l2_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port); #define V4L2_DEBUGFS_IF_AUDIO BIT(1) #define V4L2_DEBUGFS_IF_SPD BIT(2) #define V4L2_DEBUGFS_IF_HDMI BIT(3) +#define V4L2_DEBUGFS_IF_DRM BIT(4) typedef ssize_t (*v4l2_debugfs_if_read_t)(u32 type, void *priv, struct file *filp, char __user *ubuf, -- cgit v1.2.3 From 43bd82eb33b2ac33232724a8ddb9e07cde492328 Mon Sep 17 00:00:00 2001 From: Denzeel Oliva Date: Thu, 4 Sep 2025 14:07:11 +0000 Subject: dt-bindings: clock: exynos990: Add PERIC0 and PERIC1 clock units Add clock management unit bindings for PERIC0 and PERIC1 blocks which provide clocks for USI, I2C and UART peripherals. 
Signed-off-by: Denzeel Oliva Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/clock/samsung,exynos990.h | 176 ++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/samsung,exynos990.h b/include/dt-bindings/clock/samsung,exynos990.h index c60f15503d5b..47540307cb52 100644 --- a/include/dt-bindings/clock/samsung,exynos990.h +++ b/include/dt-bindings/clock/samsung,exynos990.h @@ -238,6 +238,182 @@ #define CLK_GOUT_HSI0_XIU_D_HSI0_ACLK 22 #define CLK_GOUT_HSI0_LHS_ACEL_D_HSI0_CLK 23 +/* CMU_PERIC0 */ +#define CLK_MOUT_PERIC0_BUS_USER 1 +#define CLK_MOUT_PERIC0_UART_DBG 2 +#define CLK_MOUT_PERIC0_USI00_USI_USER 3 +#define CLK_MOUT_PERIC0_USI01_USI_USER 4 +#define CLK_MOUT_PERIC0_USI02_USI_USER 5 +#define CLK_MOUT_PERIC0_USI03_USI_USER 6 +#define CLK_MOUT_PERIC0_USI04_USI_USER 7 +#define CLK_MOUT_PERIC0_USI05_USI_USER 8 +#define CLK_MOUT_PERIC0_USI13_USI_USER 9 +#define CLK_MOUT_PERIC0_USI14_USI_USER 10 +#define CLK_MOUT_PERIC0_USI15_USI_USER 11 +#define CLK_MOUT_PERIC0_USI_I2C_USER 12 +#define CLK_DOUT_PERIC0_UART_DBG 13 +#define CLK_DOUT_PERIC0_USI00_USI 14 +#define CLK_DOUT_PERIC0_USI01_USI 15 +#define CLK_DOUT_PERIC0_USI02_USI 16 +#define CLK_DOUT_PERIC0_USI03_USI 17 +#define CLK_DOUT_PERIC0_USI04_USI 18 +#define CLK_DOUT_PERIC0_USI05_USI 19 +#define CLK_DOUT_PERIC0_USI13_USI 20 +#define CLK_DOUT_PERIC0_USI14_USI 21 +#define CLK_DOUT_PERIC0_USI15_USI 22 +#define CLK_DOUT_PERIC0_USI_I2C 23 +#define CLK_GOUT_PERIC0_CMU_PCLK 24 +#define CLK_GOUT_PERIC0_OSCCLK_CLK 25 +#define CLK_GOUT_PERIC0_D_TZPC_PCLK 26 +#define CLK_GOUT_PERIC0_GPIO_PCLK 27 +#define CLK_GOUT_PERIC0_LHM_AXI_P_CLK 28 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_10 29 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_11 30 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_12 31 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_13 32 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_14 33 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_15 34 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_4 35 +#define 
CLK_GOUT_PERIC0_TOP0_IPCLK_5 36 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_6 37 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_7 38 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_8 39 +#define CLK_GOUT_PERIC0_TOP0_IPCLK_9 40 +#define CLK_GOUT_PERIC0_TOP0_PCLK_10 41 +#define CLK_GOUT_PERIC0_TOP0_PCLK_11 42 +#define CLK_GOUT_PERIC0_TOP0_PCLK_12 43 +#define CLK_GOUT_PERIC0_TOP0_PCLK_13 44 +#define CLK_GOUT_PERIC0_TOP0_PCLK_14 45 +#define CLK_GOUT_PERIC0_TOP0_PCLK_15 46 +#define CLK_GOUT_PERIC0_TOP0_PCLK_4 47 +#define CLK_GOUT_PERIC0_TOP0_PCLK_5 48 +#define CLK_GOUT_PERIC0_TOP0_PCLK_6 49 +#define CLK_GOUT_PERIC0_TOP0_PCLK_7 50 +#define CLK_GOUT_PERIC0_TOP0_PCLK_8 51 +#define CLK_GOUT_PERIC0_TOP0_PCLK_9 52 +#define CLK_GOUT_PERIC0_TOP1_IPCLK_0 53 +#define CLK_GOUT_PERIC0_TOP1_IPCLK_3 54 +#define CLK_GOUT_PERIC0_TOP1_IPCLK_4 55 +#define CLK_GOUT_PERIC0_TOP1_IPCLK_5 56 +#define CLK_GOUT_PERIC0_TOP1_IPCLK_6 57 +#define CLK_GOUT_PERIC0_TOP1_IPCLK_7 58 +#define CLK_GOUT_PERIC0_TOP1_IPCLK_8 59 +#define CLK_GOUT_PERIC0_TOP1_PCLK_0 60 +#define CLK_GOUT_PERIC0_TOP1_PCLK_15 61 +#define CLK_GOUT_PERIC0_TOP1_PCLK_3 62 +#define CLK_GOUT_PERIC0_TOP1_PCLK_4 63 +#define CLK_GOUT_PERIC0_TOP1_PCLK_5 64 +#define CLK_GOUT_PERIC0_TOP1_PCLK_6 65 +#define CLK_GOUT_PERIC0_TOP1_PCLK_7 66 +#define CLK_GOUT_PERIC0_TOP1_PCLK_8 67 +#define CLK_GOUT_PERIC0_BUSP_CLK 68 +#define CLK_GOUT_PERIC0_UART_DBG_CLK 69 +#define CLK_GOUT_PERIC0_USI00_USI_CLK 70 +#define CLK_GOUT_PERIC0_USI01_USI_CLK 71 +#define CLK_GOUT_PERIC0_USI02_USI_CLK 72 +#define CLK_GOUT_PERIC0_USI03_USI_CLK 73 +#define CLK_GOUT_PERIC0_USI04_USI_CLK 74 +#define CLK_GOUT_PERIC0_USI05_USI_CLK 75 +#define CLK_GOUT_PERIC0_USI13_USI_CLK 76 +#define CLK_GOUT_PERIC0_USI14_USI_CLK 77 +#define CLK_GOUT_PERIC0_USI15_USI_CLK 78 +#define CLK_GOUT_PERIC0_USI_I2C_CLK 79 +#define CLK_GOUT_PERIC0_SYSREG_PCLK 80 + +/* CMU_PERIC1 */ +#define CLK_MOUT_PERIC1_BUS_USER 1 +#define CLK_MOUT_PERIC1_UART_BT_USER 2 +#define CLK_MOUT_PERIC1_USI06_USI_USER 3 +#define 
CLK_MOUT_PERIC1_USI07_USI_USER 4 +#define CLK_MOUT_PERIC1_USI08_USI_USER 5 +#define CLK_MOUT_PERIC1_USI09_USI_USER 6 +#define CLK_MOUT_PERIC1_USI10_USI_USER 7 +#define CLK_MOUT_PERIC1_USI11_USI_USER 8 +#define CLK_MOUT_PERIC1_USI12_USI_USER 9 +#define CLK_MOUT_PERIC1_USI18_USI_USER 10 +#define CLK_MOUT_PERIC1_USI16_USI_USER 11 +#define CLK_MOUT_PERIC1_USI17_USI_USER 12 +#define CLK_MOUT_PERIC1_USI_I2C_USER 13 +#define CLK_DOUT_PERIC1_UART_BT 14 +#define CLK_DOUT_PERIC1_USI06_USI 15 +#define CLK_DOUT_PERIC1_USI07_USI 16 +#define CLK_DOUT_PERIC1_USI08_USI 17 +#define CLK_DOUT_PERIC1_USI18_USI 18 +#define CLK_DOUT_PERIC1_USI12_USI 19 +#define CLK_DOUT_PERIC1_USI09_USI 20 +#define CLK_DOUT_PERIC1_USI10_USI 21 +#define CLK_DOUT_PERIC1_USI11_USI 22 +#define CLK_DOUT_PERIC1_USI16_USI 23 +#define CLK_DOUT_PERIC1_USI17_USI 24 +#define CLK_DOUT_PERIC1_USI_I2C 25 +#define CLK_GOUT_PERIC1_CMU_PCLK 26 +#define CLK_GOUT_PERIC1_UART_BT_CLK 27 +#define CLK_GOUT_PERIC1_USI12_USI_CLK 28 +#define CLK_GOUT_PERIC1_USI18_USI_CLK 29 +#define CLK_GOUT_PERIC1_D_TZPC_PCLK 30 +#define CLK_GOUT_PERIC1_GPIO_PCLK 31 +#define CLK_GOUT_PERIC1_LHM_AXI_P_CSIS_CLK 32 +#define CLK_GOUT_PERIC1_LHM_AXI_P_CLK 33 +#define CLK_GOUT_PERIC1_TOP0_IPCLK_10 34 +#define CLK_GOUT_PERIC1_TOP0_IPCLK_11 35 +#define CLK_GOUT_PERIC1_TOP0_IPCLK_12 36 +#define CLK_GOUT_PERIC1_TOP0_IPCLK_13 37 +#define CLK_GOUT_PERIC1_TOP0_IPCLK_14 38 +#define CLK_GOUT_PERIC1_TOP0_IPCLK_15 39 +#define CLK_GOUT_PERIC1_TOP0_IPCLK_4 40 +#define CLK_GOUT_PERIC1_TOP0_PCLK_10 41 +#define CLK_GOUT_PERIC1_TOP0_PCLK_11 42 +#define CLK_GOUT_PERIC1_TOP0_PCLK_12 43 +#define CLK_GOUT_PERIC1_TOP0_PCLK_13 44 +#define CLK_GOUT_PERIC1_TOP0_PCLK_14 45 +#define CLK_GOUT_PERIC1_TOP0_PCLK_15 46 +#define CLK_GOUT_PERIC1_TOP0_PCLK_4 47 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_0 48 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_1 49 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_10 50 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_12 51 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_13 52 +#define 
CLK_GOUT_PERIC1_TOP1_IPCLK_14 53 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_15 54 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_2 55 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_3 56 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_4 57 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_5 58 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_6 59 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_7 60 +#define CLK_GOUT_PERIC1_TOP1_IPCLK_9 61 +#define CLK_GOUT_PERIC1_TOP1_PCLK_0 62 +#define CLK_GOUT_PERIC1_TOP1_PCLK_1 63 +#define CLK_GOUT_PERIC1_TOP1_PCLK_10 64 +#define CLK_GOUT_PERIC1_TOP1_PCLK_12 65 +#define CLK_GOUT_PERIC1_TOP1_PCLK_13 66 +#define CLK_GOUT_PERIC1_TOP1_PCLK_14 67 +#define CLK_GOUT_PERIC1_TOP1_PCLK_15 68 +#define CLK_GOUT_PERIC1_TOP1_PCLK_2 69 +#define CLK_GOUT_PERIC1_TOP1_PCLK_3 70 +#define CLK_GOUT_PERIC1_TOP1_PCLK_4 71 +#define CLK_GOUT_PERIC1_TOP1_PCLK_5 72 +#define CLK_GOUT_PERIC1_TOP1_PCLK_6 73 +#define CLK_GOUT_PERIC1_TOP1_PCLK_7 74 +#define CLK_GOUT_PERIC1_TOP1_PCLK_9 75 +#define CLK_GOUT_PERIC1_BUSP_CLK 76 +#define CLK_GOUT_PERIC1_OSCCLK_CLK 77 +#define CLK_GOUT_PERIC1_USI06_USI_CLK 78 +#define CLK_GOUT_PERIC1_USI07_USI_CLK 79 +#define CLK_GOUT_PERIC1_USI08_USI_CLK 80 +#define CLK_GOUT_PERIC1_USI09_USI_CLK 81 +#define CLK_GOUT_PERIC1_USI10_USI_CLK 82 +#define CLK_GOUT_PERIC1_USI11_USI_CLK 83 +#define CLK_GOUT_PERIC1_USI16_USI_CLK 84 +#define CLK_GOUT_PERIC1_USI17_USI_CLK 85 +#define CLK_GOUT_PERIC1_USI_I2C_CLK 86 +#define CLK_GOUT_PERIC1_SYSREG_PCLK 87 +#define CLK_GOUT_PERIC1_USI16_I3C_PCLK 88 +#define CLK_GOUT_PERIC1_USI16_I3C_SCLK 89 +#define CLK_GOUT_PERIC1_USI17_I3C_PCLK 90 +#define CLK_GOUT_PERIC1_USI17_I3C_SCLK 91 +#define CLK_GOUT_PERIC1_XIU_P_ACLK 92 + /* CMU_PERIS */ #define CLK_MOUT_PERIS_BUS_USER 1 #define CLK_MOUT_PERIS_CLK_PERIS_GIC 2 -- cgit v1.2.3 From 0bcd01f757bc06471c82a137eafee281ef1b6e38 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 28 Aug 2024 21:56:57 -0700 Subject: hwmon: Introduce 64-bit energy attribute support Many chips require 64-bit variables to display the accumulated energy, even 
more so since the energy units are micro-Joule. Add new sensor type "energy64" to support reporting the chip energy as 64-bit values. Changing the entire hardware monitoring API is not feasible, and it is only really necessary to support reading 64-bit values for the "energyX_input" attribute. For this reason, keep the API as-is and use type casts on both ends to pass 64-bit pointers when reading the accumulated energy. On the write side (which is only useful for the energyX_enable attribute), keep passing the written value as long. Reviewed-by: Chris Packham Tested-by: Chris Packham # INA780 Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 1 + include/trace/events/hwmon.h | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 3a63dff62d03..886fc90b2d25 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -24,6 +24,7 @@ enum hwmon_sensor_types { hwmon_curr, hwmon_power, hwmon_energy, + hwmon_energy64, hwmon_humidity, hwmon_fan, hwmon_pwm, diff --git a/include/trace/events/hwmon.h b/include/trace/events/hwmon.h index d1ff560cd9b5..3865098f21f1 100644 --- a/include/trace/events/hwmon.h +++ b/include/trace/events/hwmon.h @@ -9,14 +9,14 @@ DECLARE_EVENT_CLASS(hwmon_attr_class, - TP_PROTO(int index, const char *attr_name, long val), + TP_PROTO(int index, const char *attr_name, long long val), TP_ARGS(index, attr_name, val), TP_STRUCT__entry( __field(int, index) __string(attr_name, attr_name) - __field(long, val) + __field(long long, val) ), TP_fast_assign( @@ -25,20 +25,20 @@ DECLARE_EVENT_CLASS(hwmon_attr_class, __entry->val = val; ), - TP_printk("index=%d, attr_name=%s, val=%ld", + TP_printk("index=%d, attr_name=%s, val=%lld", __entry->index, __get_str(attr_name), __entry->val) ); DEFINE_EVENT(hwmon_attr_class, hwmon_attr_show, - TP_PROTO(int index, const char *attr_name, long val), + TP_PROTO(int index, const char *attr_name, long long val), 
TP_ARGS(index, attr_name, val) ); DEFINE_EVENT(hwmon_attr_class, hwmon_attr_store, - TP_PROTO(int index, const char *attr_name, long val), + TP_PROTO(int index, const char *attr_name, long long val), TP_ARGS(index, attr_name, val) ); -- cgit v1.2.3 From 2c92e2fbe9e22cefdae87d8a0d654691ee4c1957 Mon Sep 17 00:00:00 2001 From: Joris Verhaegen Date: Fri, 5 Sep 2025 10:12:54 +0100 Subject: ALSA: compress_offload: Add 64-bit safe timestamp infrastructure The copied_total field in struct snd_compr_tstamp is a 32-bit value that can overflow on long-running high-bitrate streams, leading to incorrect calculations for buffer availability. This patch adds a 64-bit safe timestamping mechanism. A new UAPI struct, snd_compr_tstamp64, is added which uses 64-bit types for byte counters. The relevant ops structures across the ASoC and core compress code are updated to use this new struct. ASoC drivers are updated to use u64 counters. Internal timestamps being u64 now, a compatibility function is added to convert the 64-bit timestamp back to the 32-bit format for legacy ioctl callers.
Reviewed-by: Miller Liang Tested-by: Joris Verhaegen Signed-off-by: Joris Verhaegen Reviewed-by: Srinivas Kandagatla Reviewed-by: Charles Keepax Acked-by: Mark Brown Acked-by: Vinod Koul Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250905091301.2711705-2-verhaegen@google.com --- include/sound/compress_driver.h | 2 +- include/sound/soc-component.h | 4 ++-- include/sound/soc-dai.h | 7 ++++--- include/uapi/sound/compress_offload.h | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index b55c9eeb2b54..9e3d801e45ec 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -161,7 +161,7 @@ struct snd_compr_ops { struct snd_compr_metadata *metadata); int (*trigger)(struct snd_compr_stream *stream, int cmd); int (*pointer)(struct snd_compr_stream *stream, - struct snd_compr_tstamp *tstamp); + struct snd_compr_tstamp64 *tstamp); int (*copy)(struct snd_compr_stream *stream, char __user *buf, size_t count); int (*mmap)(struct snd_compr_stream *stream, diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index 2caa807c6249..cdb536c4ab2b 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -47,7 +47,7 @@ struct snd_compress_ops { struct snd_compr_stream *stream, int cmd); int (*pointer)(struct snd_soc_component *component, struct snd_compr_stream *stream, - struct snd_compr_tstamp *tstamp); + struct snd_compr_tstamp64 *tstamp); int (*copy)(struct snd_soc_component *component, struct snd_compr_stream *stream, char __user *buf, size_t count); @@ -498,7 +498,7 @@ int snd_soc_component_compr_get_codec_caps(struct snd_compr_stream *cstream, struct snd_compr_codec_caps *codec); int snd_soc_component_compr_ack(struct snd_compr_stream *cstream, size_t bytes); int snd_soc_component_compr_pointer(struct snd_compr_stream *cstream, - struct snd_compr_tstamp *tstamp); + 
struct snd_compr_tstamp64 *tstamp); int snd_soc_component_compr_copy(struct snd_compr_stream *cstream, char __user *buf, size_t count); int snd_soc_component_compr_set_metadata(struct snd_compr_stream *cstream, diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 166c29557e9d..224396927aef 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -256,7 +256,7 @@ int snd_soc_dai_compr_ack(struct snd_soc_dai *dai, size_t bytes); int snd_soc_dai_compr_pointer(struct snd_soc_dai *dai, struct snd_compr_stream *cstream, - struct snd_compr_tstamp *tstamp); + struct snd_compr_tstamp64 *tstamp); int snd_soc_dai_compr_set_metadata(struct snd_soc_dai *dai, struct snd_compr_stream *cstream, struct snd_compr_metadata *metadata); @@ -383,8 +383,9 @@ struct snd_soc_cdai_ops { struct snd_compr_metadata *, struct snd_soc_dai *); int (*trigger)(struct snd_compr_stream *, int, struct snd_soc_dai *); - int (*pointer)(struct snd_compr_stream *, - struct snd_compr_tstamp *, struct snd_soc_dai *); + int (*pointer)(struct snd_compr_stream *stream, + struct snd_compr_tstamp64 *tstamp, + struct snd_soc_dai *dai); int (*ack)(struct snd_compr_stream *, size_t, struct snd_soc_dai *); }; diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h index d62eb93af0ed..abd0ea3f86ee 100644 --- a/include/uapi/sound/compress_offload.h +++ b/include/uapi/sound/compress_offload.h @@ -56,6 +56,25 @@ struct snd_compr_tstamp { __u32 sampling_rate; } __attribute__((packed, aligned(4))); +/** + * struct snd_compr_tstamp64 - timestamp descriptor with fields in 64 bit + * @byte_offset: Byte offset in ring buffer to DSP + * @copied_total: Total number of bytes copied from/to ring buffer to/by DSP + * @pcm_frames: Frames decoded or encoded by DSP. This field will evolve by + * large steps and should only be used to monitor encoding/decoding + * progress. It shall not be used for timing estimates. 
+ * @pcm_io_frames: Frames rendered or received by DSP into a mixer or an audio + * output/input. This field should be used for A/V sync or time estimates. + * @sampling_rate: sampling rate of audio + */ +struct snd_compr_tstamp64 { + __u32 byte_offset; + __u64 copied_total; + __u64 pcm_frames; + __u64 pcm_io_frames; + __u32 sampling_rate; +} __attribute__((packed, aligned(4))); + /** * struct snd_compr_avail - avail descriptor * @avail: Number of bytes available in ring buffer for writing/reading -- cgit v1.2.3 From f20a53974f79619d0ef6c9f17bb8693499fb6ebb Mon Sep 17 00:00:00 2001 From: Joris Verhaegen Date: Fri, 5 Sep 2025 10:12:55 +0100 Subject: ALSA: compress_offload: Add SNDRV_COMPRESS_TSTAMP64 ioctl The previous patch introduced the internal infrastructure for handling 64-bit timestamps. This patch exposes this capability to user-space. Define the new ioctl command SNDRV_COMPRESS_TSTAMP64, which allows applications to fetch the overflow-safe struct snd_compr_tstamp64. The ioctl dispatch table is updated to handle the new command by calling a new snd_compr_tstamp64 handler, while the legacy path is renamed to snd_compr_tstamp32 for clarity. This patch bumps the SNDRV_COMPRESS_VERSION to 0.4.0. 
Reviewed-by: Miller Liang Tested-by: Joris Verhaegen Signed-off-by: Joris Verhaegen Reviewed-by: Charles Keepax Acked-by: Mark Brown Acked-by: Vinod Koul Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250905091301.2711705-3-verhaegen@google.com --- include/uapi/sound/compress_offload.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h index abd0ea3f86ee..70b8921601f9 100644 --- a/include/uapi/sound/compress_offload.h +++ b/include/uapi/sound/compress_offload.h @@ -13,8 +13,7 @@ #include #include - -#define SNDRV_COMPRESS_VERSION SNDRV_PROTOCOL_VERSION(0, 3, 0) +#define SNDRV_COMPRESS_VERSION SNDRV_PROTOCOL_VERSION(0, 4, 0) /** * struct snd_compressed_buffer - compressed buffer * @fragment_size: size of buffer fragment in bytes @@ -208,6 +207,7 @@ struct snd_compr_task_status { * Note: only codec params can be changed runtime and stream params cant be * SNDRV_COMPRESS_GET_PARAMS: Query codec params * SNDRV_COMPRESS_TSTAMP: get the current timestamp value + * SNDRV_COMPRESS_TSTAMP64: get the current timestamp value in 64 bit format * SNDRV_COMPRESS_AVAIL: get the current buffer avail value. 
* This also queries the tstamp properties * SNDRV_COMPRESS_PAUSE: Pause the running stream @@ -230,6 +230,7 @@ struct snd_compr_task_status { struct snd_compr_metadata) #define SNDRV_COMPRESS_TSTAMP _IOR('C', 0x20, struct snd_compr_tstamp) #define SNDRV_COMPRESS_AVAIL _IOR('C', 0x21, struct snd_compr_avail) +#define SNDRV_COMPRESS_TSTAMP64 _IOR('C', 0x22, struct snd_compr_tstamp64) #define SNDRV_COMPRESS_PAUSE _IO('C', 0x30) #define SNDRV_COMPRESS_RESUME _IO('C', 0x31) #define SNDRV_COMPRESS_START _IO('C', 0x32) -- cgit v1.2.3 From 86eec88c5bddf9a57bfebe701d9c7a4d439aed9b Mon Sep 17 00:00:00 2001 From: Joris Verhaegen Date: Fri, 5 Sep 2025 10:12:56 +0100 Subject: ALSA: compress_offload: Add SNDRV_COMPRESS_AVAIL64 ioctl The previous patch introduced a 64-bit timestamp ioctl (SNDRV_COMPRESS_TSTAMP64). To provide a consistent API, this patch adds a corresponding 64-bit version of the SNDRV_COMPRESS_AVAIL ioctl. A new struct snd_compr_avail64 is added to the UAPI, which includes the 64-bit timestamp. The existing ioctl implementation is refactored to handle both the 32-bit and 64-bit variants. 
Reviewed-by: Miller Liang Tested-by: Joris Verhaegen Signed-off-by: Joris Verhaegen Acked-by: Vinod Koul Reviewed-by: Charles Keepax Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250905091301.2711705-4-verhaegen@google.com --- include/uapi/sound/compress_offload.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h index 70b8921601f9..26f756cc2e62 100644 --- a/include/uapi/sound/compress_offload.h +++ b/include/uapi/sound/compress_offload.h @@ -84,6 +84,16 @@ struct snd_compr_avail { struct snd_compr_tstamp tstamp; } __attribute__((packed, aligned(4))); +/** + * struct snd_compr_avail64 - avail descriptor with tstamp in 64 bit format + * @avail: Number of bytes available in ring buffer for writing/reading + * @tstamp: timestamp information + */ +struct snd_compr_avail64 { + __u64 avail; + struct snd_compr_tstamp64 tstamp; +} __attribute__((packed, aligned(4))); + enum snd_compr_direction { SND_COMPRESS_PLAYBACK = 0, SND_COMPRESS_CAPTURE, @@ -231,6 +241,7 @@ struct snd_compr_task_status { #define SNDRV_COMPRESS_TSTAMP _IOR('C', 0x20, struct snd_compr_tstamp) #define SNDRV_COMPRESS_AVAIL _IOR('C', 0x21, struct snd_compr_avail) #define SNDRV_COMPRESS_TSTAMP64 _IOR('C', 0x22, struct snd_compr_tstamp64) +#define SNDRV_COMPRESS_AVAIL64 _IOR('C', 0x23, struct snd_compr_avail64) #define SNDRV_COMPRESS_PAUSE _IO('C', 0x30) #define SNDRV_COMPRESS_RESUME _IO('C', 0x31) #define SNDRV_COMPRESS_START _IO('C', 0x32) -- cgit v1.2.3 From d364d2ad07873dc4991b2a631a8536597272418b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 2 Sep 2025 13:59:11 +0200 Subject: devres: provide devm_kmemdup_const() Provide a function similar to devm_strdup_const() but for copying blocks of memory that are likely to be placed in .rodata. 
Reviewed-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Tested-by: Neil Armstrong Signed-off-by: Bartosz Golaszewski Signed-off-by: Linus Walleij --- include/linux/device/devres.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h index ae696d10faff..8c5f57e0d613 100644 --- a/include/linux/device/devres.h +++ b/include/linux/device/devres.h @@ -80,6 +80,8 @@ void devm_kfree(struct device *dev, const void *p); void * __realloc_size(3) devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp); +const void * +devm_kmemdup_const(struct device *dev, const void *src, size_t len, gfp_t gfp); static inline void *devm_kmemdup_array(struct device *dev, const void *src, size_t n, size_t size, gfp_t flags) { -- cgit v1.2.3 From 11aa02d6a9c222260490f952d041dec6d7f16a92 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 2 Sep 2025 13:59:22 +0200 Subject: pinctrl: allow to mark pin functions as requestable GPIOs The name of the pin function has no real meaning to pinctrl core and is there only for human readability of device properties. Some pins are muxed as GPIOs but for "strict" pinmuxers it's impossible to request them as GPIOs if they're bound to a device - even if their function name explicitly says "gpio". Add a new field to struct pinfunction that allows to pass additional flags to pinctrl core. While we could go with a boolean "is_gpio" field, a flags field is more future-proof. If the PINFUNCTION_FLAG_GPIO is set for a given function, the pin muxed to it can be requested as GPIO even on strict pin controllers. Add a new callback to struct pinmux_ops - function_is_gpio() - that allows pinmux core to inspect a function and see if it's a GPIO one. Provide a generic implementation of this callback. 
Tested-by: Neil Armstrong Signed-off-by: Bartosz Golaszewski Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 14 ++++++++++++++ include/linux/pinctrl/pinmux.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index d138e1815645..1a8084e29405 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -11,6 +11,7 @@ #ifndef __LINUX_PINCTRL_PINCTRL_H #define __LINUX_PINCTRL_PINCTRL_H +#include #include struct device; @@ -206,16 +207,20 @@ extern int pinctrl_get_group_pins(struct pinctrl_dev *pctldev, const char *pin_group, const unsigned int **pins, unsigned int *num_pins); +#define PINFUNCTION_FLAG_GPIO BIT(0) + /** * struct pinfunction - Description about a function * @name: Name of the function * @groups: An array of groups for this function * @ngroups: Number of groups in @groups + * @flags: Additional pin function flags */ struct pinfunction { const char *name; const char * const *groups; size_t ngroups; + unsigned long flags; }; /* Convenience macro to define a single named pinfunction */ @@ -226,6 +231,15 @@ struct pinfunction { .ngroups = (_ngroups), \ } +/* Same as PINCTRL_PINFUNCTION() but for the GPIO category of functions */ +#define PINCTRL_GPIO_PINFUNCTION(_name, _groups, _ngroups) \ +(struct pinfunction) { \ + .name = (_name), \ + .groups = (_groups), \ + .ngroups = (_ngroups), \ + .flags = PINFUNCTION_FLAG_GPIO, \ + } + #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_PINCTRL) extern struct pinctrl_dev *of_pinctrl_get(struct device_node *np); #else diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index d6f7b58d6ad0..6db6c3e1ccc2 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -66,6 +66,8 @@ struct pinmux_ops { unsigned int selector, const char * const **groups, unsigned int *num_groups); + bool (*function_is_gpio) (struct pinctrl_dev *pctldev, + unsigned 
int selector); int (*set_mux) (struct pinctrl_dev *pctldev, unsigned int func_selector, unsigned int group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, -- cgit v1.2.3 From 203a83112e097a501fbe12722b6342787497efe0 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 5 Sep 2025 11:21:50 +0200 Subject: pinctrl: generic: rename PIN_CONFIG_OUTPUT to LEVEL This generic pin config property is confusingly named so let's rename it to make things clearer. There are already drivers in the tree that use PIN_CONFIG_OUTPUT to *read* the value of an output driven pin, which is a big semantic confusion for the head: are we then reading the setting of the output or the actual value/level that is put out on the pin? We already have PIN_CONFIG_OUTPUT_ENABLE that turns on driver buffers for output, so this can by logical conclusion only drive the voltage level if it should be any different. But if we read the pin, are we then reading the *setting* of the output value or the *actual* value we can see on the line? If the pin has not first been set into output mode with PIN_CONFIG_OUTPUT_ENABLE, but is instead in some input mode or tristate, what will reading this property actually return? Reading the current users reading this property it is clear that what we read is the logical level of the pin as 0 or 1 depending on if it is low or high. Rename it to PIN_CONFIG_LEVEL so it is crystal clear that we set or read the voltage level of the pin and nothing else. 
Acked-by: Sudeep Holla Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 1bcf071b860e..d9245ecec71d 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -88,9 +88,13 @@ struct pinctrl_map; * passed in the argument on a custom form, else just use argument 1 * to indicate low power mode, argument 0 turns low power mode off. * @PIN_CONFIG_MODE_PWM: this will configure the pin for PWM - * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a - * value on the line. Use argument 1 to indicate high level, argument 0 to - * indicate low level. (Please see Documentation/driver-api/pin-control.rst, + * @PIN_CONFIG_LEVEL: setting this will configure the pin as an output and + * drive a value on the line. Use argument 1 to indicate high level, + * argument 0 to indicate low level. Conversely the value of the line + * can be read using this parameter, if and only if that value can be + * represented as a binary 0 or 1 where 0 indicate a low voltage level + * and 1 indicate a high voltage level. + * (Please see Documentation/driver-api/pin-control.rst, * section "GPIO mode pitfalls" for a discussion around this parameter.) * @PIN_CONFIG_OUTPUT_ENABLE: this will enable the pin's output mode * without driving a value there. 
For most platforms this reduces to @@ -137,7 +141,7 @@ enum pin_config_param { PIN_CONFIG_INPUT_SCHMITT_UV, PIN_CONFIG_MODE_LOW_POWER, PIN_CONFIG_MODE_PWM, - PIN_CONFIG_OUTPUT, + PIN_CONFIG_LEVEL, PIN_CONFIG_OUTPUT_ENABLE, PIN_CONFIG_OUTPUT_IMPEDANCE_OHMS, PIN_CONFIG_PERSIST_STATE, -- cgit v1.2.3 From 5f3cec21f6d57336a2cbfa9ee428ac66c77a7211 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 29 Aug 2025 20:46:01 +0300 Subject: overflow: add range_overflows() and range_end_overflows() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the range_overflows() and range_end_overflows() along with the _t variants over from drm/i915 and drm/buddy to overflow.h. Cc: Kees Cook Cc: "Gustavo A. R. Silva" Cc: linux-hardening@vger.kernel.org Reviewed-by: Kees Cook Reviewed-by: Jouni Högander Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250829174601.2163064-3-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/drm_buddy.h | 9 ------- include/linux/overflow.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 9689a7c5dd36..236971681514 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -13,15 +13,6 @@ #include -#define range_overflows(start, size, max) ({ \ - typeof(start) start__ = (start); \ - typeof(size) size__ = (size); \ - typeof(max) max__ = (max); \ - (void)(&start__ == &size__); \ - (void)(&start__ == &max__); \ - start__ >= max__ || size__ > max__ - start__; \ -}) - #define DRM_BUDDY_RANGE_ALLOCATION BIT(0) #define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1) #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 154ed0dbb43f..725f95f7e416 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -238,6 +238,76 @@ static inline bool __must_check 
__must_check_overflow(bool overflow) __overflows_type_constexpr(n, T), \ __overflows_type(n, T)) +/** + * range_overflows() - Check if a range is out of bounds + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * A strict check to determine if the range [@start, @start + @size) is + * invalid with respect to the allowable range [0, @max). Any range + * starting at or beyond @max is considered an overflow, even if @size is 0. + * + * Returns: true if the range is out of bounds. + */ +#define range_overflows(start, size, max) ({ \ + typeof(start) start__ = (start); \ + typeof(size) size__ = (size); \ + typeof(max) max__ = (max); \ + (void)(&start__ == &size__); \ + (void)(&start__ == &max__); \ + start__ >= max__ || size__ > max__ - start__; \ +}) + +/** + * range_overflows_t() - Check if a range is out of bounds + * @type: Data type to use. + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Same as range_overflows() but forcing the parameters to @type. + * + * Returns: true if the range is out of bounds. + */ +#define range_overflows_t(type, start, size, max) \ + range_overflows((type)(start), (type)(size), (type)(max)) + +/** + * range_end_overflows() - Check if a range's endpoint is out of bounds + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Checks only if the endpoint of a range (@start + @size) exceeds @max. + * Unlike range_overflows(), a zero-sized range at the boundary (@start == @max) + * is not considered an overflow. Useful for iterator-style checks. + * + * Returns: true if the endpoint exceeds the boundary. 
+ */ +#define range_end_overflows(start, size, max) ({ \ + typeof(start) start__ = (start); \ + typeof(size) size__ = (size); \ + typeof(max) max__ = (max); \ + (void)(&start__ == &size__); \ + (void)(&start__ == &max__); \ + start__ > max__ || size__ > max__ - start__; \ +}) + +/** + * range_end_overflows_t() - Check if a range's endpoint is out of bounds + * @type: Data type to use. + * @start: Start of the range. + * @size: Size of the range. + * @max: Exclusive upper boundary. + * + * Same as range_end_overflows() but forcing the parameters to @type. + * + * Returns: true if the endpoint exceeds the boundary. + */ +#define range_end_overflows_t(type, start, size, max) \ + range_end_overflows((type)(start), (type)(size), (type)(max)) + /** * castable_to_type - like __same_type(), but also allows for casted literals * -- cgit v1.2.3 From 3ad2a7b9b15d5072139a20be84adb36776eb6c9b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 5 Jun 2025 16:23:57 -0700 Subject: hwmon: Serialize accesses in hwmon core Implement locking in the hardware monitoring core for drivers using the _with_info() API functions. Most hardware monitoring drivers need to support locking to protect against parallel accesses from userspace. With older API functions, such locking had to be implemented in the driver code since sysfs attributes were created by the driver. However, the _with_info() API creates sysfs attributes in the hardware monitoring core. This makes it easy to move the locking primitives into that code. This has the benefit of simplifying driver code while at the same time reducing the risk of incomplete or bad locking implementations in hardware monitoring drivers. While this means that all accesses are forced to be synchronized, this has little if any practical impact since accesses are expected to be low frequency and are typically synchronized from userspace anyway since only a single process is accessing the data. 
On top of that, many drivers use regmap, which also has its own locking scheme and already serializes accesses. Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 886fc90b2d25..301a83afbd66 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -492,6 +492,9 @@ int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type, char *hwmon_sanitize_name(const char *name); char *devm_hwmon_sanitize_name(struct device *dev, const char *name); +void hwmon_lock(struct device *dev); +void hwmon_unlock(struct device *dev); + /** * hwmon_is_bad_char - Is the char invalid in a hwmon name * @ch: the char to be considered -- cgit v1.2.3 From ad0d05dbddc1bf86e92220fea873176de6b12f78 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 30 Aug 2025 10:18:21 +0800 Subject: blk-mq: Defer freeing of tags page_list to SRCU callback Tag iterators can race with the freeing of the request pages(tags->page_list), potentially leading to use-after-free issues. Defer the freeing of the page list and the tags structure itself until after an SRCU grace period has passed. This ensures that any concurrent tag iterators have completed before the memory is released. With this way, we can replace the big tags->lock in tags iterator code path with srcu for solving the issue. This is achieved by: - Adding a new `srcu_struct tags_srcu` to `blk_mq_tag_set` to protect tag map iteration. - Adding an `rcu_head` to `struct blk_mq_tags` to be used with `call_srcu`. - Moving the page list freeing logic and the `kfree(tags)` call into a new callback function, `blk_mq_free_tags_callback`. - In `blk_mq_free_tags`, invoking `call_srcu` to schedule the new callback for deferred execution. The read-side protection for the tag iterators will be added in a subsequent patch. Reviewed-by: Hannes Reinecke Reviewed-by: Yu Kuai Signed-off-by: Ming Lei Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2a5a828f19a0..1325ceeb743a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -531,6 +531,7 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; + struct srcu_struct tags_srcu; struct rw_semaphore update_nr_hwq_lock; }; @@ -767,6 +768,7 @@ struct blk_mq_tags { * request pool */ spinlock_t lock; + struct rcu_head rcu_head; }; static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, -- cgit v1.2.3 From c265ae75f900cea4e415230a77b5d152377627dd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 8 Sep 2025 00:03:00 +0100 Subject: io_uring: introduce io_uring querying There are many parameters users might want to query about io_uring like available request types or the ring sizes. This patch introduces an interface for such slow path queries. It was written with several requirements in mind: - Can be used with or without an io_uring instance. Asking for supported setup flags before creating an instance as well as querying info about an already created ring are valid use cases. - Should be moderately fast. For example, users might use it to periodically retrieve ring attributes at runtime. As a consequence, it should be able to query multiple attributes in a single syscall. - Backward and forward compatible. - Should be reasonably easy to use. - Reduce the kernel code size for introducing new query types. It's implemented as a new registration opcode IORING_REGISTER_QUERY. The user passes one or more query structures linked together, each represented by struct io_uring_query_hdr. The header stores common control fields needed for processing and points to query type specific information. 
The header contains - The query type - The result field, which on return contains the error code for the query - Pointer to the query type specific information - The size of the query structure. The kernel will only populate up to the size, which helps with backward compatibility. The kernel can also reduce the size, so if the current kernel is older than the interface the user tries to use, it'll get only the supported bits. - next_entry field is used to chain multiple queries. Apart from common registration syscall failures, it can only immediately return an error code in case when the headers are incorrect or any other addresses are invalid. That usually means that the userspace doesn't use the API right and should be corrected. All query type specific errors are returned in the header's result field. As an example, the patch adds a single query type for now, i.e. IO_URING_QUERY_OPCODES, which tells what register / request / etc. opcodes are supported, but there are particular plans to extend it. Note: there is a request probing interface via IORING_REGISTER_PROBE, but it's a mess. It requires the user to create a ring first, it only works for requests, and requires dynamic allocations. Reviewed-by: Martin K. 
Petersen Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ include/uapi/linux/io_uring/query.h | 41 +++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 include/uapi/linux/io_uring/query.h (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 04ebff33d0e6..1ce17c535944 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -686,6 +686,9 @@ enum io_uring_register_op { IORING_REGISTER_MEM_REGION = 34, + /* query various aspects of io_uring, see linux/io_uring/query.h */ + IORING_REGISTER_QUERY = 35, + /* this goes last */ IORING_REGISTER_LAST, diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h new file mode 100644 index 000000000000..5d754322a27c --- /dev/null +++ b/include/uapi/linux/io_uring/query.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Header file for the io_uring query interface. 
+ */ +#ifndef LINUX_IO_URING_QUERY_H +#define LINUX_IO_URING_QUERY_H + +#include + +struct io_uring_query_hdr { + __u64 next_entry; + __u64 query_data; + __u32 query_op; + __u32 size; + __s32 result; + __u32 __resv[3]; +}; + +enum { + IO_URING_QUERY_OPCODES = 0, + + __IO_URING_QUERY_MAX, +}; + +/* Doesn't require a ring */ +struct io_uring_query_opcode { + /* The number of supported IORING_OP_* opcodes */ + __u32 nr_request_opcodes; + /* The number of supported IORING_[UN]REGISTER_* opcodes */ + __u32 nr_register_opcodes; + /* Bitmask of all supported IORING_FEAT_* flags */ + __u64 feature_flags; + /* Bitmask of all supported IORING_SETUP_* flags */ + __u64 ring_setup_flags; + /* Bitmask of all supported IORING_ENTER_** flags */ + __u64 enter_flags; + /* Bitmask of all supported IOSQE_* flags */ + __u64 sqe_flags; +}; + +#endif -- cgit v1.2.3 From 473efbc3ca29f725abfb7b323798df596ef41f3e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 8 Sep 2025 08:18:15 -0600 Subject: io_uring/uring_cmd: fix __io_uring_cmd_do_in_task !CONFIG_IO_URING typo A manual application of this patch resulted in a typo for the stub function __io_uring_cmd_do_in_task(), for the case where CONFIG_IO_URING isn't true. Fix that up. 
Reported-by: Klara Modin Fixes: df3a7762ee24 ("io_uring/uring_cmd: add io_uring_cmd_tw_t type alias") Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 50dd6a53cb5e..1350af846ddd 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -109,8 +109,7 @@ static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, { } static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, - io_uring_tw_t task_work_cb, - unsigned flags) + io_uring_cmd_tw_t task_work_cb, unsigned flags) { } static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, -- cgit v1.2.3 From 34c605fe53d49886d2741223b12950a33bdf2acf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 26 Aug 2025 16:56:06 +0200 Subject: xen: rework xen_pv_domain() Rework xen_pv_domain() to no longer use the xen_domain_type variable, but the artificial X86_FEATURE_XENPV cpu feature. On non-x86 architectures xen_pv_domain() can be defined as "0". This has the advantage that a kernel not built with CONFIG_XEN_PV will be smaller due to dead code elimination. Set the X86_FEATURE_XENPV feature very early, as xen_pv_domain() is used rather early, too. 
Reviewed-by: Jason Andryuk Signed-off-by: Juergen Gross Message-ID: <20250826145608.10352-2-jgross@suse.com> --- include/xen/xen.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/xen/xen.h b/include/xen/xen.h index a1e5b3f18d69..61854e3f2837 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -22,8 +22,15 @@ extern bool xen_pvh; #define xen_pvh 0 #endif +#ifdef CONFIG_X86 +#include + +#define xen_pv_domain() (cpu_feature_enabled(X86_FEATURE_XENPV)) +#else +#define xen_pv_domain() 0 +#endif + #define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) #define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) #define xen_pvh_domain() (xen_pvh) -- cgit v1.2.3 From 0f4283123fe1e6016296048d0fdcfce615047a13 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 26 Aug 2025 16:56:07 +0200 Subject: xen: replace XENFEAT_auto_translated_physmap with xen_pv_domain() Instead of testing the XENFEAT_auto_translated_physmap feature, just use !xen_pv_domain() which is equivalent. This has the advantage that a kernel not built with CONFIG_XEN_PV will be smaller due to dead code elimination. 
Reviewed-by: Jason Andryuk Signed-off-by: Juergen Gross Message-ID: <20250826145608.10352-3-jgross@suse.com> --- include/xen/grant_table.h | 4 ++-- include/xen/mem-reservation.h | 4 ++-- include/xen/xen-ops.h | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index e279be353e3f..69ac6d80a006 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -164,7 +164,7 @@ gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, { if (flags & GNTMAP_contains_pte) map->host_addr = addr; - else if (xen_feature(XENFEAT_auto_translated_physmap)) + else if (!xen_pv_domain()) map->host_addr = __pa(addr); else map->host_addr = addr; @@ -181,7 +181,7 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr, { if (flags & GNTMAP_contains_pte) unmap->host_addr = addr; - else if (xen_feature(XENFEAT_auto_translated_physmap)) + else if (!xen_pv_domain()) unmap->host_addr = __pa(addr); else unmap->host_addr = addr; diff --git a/include/xen/mem-reservation.h b/include/xen/mem-reservation.h index a2ab516fcd2c..3cbe3df0dfd4 100644 --- a/include/xen/mem-reservation.h +++ b/include/xen/mem-reservation.h @@ -39,7 +39,7 @@ static inline void xenmem_reservation_va_mapping_update(unsigned long count, xen_pfn_t *frames) { #ifdef CONFIG_XEN_HAVE_PVMMU - if (!xen_feature(XENFEAT_auto_translated_physmap)) + if (xen_pv_domain()) __xenmem_reservation_va_mapping_update(count, pages, frames); #endif } @@ -48,7 +48,7 @@ static inline void xenmem_reservation_va_mapping_reset(unsigned long count, struct page **pages) { #ifdef CONFIG_XEN_HAVE_PVMMU - if (!xen_feature(XENFEAT_auto_translated_physmap)) + if (xen_pv_domain()) __xenmem_reservation_va_mapping_reset(count, pages); #endif } diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 9e2a769b0d96..496e6013c689 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -6,6 +6,7 @@ 
#include #include #include +#include #include #include #include @@ -116,7 +117,7 @@ static inline int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned int domid, struct page **pages) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr, prot, domid, pages); @@ -150,7 +151,7 @@ static inline int xen_remap_domain_mfn_array(struct vm_area_struct *vma, int nr, int *err_ptr, pgprot_t prot, unsigned int domid) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return -EOPNOTSUPP; return xen_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, @@ -175,7 +176,7 @@ static inline int xen_remap_domain_gfn_range(struct vm_area_struct *vma, pgprot_t prot, unsigned int domid, struct page **pages) { - if (xen_feature(XENFEAT_auto_translated_physmap)) + if (!xen_pv_domain()) return -EOPNOTSUPP; return xen_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false); -- cgit v1.2.3 From 9cf93a8fa9513c6d3cc65bdd50e05c1355cef322 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 20 Aug 2025 14:04:24 -0500 Subject: ipmi: Allow an SMI sender to return an error Getting ready for handling when a BMC is non-responsive or broken, allow the sender operation to fail in an SMI. If it was a user-generated message it will return the error. The powernv code was already doing this internally, but the way it was written could result in deep stack descent if there were a lot of messages queued. Have its send return an error in this case. 
Signed-off-by: Corey Minyard --- include/linux/ipmi_smi.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 5d69820d8b02..c2d975bbff60 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -109,8 +109,8 @@ struct ipmi_smi_msg { enum ipmi_smi_msg_type type; - long msgid; - void *user_data; + long msgid; + void *user_data; int data_size; unsigned char data[IPMI_MAX_MSG_LENGTH]; @@ -168,9 +168,11 @@ struct ipmi_smi_handlers { * are held when this is run. Message are delivered one at * a time by the message handler, a new message will not be * delivered until the previous message is returned. + * + * This can return an error if the SMI is not in a state where it + * can send a message. */ - void (*sender)(void *send_info, - struct ipmi_smi_msg *msg); + int (*sender)(void *send_info, struct ipmi_smi_msg *msg); /* * Called by the upper layer to request that we try to get -- cgit v1.2.3 From 3bc54ab3b9790ca92f197e9822e486665daa321c Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 20 Aug 2025 14:09:11 -0500 Subject: ipmi: Rename "user_data" to "recv_msg" in an SMI message It's only used to hold the corresponding receive message, so fix the name to make that clear and the type so nothing else can be accidentally assigned to it. Signed-off-by: Corey Minyard --- include/linux/ipmi_smi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index c2d975bbff60..892e2d656e1e 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -110,7 +110,8 @@ struct ipmi_smi_msg { enum ipmi_smi_msg_type type; long msgid; - void *user_data; + /* Response to this message, will be NULL if not from a user request. 
*/ + struct ipmi_recv_msg *recv_msg; int data_size; unsigned char data[IPMI_MAX_MSG_LENGTH]; -- cgit v1.2.3 From 06aba2126b414248a34b13584f22a78787c95450 Mon Sep 17 00:00:00 2001 From: Alex Tran Date: Mon, 1 Sep 2025 11:40:08 -0700 Subject: ASoC: codecs: tlv320dac33: Remove unused struct tlv320dac33_platform_data and header file tlv320dac33-plat.h Remove the tlv320dac33_platform_data struct and header file tlv320dac33-plat.h as they are not used anywhere in the kernel or outside this driver. Signed-off-by: Alex Tran Message-ID: <20250901184008.1249535-3-alex.t.tran@gmail.com> Signed-off-by: Mark Brown --- include/sound/tlv320dac33-plat.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/sound/tlv320dac33-plat.h (limited to 'include') diff --git a/include/sound/tlv320dac33-plat.h b/include/sound/tlv320dac33-plat.h deleted file mode 100644 index 7a7249a896e3..000000000000 --- a/include/sound/tlv320dac33-plat.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Platform header for Texas Instruments TLV320DAC33 codec driver - * - * Author: Peter Ujfalusi - * - * Copyright: (C) 2009 Nokia Corporation - */ - -#ifndef __TLV320DAC33_PLAT_H -#define __TLV320DAC33_PLAT_H - -struct tlv320dac33_platform_data { - int power_gpio; - int mode1_latency; /* latency caused by the i2c writes in us */ - int auto_fifo_config; /* FIFO config based on the period size */ - int keep_bclk; /* Keep the BCLK running in FIFO modes */ - u8 burst_bclkdiv; -}; - -#endif /* __TLV320DAC33_PLAT_H */ -- cgit v1.2.3 From e465ad7ef57aa1ec4122fd5b34c182d59629cb91 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Mon, 11 Aug 2025 08:48:08 -0400 Subject: clk: ti: dpll: convert from round_rate() to determine_rate() The round_rate() clk ops is deprecated, so migrate this driver from round_rate() to determine_rate(). 
Part of these changes were done using the Coccinelle semantic patch on the cover letter of this series, and the rest of the changes were manually done. omap4_dpll_regm4xen_round_rate() is now only called by omap4_dpll_regm4xen_determine_rate(), so let's merge that functionality into one function. This is needed for another cleanup to completely remove the round_rate() clk ops from the clk core. Tested-by: Andreas Kemnade # OMAP3 GTA04, OMAP4 Panda Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Brian Masney --- include/linux/clk/ti.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index e656f63efdce..54a3fa370004 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -34,14 +34,14 @@ struct clk_omap_reg { * @clk_ref: struct clk_hw pointer to the clock's reference clock input * @control_reg: register containing the DPLL mode bitfield * @enable_mask: mask of the DPLL mode bitfield in @control_reg - * @last_rounded_rate: cache of the last rate result of omap2_dpll_round_rate() - * @last_rounded_m: cache of the last M result of omap2_dpll_round_rate() + * @last_rounded_rate: cache of the last rate result of omap2_dpll_determine_rate() + * @last_rounded_m: cache of the last M result of omap2_dpll_determine_rate() * @last_rounded_m4xen: cache of the last M4X result of - * omap4_dpll_regm4xen_round_rate() + * omap4_dpll_regm4xen_determine_rate() * @last_rounded_lpmode: cache of the last lpmode result of * omap4_dpll_lpmode_recalc() * @max_multiplier: maximum valid non-bypass multiplier value (actual) - * @last_rounded_n: cache of the last N result of omap2_dpll_round_rate() + * @last_rounded_n: cache of the last N result of omap2_dpll_determine_rate() * @min_divider: minimum valid non-bypass divider value (actual) * @max_divider: maximum valid non-bypass divider value (actual) * @max_rate: maximum clock rate for the DPLL -- cgit v1.2.3 From
17d370a70bae277678b6ea82d71ef5892e7aaa97 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 26 Aug 2025 17:54:55 +0200 Subject: xdp, libeth: make the xdp_init_buff() micro-optimization generic Often times the compilers are not able to expand two consecutive 32-bit writes into one 64-bit on the corresponding architectures. This applies to xdp_init_buff() called for every received frame (or at least once per each 64 frames when the frag size is fixed). Move the not-so-pretty hack from libeth_xdp straight to xdp_init_buff(), but using a proper union around ::frame_sz and ::flags. The optimization is limited to LE architectures due to the structure layout. One simple example from idpf with the XDP series applied (Clang 22-git, CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE => -O2): add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-27 (-27) Function old new delta idpf_vport_splitq_napi_poll 5076 5049 -27 The perf difference with XDP_DROP is around +0.8-1% which I see as more than satisfying. Suggested-by: Simon Horman Signed-off-by: Alexander Lobakin Tested-by: Ramu R Signed-off-by: Tony Nguyen --- include/net/libeth/xdp.h | 11 +---------- include/net/xdp.h | 28 +++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index f4880b50e804..bc3507edd589 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -1274,7 +1274,6 @@ bool libeth_xdp_buff_add_frag(struct libeth_xdp_buff *xdp, * Internal, use libeth_xdp_process_buff() instead. Initializes XDP buffer * head with the Rx buffer data: data pointer, length, headroom, and * truesize/tailroom. Zeroes the flags. - * Uses faster single u64 write instead of per-field access. 
*/ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp, const struct libeth_fqe *fqe, @@ -1282,17 +1281,9 @@ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp, { const struct page *page = __netmem_to_page(fqe->netmem); -#ifdef __LIBETH_WORD_ACCESS - static_assert(offsetofend(typeof(xdp->base), flags) - - offsetof(typeof(xdp->base), frame_sz) == - sizeof(u64)); - - *(u64 *)&xdp->base.frame_sz = fqe->truesize; -#else - xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq); -#endif xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset, pp_page_to_nmdesc(page)->pp->p.offset, len, true); + xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq); } /** diff --git a/include/net/xdp.h b/include/net/xdp.h index b40f1f96cb11..af60e11b336c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -85,8 +85,20 @@ struct xdp_buff { void *data_hard_start; struct xdp_rxq_info *rxq; struct xdp_txq_info *txq; - u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ - u32 flags; /* supported values defined in xdp_buff_flags */ + + union { + struct { + /* frame size to deduce data_hard_end/tailroom */ + u32 frame_sz; + /* supported values defined in xdp_buff_flags */ + u32 flags; + }; + +#ifdef __LITTLE_ENDIAN + /* Used to micro-optimize xdp_init_buff(), don't use directly */ + u64 frame_sz_flags_init; +#endif + }; }; static __always_inline bool xdp_buff_has_frags(const struct xdp_buff *xdp) @@ -118,9 +130,19 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { - xdp->frame_sz = frame_sz; xdp->rxq = rxq; + +#ifdef __LITTLE_ENDIAN + /* + * Force the compilers to initialize ::flags and assign ::frame_sz with + * one write on 64-bit LE architectures as they're often unable to do + * it themselves. 
+ */ + xdp->frame_sz_flags_init = frame_sz; +#else + xdp->frame_sz = frame_sz; xdp->flags = 0; +#endif } static __always_inline void -- cgit v1.2.3 From 3f5952917498e7bb9d227812d4349668f62c413b Mon Sep 17 00:00:00 2001 From: Per Larsen Date: Wed, 20 Aug 2025 01:10:09 +0000 Subject: KVM: arm64: Mask response to FFA_FEATURE call The minimum size and alignment boundary for FFA_RXTX_MAP is returned in bit[1:0]. Mask off any other bits in w2 when reading the minimum buffer size in hyp_ffa_post_init. Acked-by: Will Deacon Signed-off-by: Per Larsen Signed-off-by: Marc Zyngier --- include/linux/arm_ffa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index e1634897e159..cd7ee4df9045 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -128,6 +128,7 @@ #define FFA_FEAT_RXTX_MIN_SZ_4K 0 #define FFA_FEAT_RXTX_MIN_SZ_64K 1 #define FFA_FEAT_RXTX_MIN_SZ_16K 2 +#define FFA_FEAT_RXTX_MIN_SZ_MASK GENMASK(1, 0) /* FFA Bus/Device/Driver related */ struct ffa_device { -- cgit v1.2.3 From 6606c8c7e81886565f5cbdb0c0ce82e280c2b229 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 Aug 2025 09:43:58 -0700 Subject: bitops: Add __attribute_const__ to generic ffs()-family implementations While tracking down a problem where constant expressions used by BUILD_BUG_ON() suddenly stopped working[1], we found that an added static initializer was convincing the compiler that it couldn't track the state of the prior statically initialized value. Tracing this down found that ffs() was used in the initializer macro, but since it wasn't marked with __attribute_const__, the compiler had to assume the function might change variable states as a side-effect (which is not true for ffs(), which provides deterministic math results). Add missing __attribute_const__ annotations to generic implementations of ffs(), __ffs(), fls(), and __fls() functions.
These are pure mathematical functions that always return the same result for the same input with no side effects, making them eligible for compiler optimization. Build tested with x86_64 defconfig using GCC 14.2.0, which should validate the implementations when used by ARM, ARM64, LoongArch, Microblaze, NIOS2, and SPARC32 architectures. Link: https://github.com/KSPP/linux/issues/364 [1] Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250804164417.1612371-2-kees@kernel.org Signed-off-by: Kees Cook --- include/asm-generic/bitops/__ffs.h | 2 +- include/asm-generic/bitops/__fls.h | 2 +- include/asm-generic/bitops/builtin-__ffs.h | 2 +- include/asm-generic/bitops/builtin-__fls.h | 2 +- include/asm-generic/bitops/builtin-fls.h | 2 +- include/asm-generic/bitops/ffs.h | 2 +- include/asm-generic/bitops/fls.h | 2 +- include/asm-generic/bitops/fls64.h | 4 ++-- include/linux/bitops.h | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/__ffs.h b/include/asm-generic/bitops/__ffs.h index 2d08c750c8a7..3a899c626fdc 100644 --- a/include/asm-generic/bitops/__ffs.h +++ b/include/asm-generic/bitops/__ffs.h @@ -10,7 +10,7 @@ * * Undefined if no bit exists, so code should check against 0 first. */ -static __always_inline unsigned int generic___ffs(unsigned long word) +static __always_inline __attribute_const__ unsigned int generic___ffs(unsigned long word) { unsigned int num = 0; diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h index e974ec932ec1..35f33780ca6c 100644 --- a/include/asm-generic/bitops/__fls.h +++ b/include/asm-generic/bitops/__fls.h @@ -10,7 +10,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. 
*/ -static __always_inline unsigned int generic___fls(unsigned long word) +static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word) { unsigned int num = BITS_PER_LONG - 1; diff --git a/include/asm-generic/bitops/builtin-__ffs.h b/include/asm-generic/bitops/builtin-__ffs.h index cf4b3d33bf96..d3c3f567045d 100644 --- a/include/asm-generic/bitops/builtin-__ffs.h +++ b/include/asm-generic/bitops/builtin-__ffs.h @@ -8,7 +8,7 @@ * * Undefined if no bit exists, so code should check against 0 first. */ -static __always_inline unsigned int __ffs(unsigned long word) +static __always_inline __attribute_const__ unsigned int __ffs(unsigned long word) { return __builtin_ctzl(word); } diff --git a/include/asm-generic/bitops/builtin-__fls.h b/include/asm-generic/bitops/builtin-__fls.h index 6d72fc8a5259..7770c4f1bfcd 100644 --- a/include/asm-generic/bitops/builtin-__fls.h +++ b/include/asm-generic/bitops/builtin-__fls.h @@ -8,7 +8,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned int __fls(unsigned long word) +static __always_inline __attribute_const__ unsigned int __fls(unsigned long word) { return (sizeof(word) * 8) - 1 - __builtin_clzl(word); } diff --git a/include/asm-generic/bitops/builtin-fls.h b/include/asm-generic/bitops/builtin-fls.h index c8455cc28841..be707da8c7cd 100644 --- a/include/asm-generic/bitops/builtin-fls.h +++ b/include/asm-generic/bitops/builtin-fls.h @@ -9,7 +9,7 @@ * This is defined the same way as ffs. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static __always_inline int fls(unsigned int x) +static __always_inline __attribute_const__ int fls(unsigned int x) { return x ? 
sizeof(x) * 8 - __builtin_clz(x) : 0; } diff --git a/include/asm-generic/bitops/ffs.h b/include/asm-generic/bitops/ffs.h index 4c43f242daeb..5ff2b7fbda6d 100644 --- a/include/asm-generic/bitops/ffs.h +++ b/include/asm-generic/bitops/ffs.h @@ -10,7 +10,7 @@ * the libc and compiler builtin ffs routines, therefore * differs in spirit from ffz (man ffs). */ -static inline int generic_ffs(int x) +static inline __attribute_const__ int generic_ffs(int x) { int r = 1; diff --git a/include/asm-generic/bitops/fls.h b/include/asm-generic/bitops/fls.h index 26f3ce1dd6e4..8eed3437edb9 100644 --- a/include/asm-generic/bitops/fls.h +++ b/include/asm-generic/bitops/fls.h @@ -10,7 +10,7 @@ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static __always_inline int generic_fls(unsigned int x) +static __always_inline __attribute_const__ int generic_fls(unsigned int x) { int r = 32; diff --git a/include/asm-generic/bitops/fls64.h b/include/asm-generic/bitops/fls64.h index 866f2b2304ff..b5f58dd261a3 100644 --- a/include/asm-generic/bitops/fls64.h +++ b/include/asm-generic/bitops/fls64.h @@ -16,7 +16,7 @@ * at position 64. */ #if BITS_PER_LONG == 32 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { __u32 h = x >> 32; if (h) @@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x) return fls(x); } #elif BITS_PER_LONG == 64 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { if (x == 0) return 0; diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 9be2d50da09a..ea7898cc5903 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -267,7 +267,7 @@ static inline int parity8(u8 val) * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. 
*/ -static inline unsigned int __ffs64(u64 word) +static inline __attribute_const__ unsigned int __ffs64(u64 word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) -- cgit v1.2.3 From 4cd661c248b6671914ad59e16760bb6d908dfc61 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Wed, 13 Aug 2025 11:20:57 -0700 Subject: hyperv: Add missing field to hv_output_map_device_interrupt This field is unused, but the correct structure size is needed when computing the amount of space for the output argument to reside, so that it does not cross a page boundary. Signed-off-by: Nuno Das Neves Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- include/hyperv/hvhdk_mini.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h index 42e7876455b5..858f6a3925b3 100644 --- a/include/hyperv/hvhdk_mini.h +++ b/include/hyperv/hvhdk_mini.h @@ -301,6 +301,7 @@ struct hv_input_map_device_interrupt { /* HV_OUTPUT_MAP_DEVICE_INTERRUPT */ struct hv_output_map_device_interrupt { struct hv_interrupt_entry interrupt_entry; + u64 ext_status_deprecated[5]; } __packed; /* HV_INPUT_UNMAP_DEVICE_INTERRUPT */ -- cgit v1.2.3 From f26c9306dff818bbf4ef545c5a5ee0eca7149922 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Tue, 2 Sep 2025 16:48:33 -0700 Subject: mshv: Add support for a new parent partition configuration Detect booting as an "L1VH" partition. This is a new scenario very similar to root partition where the mshv_root driver can be used to create and manage guest partitions. It mostly works the same as root partition, but there are some differences in how various features are handled. hv_l1vh_partition() is introduced to handle these cases. Add hv_parent_partition() which returns true for either case, replacing some hv_root_partition() checks. 
Signed-off-by: Nuno Das Neves Acked-by: Wei Liu Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- include/asm-generic/mshyperv.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index a729b77983fa..dbd4c2f3aee3 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -31,6 +31,7 @@ enum hv_partition_type { HV_PARTITION_TYPE_GUEST, HV_PARTITION_TYPE_ROOT, + HV_PARTITION_TYPE_L1VH, }; struct ms_hyperv_info { @@ -354,12 +355,22 @@ static inline bool hv_root_partition(void) { return hv_curr_partition_type == HV_PARTITION_TYPE_ROOT; } +static inline bool hv_l1vh_partition(void) +{ + return hv_curr_partition_type == HV_PARTITION_TYPE_L1VH; +} +static inline bool hv_parent_partition(void) +{ + return hv_root_partition() || hv_l1vh_partition(); +} int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); #else /* CONFIG_MSHV_ROOT */ static inline bool hv_root_partition(void) { return false; } +static inline bool hv_l1vh_partition(void) { return false; } +static inline bool hv_parent_partition(void) { return false; } static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) { return -EOPNOTSUPP; -- cgit v1.2.3 From 2d0ddbb65cef99aab241378b0f4ff2d6ea8c3a5a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 31 Aug 2025 09:04:06 -0700 Subject: Drivers: hv: Simplify data structures for VMBus channel close message struct vmbus_close_msg is used for sending the VMBus channel close message. It contains a struct vmbus_channel_msginfo, which has a flex array member at the end. The latter's presence in the middle of struct vmbus_close_msg causes warnings when built with -Wflex-array-member-not-at-end. 
But the struct vmbus_channel_msginfo is unused because the Hyper-V host does not send a response to the channel close message. So remove the struct vmbus_channel_msginfo. Then, since the only remaining field is struct vmbus_channel_close_channel, also remove the containing struct vmbus_close_msg and directly use struct vmbus_channel_close_channel. Besides eliminating unnecessary complexity, these changes resolve the -Wflex-array-member-not-at-end warnings. Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Wei Liu --- include/linux/hyperv.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index a59c5c3e95fb..59826c89171c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -707,11 +707,6 @@ struct vmbus_channel_msginfo { unsigned char msg[]; }; -struct vmbus_close_msg { - struct vmbus_channel_msginfo info; - struct vmbus_channel_close_channel msg; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -800,7 +795,7 @@ struct vmbus_channel { struct hv_ring_buffer_info outbound; /* send to parent */ struct hv_ring_buffer_info inbound; /* receive from parent */ - struct vmbus_close_msg close_msg; + struct vmbus_channel_close_channel close_msg; /* Statistics */ u64 interrupts; /* Host to Guest interrupts */ -- cgit v1.2.3 From ceac1fb2290d230eb83aff3761058c559440de13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Sep 2025 16:58:06 +0000 Subject: ipv6: snmp: do not use SNMP_MIB_SENTINEL anymore Use ARRAY_SIZE(), so that we know the limit at compile time. Following patch needs this preliminary change. 
Signed-off-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250905165813.1470708-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 6dbd2bf8fa9c..a1624e8db1ab 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -338,6 +338,19 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } \ } +#define snmp_get_cpu_field64_batch_cnt(buff64, stats_list, cnt, \ + mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; i < cnt; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + #define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ { \ int i, c; \ @@ -349,6 +362,17 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } \ } +#define snmp_get_cpu_field_batch_cnt(buff, stats_list, cnt, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; i < cnt; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + static inline void inet_get_local_port_range(const struct net *net, int *low, int *high) { u32 range = READ_ONCE(net->ipv4.ip_local_ports.range); -- cgit v1.2.3 From 20d3d26815441d03a9a15114729faaa54957baba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Sep 2025 16:58:13 +0000 Subject: net: snmp: remove SNMP_MIB_SENTINEL No more user of SNMP_MIB_SENTINEL, we can remove it. Also remove snmp_get_cpu_field[64]_batch() helpers. 
Signed-off-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250905165813.1470708-10-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 23 ----------------------- include/net/snmp.h | 5 ----- 2 files changed, 28 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index a1624e8db1ab..380afb691c41 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -326,18 +326,6 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif -#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ -{ \ - int i, c; \ - for_each_possible_cpu(c) { \ - for (i = 0; stats_list[i].name; i++) \ - buff64[i] += snmp_get_cpu_field64( \ - mib_statistic, \ - c, stats_list[i].entry, \ - offset); \ - } \ -} - #define snmp_get_cpu_field64_batch_cnt(buff64, stats_list, cnt, \ mib_statistic, offset) \ { \ @@ -351,17 +339,6 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } \ } -#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ -{ \ - int i, c; \ - for_each_possible_cpu(c) { \ - for (i = 0; stats_list[i].name; i++) \ - buff[i] += snmp_get_cpu_field( \ - mib_statistic, \ - c, stats_list[i].entry); \ - } \ -} - #define snmp_get_cpu_field_batch_cnt(buff, stats_list, cnt, mib_statistic) \ { \ int i, c; \ diff --git a/include/net/snmp.h b/include/net/snmp.h index 4cb4326dfebe..584e70742e9b 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -36,11 +36,6 @@ struct snmp_mib { .entry = _entry, \ } -#define SNMP_MIB_SENTINEL { \ - .name = NULL, \ - .entry = 0, \ -} - /* * We use unsigned longs for most mibs but u64 for ipstats. 
*/ -- cgit v1.2.3 From 7ceb69ca82b1456a66783a1472d6e677e00065a1 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Sat, 30 Aug 2025 14:14:58 +0800 Subject: ASoC: tas2781: Add tas2118, tas2x20, tas5825 support Add tas2020, tas2118, tas2120, tas2320, tas2570, tas2572, tas5825, and tas5827 support in tas2781 driver. Tas2118, tas2x20, and tas257x have no on-chip DSP; tas582x are stereo smart amplifiers that have an on-chip DSP but require no calibration. Signed-off-by: Baojun Xu Acked-by: Mark Brown Signed-off-by: Takashi Iwai --- include/sound/tas2781.h | 14 ++- include/sound/tas2x20-tlv.h | 259 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 include/sound/tas2x20-tlv.h (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index f0aefc04a957..ddd997ac3216 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -51,7 +51,9 @@ /* Software Reset, compatble with new device (TAS5825). */ #define TASDEVICE_REG_SWRESET TASDEVICE_REG(0x0, 0x0, 0x01) -#define TASDEVICE_REG_SWRESET_RESET (BIT(0) | BIT(4)) +#define TASDEVICE_REG_SWRESET_RESET BIT(0) + +#define TAS5825_REG_SWRESET_RESET (BIT(0) | BIT(4)) /* Checksum */ #define TASDEVICE_CHECKSUM_REG TASDEVICE_REG(0x0, 0x0, 0x7e) @@ -110,8 +112,17 @@ #define TAS2781_RUNTIME_RE_REG TASDEVICE_REG(0x64, 0x63, 0x44) enum audio_device { + TAS2020, + TAS2118, + TAS2120, + TAS2320, TAS2563, + TAS2570, + TAS2572, TAS2781, + TAS5825, + TAS5827, + TAS_OTHERS, }; enum dspbin_type { @@ -194,6 +205,7 @@ struct tasdevice_priv { unsigned char coef_binaryname[64]; unsigned char rca_binaryname[64]; unsigned char dev_name[32]; + const unsigned char (*dvc_tlv_table)[4]; const char *name_prefix; unsigned char ndev; unsigned int dspbin_typ; diff --git a/include/sound/tas2x20-tlv.h b/include/sound/tas2x20-tlv.h new file mode 100644 index 000000000000..6e6bcec4a0a1 --- /dev/null +++ b/include/sound/tas2x20-tlv.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier:
GPL-2.0 */ +// +// ALSA SoC Texas Instruments TAS2x20/TAS2118 Audio Smart Amplifier +// +// Copyright (C) 2025 Texas Instruments Incorporated +// https://www.ti.com +// +// The TAS2x20/TAS2118 hda driver implements for one, two, or even multiple +// TAS2x20/TAS2118 chips. +// +// Author: Baojun Xu +// + +#ifndef __TAS2X20_TLV_H__ +#define __TAS2X20_TLV_H__ + +#define TAS2X20_DVC_LEVEL TASDEVICE_REG(0x0, 0x2, 0x0c) +#define TAS2X20_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x07) + +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2x20_dvc_tlv, 1650, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2x20_amp_tlv, 2100, 50, 0); + +/* pow(10, db/20) * pow(2,22) */ +static const __maybe_unused unsigned char tas2x20_dvc_table[][4] = { + { 0X00, 0X00, 0X0D, 0X00 }, /* -110.0db */ + { 0X00, 0X00, 0X0E, 0X00 }, /* -109.5db */ + { 0X00, 0X00, 0X0E, 0X00 }, /* -109.0db */ + { 0X00, 0X00, 0X0F, 0X00 }, /* -108.5db */ + { 0X00, 0X00, 0X10, 0X00 }, /* -108.0db */ + { 0X00, 0X00, 0X11, 0X00 }, /* -107.5db */ + { 0X00, 0X00, 0X12, 0X00 }, /* -107.0db */ + { 0X00, 0X00, 0X13, 0X00 }, /* -106.5db */ + { 0X00, 0X00, 0X15, 0X00 }, /* -106.0db */ + { 0X00, 0X00, 0X16, 0X00 }, /* -105.5db */ + { 0X00, 0X00, 0X17, 0X00 }, /* -105.0db */ + { 0X00, 0X00, 0X18, 0X00 }, /* -104.5db */ + { 0X00, 0X00, 0X1A, 0X00 }, /* -104.0db */ + { 0X00, 0X00, 0X1C, 0X00 }, /* -103.5db */ + { 0X00, 0X00, 0X1D, 0X00 }, /* -103.0db */ + { 0X00, 0X00, 0X1F, 0X00 }, /* -102.5db */ + { 0X00, 0X00, 0X21, 0X00 }, /* -102.0db */ + { 0X00, 0X00, 0X23, 0X00 }, /* -101.5db */ + { 0X00, 0X00, 0X25, 0X00 }, /* -101.0db */ + { 0X00, 0X00, 0X27, 0X00 }, /* -100.5db */ + { 0X00, 0X00, 0X29, 0X00 }, /* -100.0db */ + { 0X00, 0X00, 0X2C, 0X00 }, /* -99.5db */ + { 0X00, 0X00, 0X2F, 0X00 }, /* -99.0db */ + { 0X00, 0X00, 0X31, 0X00 }, /* -98.5db */ + { 0X00, 0X00, 0X34, 0X00 }, /* -98.0db */ + { 0X00, 0X00, 0X37, 0X00 }, /* -97.5db */ + { 0X00, 0X00, 0X3B, 0X00 }, /* -97.0db */ + { 0X00, 0X00, 0X3E, 0X00 }, /* -96.5db */ + 
{ 0X00, 0X00, 0X42, 0X00 }, /* -96.0db */ + { 0X00, 0X00, 0X46, 0X00 }, /* -95.5db */ + { 0X00, 0X00, 0X4A, 0X00 }, /* -95.0db */ + { 0X00, 0X00, 0X4F, 0X00 }, /* -94.5db */ + { 0X00, 0X00, 0X53, 0X00 }, /* -94.0db */ + { 0X00, 0X00, 0X58, 0X00 }, /* -93.5db */ + { 0X00, 0X00, 0X5D, 0X00 }, /* -93.0db */ + { 0X00, 0X00, 0X63, 0X00 }, /* -92.5db */ + { 0X00, 0X00, 0X69, 0X00 }, /* -92.0db */ + { 0X00, 0X00, 0X6F, 0X00 }, /* -91.5db */ + { 0X00, 0X00, 0X76, 0X00 }, /* -91.0db */ + { 0X00, 0X00, 0X7D, 0X00 }, /* -90.5db */ + { 0X00, 0X00, 0X84, 0X00 }, /* -90.0db */ + { 0X00, 0X00, 0X8C, 0X00 }, /* -89.5db */ + { 0X00, 0X00, 0X94, 0X00 }, /* -89.0db */ + { 0X00, 0X00, 0X9D, 0X00 }, /* -88.5db */ + { 0X00, 0X00, 0XA6, 0X00 }, /* -88.0db */ + { 0X00, 0X00, 0XB0, 0X00 }, /* -87.5db */ + { 0X00, 0X00, 0XBB, 0X00 }, /* -87.0db */ + { 0X00, 0X00, 0XC6, 0X00 }, /* -86.5db */ + { 0X00, 0X00, 0XD2, 0X00 }, /* -86.0db */ + { 0X00, 0X00, 0XDE, 0X00 }, /* -85.5db */ + { 0X00, 0X00, 0XEB, 0X00 }, /* -85.0db */ + { 0X00, 0X00, 0XF9, 0X00 }, /* -84.5db */ + { 0X00, 0X01, 0X08, 0X00 }, /* -84.0db */ + { 0X00, 0X01, 0X18, 0X00 }, /* -83.5db */ + { 0X00, 0X01, 0X28, 0X00 }, /* -83.0db */ + { 0X00, 0X01, 0X3A, 0X00 }, /* -82.5db */ + { 0X00, 0X01, 0X4D, 0X00 }, /* -82.0db */ + { 0X00, 0X01, 0X60, 0X00 }, /* -81.5db */ + { 0X00, 0X01, 0X75, 0X00 }, /* -81.0db */ + { 0X00, 0X01, 0X8B, 0X00 }, /* -80.5db */ + { 0X00, 0X01, 0XA3, 0X00 }, /* -80.0db */ + { 0X00, 0X01, 0XBC, 0X00 }, /* -79.5db */ + { 0X00, 0X01, 0XD6, 0X00 }, /* -79.0db */ + { 0X00, 0X01, 0XF2, 0X00 }, /* -78.5db */ + { 0X00, 0X02, 0X10, 0X00 }, /* -78.0db */ + { 0X00, 0X02, 0X2F, 0X00 }, /* -77.5db */ + { 0X00, 0X02, 0X50, 0X00 }, /* -77.0db */ + { 0X00, 0X02, 0X73, 0X00 }, /* -76.5db */ + { 0X00, 0X02, 0X98, 0X00 }, /* -76.0db */ + { 0X00, 0X02, 0XC0, 0X00 }, /* -75.5db */ + { 0X00, 0X02, 0XE9, 0X00 }, /* -75.0db */ + { 0X00, 0X03, 0X16, 0X00 }, /* -74.5db */ + { 0X00, 0X03, 0X44, 0X00 }, /* -74.0db */ + { 0X00, 0X03, 0X76, 
0X00 }, /* -73.5db */ + { 0X00, 0X03, 0XAA, 0X00 }, /* -73.0db */ + { 0X00, 0X03, 0XE2, 0X00 }, /* -72.5db */ + { 0X00, 0X04, 0X1D, 0X00 }, /* -72.0db */ + { 0X00, 0X04, 0X5B, 0X00 }, /* -71.5db */ + { 0X00, 0X04, 0X9E, 0X00 }, /* -71.0db */ + { 0X00, 0X04, 0XE4, 0X00 }, /* -70.5db */ + { 0X00, 0X05, 0X2E, 0X00 }, /* -70.0db */ + { 0X00, 0X05, 0X7C, 0X00 }, /* -69.5db */ + { 0X00, 0X05, 0XD0, 0X00 }, /* -69.0db */ + { 0X00, 0X06, 0X28, 0X00 }, /* -68.5db */ + { 0X00, 0X06, 0X85, 0X00 }, /* -68.0db */ + { 0X00, 0X06, 0XE8, 0X00 }, /* -67.5db */ + { 0X00, 0X07, 0X51, 0X00 }, /* -67.0db */ + { 0X00, 0X07, 0XC0, 0X00 }, /* -66.5db */ + { 0X00, 0X08, 0X36, 0X00 }, /* -66.0db */ + { 0X00, 0X08, 0XB2, 0X00 }, /* -65.5db */ + { 0X00, 0X09, 0X36, 0X00 }, /* -65.0db */ + { 0X00, 0X09, 0XC2, 0X00 }, /* -64.5db */ + { 0X00, 0X0A, 0X56, 0X00 }, /* -64.0db */ + { 0X00, 0X0A, 0XF3, 0X00 }, /* -63.5db */ + { 0X00, 0X0B, 0X99, 0X00 }, /* -63.0db */ + { 0X00, 0X0C, 0X49, 0X00 }, /* -62.5db */ + { 0X00, 0X0D, 0X03, 0X00 }, /* -62.0db */ + { 0X00, 0X0D, 0XC9, 0X00 }, /* -61.5db */ + { 0X00, 0X0E, 0X9A, 0X00 }, /* -61.0db */ + { 0X00, 0X0F, 0X77, 0X00 }, /* -60.5db */ + { 0X00, 0X10, 0X62, 0X00 }, /* -60.0db */ + { 0X00, 0X11, 0X5A, 0X00 }, /* -59.5db */ + { 0X00, 0X12, 0X62, 0X00 }, /* -59.0db */ + { 0X00, 0X13, 0X78, 0X00 }, /* -58.5db */ + { 0X00, 0X14, 0XA0, 0X00 }, /* -58.0db */ + { 0X00, 0X15, 0XD9, 0X00 }, /* -57.5db */ + { 0X00, 0X17, 0X24, 0X00 }, /* -57.0db */ + { 0X00, 0X18, 0X83, 0X00 }, /* -56.5db */ + { 0X00, 0X19, 0XF7, 0X00 }, /* -56.0db */ + { 0X00, 0X1B, 0X81, 0X00 }, /* -55.5db */ + { 0X00, 0X1D, 0X22, 0X00 }, /* -55.0db */ + { 0X00, 0X1E, 0XDC, 0X00 }, /* -54.5db */ + { 0X00, 0X20, 0XB0, 0X00 }, /* -54.0db */ + { 0X00, 0X22, 0XA0, 0X00 }, /* -53.5db */ + { 0X00, 0X24, 0XAD, 0X00 }, /* -53.0db */ + { 0X00, 0X26, 0XDA, 0X00 }, /* -52.5db */ + { 0X00, 0X29, 0X27, 0X00 }, /* -52.0db */ + { 0X00, 0X2B, 0X97, 0X00 }, /* -51.5db */ + { 0X00, 0X2E, 0X2D, 0X00 }, /* -51.0db 
*/ + { 0X00, 0X30, 0XE9, 0X00 }, /* -50.5db */ + { 0X00, 0X33, 0XCF, 0X00 }, /* -50.0db */ + { 0X00, 0X36, 0XE1, 0X00 }, /* -49.5db */ + { 0X00, 0X3A, 0X21, 0X00 }, /* -49.0db */ + { 0X00, 0X3D, 0X93, 0X00 }, /* -48.5db */ + { 0X00, 0X41, 0X39, 0X00 }, /* -48.0db */ + { 0X00, 0X45, 0X17, 0X00 }, /* -47.5db */ + { 0X00, 0X49, 0X2F, 0X00 }, /* -47.0db */ + { 0X00, 0X4D, 0X85, 0X00 }, /* -46.5db */ + { 0X00, 0X52, 0X1D, 0X00 }, /* -46.0db */ + { 0X00, 0X56, 0XFA, 0X00 }, /* -45.5db */ + { 0X00, 0X5C, 0X22, 0X00 }, /* -45.0db */ + { 0X00, 0X61, 0X97, 0X00 }, /* -44.5db */ + { 0X00, 0X67, 0X60, 0X00 }, /* -44.0db */ + { 0X00, 0X6D, 0X80, 0X00 }, /* -43.5db */ + { 0X00, 0X73, 0XFD, 0X00 }, /* -43.0db */ + { 0X00, 0X7A, 0XDC, 0X00 }, /* -42.5db */ + { 0X00, 0X82, 0X24, 0X00 }, /* -42.0db */ + { 0X00, 0X89, 0XDA, 0X00 }, /* -41.5db */ + { 0X00, 0X92, 0X05, 0X00 }, /* -41.0db */ + { 0X00, 0X9A, 0XAC, 0X00 }, /* -40.5db */ + { 0X00, 0XA3, 0XD7, 0X00 }, /* -40.0db */ + { 0X00, 0XAD, 0X8C, 0X00 }, /* -39.5db */ + { 0X00, 0XB7, 0XD4, 0X00 }, /* -39.0db */ + { 0X00, 0XC2, 0XB9, 0X00 }, /* -38.5db */ + { 0X00, 0XCE, 0X43, 0X00 }, /* -38.0db */ + { 0X00, 0XDA, 0X7B, 0X00 }, /* -37.5db */ + { 0X00, 0XE7, 0X6E, 0X00 }, /* -37.0db */ + { 0X00, 0XF5, 0X24, 0X00 }, /* -36.5db */ + { 0X01, 0X03, 0XAB, 0X00 }, /* -36.0db */ + { 0X01, 0X13, 0X0E, 0X00 }, /* -35.5db */ + { 0X01, 0X23, 0X5A, 0X00 }, /* -35.0db */ + { 0X01, 0X34, 0X9D, 0X00 }, /* -34.5db */ + { 0X01, 0X46, 0XE7, 0X00 }, /* -34.0db */ + { 0X01, 0X5A, 0X46, 0X00 }, /* -33.5db */ + { 0X01, 0X6E, 0XCA, 0X00 }, /* -33.0db */ + { 0X01, 0X84, 0X86, 0X00 }, /* -32.5db */ + { 0X01, 0X9B, 0X8C, 0X00 }, /* -32.0db */ + { 0X01, 0XB3, 0XEE, 0X00 }, /* -31.5db */ + { 0X01, 0XCD, 0XC3, 0X00 }, /* -31.0db */ + { 0X01, 0XE9, 0X20, 0X00 }, /* -30.5db */ + { 0X02, 0X06, 0X1B, 0X00 }, /* -30.0db */ + { 0X02, 0X24, 0XCE, 0X00 }, /* -29.5db */ + { 0X02, 0X45, 0X53, 0X00 }, /* -29.0db */ + { 0X02, 0X67, 0XC5, 0X00 }, /* -28.5db */ + { 0X02, 0X8C, 
0X42, 0X00 }, /* -28.0db */ + { 0X02, 0XB2, 0XE8, 0X00 }, /* -27.5db */ + { 0X02, 0XDB, 0XD8, 0X00 }, /* -27.0db */ + { 0X03, 0X07, 0X36, 0X00 }, /* -26.5db */ + { 0X03, 0X35, 0X25, 0X00 }, /* -26.0db */ + { 0X03, 0X65, 0XCD, 0X00 }, /* -25.5db */ + { 0X03, 0X99, 0X57, 0X00 }, /* -25.0db */ + { 0X03, 0XCF, 0XEE, 0X00 }, /* -24.5db */ + { 0X04, 0X09, 0XC2, 0X00 }, /* -24.0db */ + { 0X04, 0X47, 0X03, 0X00 }, /* -23.5db */ + { 0X04, 0X87, 0XE5, 0X00 }, /* -23.0db */ + { 0X04, 0XCC, 0XA0, 0X00 }, /* -22.5db */ + { 0X05, 0X15, 0X6D, 0X00 }, /* -22.0db */ + { 0X05, 0X62, 0X8A, 0X00 }, /* -21.5db */ + { 0X05, 0XB4, 0X39, 0X00 }, /* -21.0db */ + { 0X06, 0X0A, 0XBF, 0X00 }, /* -20.5db */ + { 0X06, 0X66, 0X66, 0X00 }, /* -20.0db */ + { 0X06, 0XC7, 0X7B, 0X00 }, /* -19.5db */ + { 0X07, 0X2E, 0X50, 0X00 }, /* -19.0db */ + { 0X07, 0X9B, 0X3D, 0X00 }, /* -18.5db */ + { 0X08, 0X0E, 0X9F, 0X00 }, /* -18.0db */ + { 0X08, 0X88, 0XD7, 0X00 }, /* -17.5db */ + { 0X09, 0X0A, 0X4D, 0X00 }, /* -17.0db */ + { 0X09, 0X93, 0X6E, 0X00 }, /* -16.5db */ + { 0X0A, 0X24, 0XB0, 0X00 }, /* -16.0db */ + { 0X0A, 0XBE, 0X8D, 0X00 }, /* -15.5db */ + { 0X0B, 0X61, 0X88, 0X00 }, /* -15.0db */ + { 0X0C, 0X0E, 0X2B, 0X00 }, /* -14.5db */ + { 0X0C, 0XC5, 0X09, 0X00 }, /* -14.0db */ + { 0X0D, 0X86, 0XBD, 0X00 }, /* -13.5db */ + { 0X0E, 0X53, 0XEB, 0X00 }, /* -13.0db */ + { 0X0F, 0X2D, 0X42, 0X00 }, /* -12.5db */ + { 0X10, 0X13, 0X79, 0X00 }, /* -12.0db */ + { 0X11, 0X07, 0X54, 0X00 }, /* -11.5db */ + { 0X12, 0X09, 0XA3, 0X00 }, /* -11.0db */ + { 0X13, 0X1B, 0X40, 0X00 }, /* -10.5db */ + { 0X14, 0X3D, 0X13, 0X00 }, /* -10.0db */ + { 0X15, 0X70, 0X12, 0X00 }, /* -9.5db */ + { 0X16, 0XB5, 0X43, 0X00 }, /* -9.0db */ + { 0X18, 0X0D, 0XB8, 0X00 }, /* -8.5db */ + { 0X19, 0X7A, 0X96, 0X00 }, /* -8.0db */ + { 0X1A, 0XFD, 0X13, 0X00 }, /* -7.5db */ + { 0X1C, 0X96, 0X76, 0X00 }, /* -7.0db */ + { 0X1E, 0X48, 0X1C, 0X00 }, /* -6.5db */ + { 0X20, 0X13, 0X73, 0X00 }, /* -6.0db */ + { 0X21, 0XFA, 0X02, 0X00 }, /* -5.5db */ 
+ { 0X23, 0XFD, 0X66, 0X00 }, /* -5.0db */ + { 0X26, 0X1F, 0X54, 0X00 }, /* -4.5db */ + { 0X28, 0X61, 0X9A, 0X00 }, /* -4.0db */ + { 0X2A, 0XC6, 0X25, 0X00 }, /* -3.5db */ + { 0X2D, 0X4E, 0XFB, 0X00 }, /* -3.0db */ + { 0X2F, 0XFE, 0X44, 0X00 }, /* -2.5db */ + { 0X32, 0XD6, 0X46, 0X00 }, /* -2.0db */ + { 0X35, 0XD9, 0X6B, 0X00 }, /* -1.5db */ + { 0X39, 0X0A, 0X41, 0X00 }, /* -1.0db */ + { 0X3C, 0X6B, 0X7E, 0X00 }, /* -0.5db */ + { 0X40, 0X00, 0X00, 0X00 }, /* 0.0db */ + { 0X43, 0XCA, 0XD0, 0X00 }, /* 0.5db */ + { 0X47, 0XCF, 0X26, 0X00 }, /* 1.0db */ + { 0X4C, 0X10, 0X6B, 0X00 }, /* 1.5db */ + { 0X50, 0X92, 0X3B, 0X00 }, /* 2.0db */ + { 0X55, 0X58, 0X6A, 0X00 }, /* 2.5db */ + { 0X5A, 0X67, 0X03, 0X00 }, /* 3.0db */ + { 0X5F, 0XC2, 0X53, 0X00 }, /* 3.5db */ + { 0X65, 0X6E, 0XE3, 0X00 }, /* 4.0db */ + { 0X6B, 0X71, 0X86, 0X00 }, /* 4.5db */ + { 0X71, 0XCF, 0X54, 0X00 }, /* 5.0db */ + { 0X78, 0X8D, 0XB4, 0X00 }, /* 5.5db */ + { 0X7F, 0XB2, 0X61, 0X00 }, /* 6.0db */ +}; +#endif -- cgit v1.2.3 From 79357cd06d41d0f5a11b17d7c86176e395d10ef2 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Sun, 31 Aug 2025 14:10:58 +0200 Subject: mm/vmalloc, mm/kasan: respect gfp mask in kasan_populate_vmalloc() kasan_populate_vmalloc() and its helpers ignore the caller's gfp_mask and always allocate memory using the hardcoded GFP_KERNEL flag. This makes them inconsistent with vmalloc(), which was recently extended to support GFP_NOFS and GFP_NOIO allocations. Page table allocations performed during shadow population also ignore the external gfp_mask. To preserve the intended semantics of GFP_NOFS and GFP_NOIO, wrap the apply_to_page_range() calls into the appropriate memalloc scope. xfs calls vmalloc with GFP_NOFS, so this bug could lead to deadlock. 
There was a report here https://lkml.kernel.org/r/686ea951.050a0220.385921.0016.GAE@google.com This patch: - Extends kasan_populate_vmalloc() and helpers to take gfp_mask; - Passes gfp_mask down to alloc_pages_bulk() and __get_free_page(); - Enforces GFP_NOFS/NOIO semantics with memalloc_*_save()/restore() around apply_to_page_range(); - Updates vmalloc.c and percpu allocator call sites accordingly. Link: https://lkml.kernel.org/r/20250831121058.92971-1-urezki@gmail.com Fixes: 451769ebb7e7 ("mm/vmalloc: alloc GFP_NO{FS,IO} for vmalloc") Signed-off-by: Uladzislau Rezki (Sony) Reported-by: syzbot+3470c9ffee63e4abafeb@syzkaller.appspotmail.com Reviewed-by: Andrey Ryabinin Cc: Baoquan He Cc: Michal Hocko Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- include/linux/kasan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 890011071f2b..fe5ce9215821 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -562,7 +562,7 @@ static inline void kasan_init_hw_tags(void) { } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); -int kasan_populate_vmalloc(unsigned long addr, unsigned long size); +int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end, @@ -574,7 +574,7 @@ static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, - unsigned long size) + unsigned long size, gfp_t gfp_mask) { return 0; } @@ -610,7 +610,7 @@ static __always_inline void kasan_poison_vmalloc(const void *start, static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned 
long size) { } static inline int kasan_populate_vmalloc(unsigned long start, - unsigned long size) + unsigned long size, gfp_t gfp_mask) { return 0; } -- cgit v1.2.3 From 3fac212fe489aa0dbe8d80a42a7809840ca7b0f9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Sep 2025 15:49:26 -0700 Subject: compiler-clang.h: define __SANITIZE_*__ macros only when undefined Clang 22 recently added support for defining __SANITIZE__ macros similar to GCC [1], which causes warnings (or errors with CONFIG_WERROR=y or W=e) with the existing defines that the kernel creates to emulate this behavior with existing clang versions. In file included from :3: In file included from include/linux/compiler_types.h:171: include/linux/compiler-clang.h:37:9: error: '__SANITIZE_THREAD__' macro redefined [-Werror,-Wmacro-redefined] 37 | #define __SANITIZE_THREAD__ | ^ :352:9: note: previous definition is here 352 | #define __SANITIZE_THREAD__ 1 | ^ Refactor compiler-clang.h to only define the sanitizer macros when they are undefined and adjust the rest of the code to use these macros for checking if the sanitizers are enabled, clearing up the warnings and allowing the kernel to easily drop these defines when the minimum supported version of LLVM for building the kernel becomes 22.0.0 or newer. 
Link: https://lkml.kernel.org/r/20250902-clang-update-sanitize-defines-v1-1-cf3702ca3d92@kernel.org Link: https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c [1] Signed-off-by: Nathan Chancellor Reviewed-by: Justin Stitt Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Bill Wendling Cc: Dmitriy Vyukov Cc: Marco Elver Cc: Signed-off-by: Andrew Morton --- include/linux/compiler-clang.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index fa4ffe037bc7..8720a0705900 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -18,23 +18,42 @@ #define KASAN_ABI_VERSION 5 /* + * Clang 22 added preprocessor macros to match GCC, in hopes of eventually + * dropping __has_feature support for sanitizers: + * https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c + * Create these macros for older versions of clang so that it is easy to clean + * up once the minimum supported version of LLVM for building the kernel always + * creates these macros. + * * Note: Checking __has_feature(*_sanitizer) is only true if the feature is * enabled. Therefore it is not required to additionally check defined(CONFIG_*) * to avoid adding redundant attributes in other configurations. */ +#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__) +#define __SANITIZE_ADDRESS__ +#endif +#if __has_feature(hwaddress_sanitizer) && !defined(__SANITIZE_HWADDRESS__) +#define __SANITIZE_HWADDRESS__ +#endif +#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__) +#define __SANITIZE_THREAD__ +#endif -#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) -/* Emulate GCC's __SANITIZE_ADDRESS__ flag */ +/* + * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel. 
+ */ +#ifdef __SANITIZE_HWADDRESS__ #define __SANITIZE_ADDRESS__ +#endif + +#ifdef __SANITIZE_ADDRESS__ #define __no_sanitize_address \ __attribute__((no_sanitize("address", "hwaddress"))) #else #define __no_sanitize_address #endif -#if __has_feature(thread_sanitizer) -/* emulate gcc's __SANITIZE_THREAD__ flag */ -#define __SANITIZE_THREAD__ +#ifdef __SANITIZE_THREAD__ #define __no_sanitize_thread \ __attribute__((no_sanitize("thread"))) #else -- cgit v1.2.3 From faf23f54d366467bb449a7b2c39b382db9f92e80 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 12 Aug 2025 17:17:06 +0300 Subject: ptp: Add ioctl commands to expose raw cycle counter values Introduce two new ioctl commands, PTP_SYS_OFFSET_PRECISE_CYCLES and PTP_SYS_OFFSET_EXTENDED_CYCLES, to allow user space to access the raw free-running cycle counter from PTP devices. These ioctls are variants of the existing PRECISE and EXTENDED offset queries, but instead of returning device time in realtime, they return the raw cycle counter value. 
Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Acked-by: Richard Cochran Link: https://patch.msgid.link/1755008228-88881-2-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- include/uapi/linux/ptp_clock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 18eefa6d93d6..65f187b5f0d0 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -245,6 +245,10 @@ struct ptp_pin_desc { _IOWR(PTP_CLK_MAGIC, 18, struct ptp_sys_offset_extended) #define PTP_MASK_CLEAR_ALL _IO(PTP_CLK_MAGIC, 19) #define PTP_MASK_EN_SINGLE _IOW(PTP_CLK_MAGIC, 20, unsigned int) +#define PTP_SYS_OFFSET_PRECISE_CYCLES \ + _IOWR(PTP_CLK_MAGIC, 21, struct ptp_sys_offset_precise) +#define PTP_SYS_OFFSET_EXTENDED_CYCLES \ + _IOWR(PTP_CLK_MAGIC, 22, struct ptp_sys_offset_extended) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occurred. */ -- cgit v1.2.3 From ff97bc38be343e4530e2f140b40cbdce2e09152f Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 3 Sep 2025 10:30:00 +0300 Subject: net/mlx5: Add RS FEC histogram infrastructure Define the Ports Phy Histogram Configuration Register (PPHCR) to expose RS-FEC histogram bin ranges, and expose a new counter group in the Ports Performance Counters Register (PPCNT) to report the corresponding histogram values. 
Co-developed-by: Yael Chemla Signed-off-by: Yael Chemla Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1756884600-520195-1-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 72a83666e67f..d7f46a8fbfa1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1525,6 +1525,7 @@ enum { MLX5_PHYSICAL_LAYER_RECOVERY_GROUP = 0x1a, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP = 0x21, + MLX5_RS_FEC_HISTOGRAM_GROUP = 0x23, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8c5fbfb85749..c0858af0e854 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -130,6 +130,7 @@ enum { MLX5_REG_PDDR = 0x5031, MLX5_REG_PMLP = 0x5002, MLX5_REG_PPLM = 0x5023, + MLX5_REG_PPHCR = 0x503E, MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e9f14a0c7f4f..097b1b7ada63 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4901,6 +4901,11 @@ union mlx5_ifc_field_select_802_1_r_roce_auto_bits { u8 reserved_at_0[0x20]; }; +struct mlx5_ifc_rs_histogram_cntrs_bits { + u8 hist[16][0x40]; + u8 reserved_at_400[0x2c0]; +}; + union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout; struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits eth_2863_cntrs_grp_data_layout; @@ -4915,6 +4920,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct 
mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs; struct mlx5_ifc_phys_layer_recovery_cntrs_bits phys_layer_recovery_cntrs; + struct mlx5_ifc_rs_histogram_cntrs_bits rs_histogram_cntrs; u8 reserved_at_0[0x7c0]; }; @@ -11738,6 +11744,28 @@ struct mlx5_ifc_mtctr_reg_bits { u8 second_clock_timestamp[0x40]; }; +struct mlx5_ifc_bin_range_layout_bits { + u8 reserved_at_0[0xa]; + u8 high_val[0x6]; + u8 reserved_at_10[0xa]; + u8 low_val[0x6]; +}; + +struct mlx5_ifc_pphcr_reg_bits { + u8 active_hist_type[0x4]; + u8 reserved_at_4[0x4]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x8]; + u8 num_of_bins[0x8]; + u8 reserved_at_30[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_bin_range_layout_bits bin_range[16]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -11804,6 +11832,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mtmp_reg_bits mtmp_reg; struct mlx5_ifc_mtptm_reg_bits mtptm_reg; struct mlx5_ifc_mtctr_reg_bits mtctr_reg; + struct mlx5_ifc_pphcr_reg_bits pphcr_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From 6b6dc81ee7e8ca87c71a533e1d69cf96a4f1e986 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 2 Sep 2025 06:44:59 +0000 Subject: bonding: add support for per-port LACP actor priority Introduce a new netlink attribute 'actor_port_prio' to allow setting the LACP actor port priority on a per-slave basis. This extends the existing bonding infrastructure to support more granular control over LACP negotiations. The priority value is embedded in LACPDU packets and will be used by subsequent patches to influence aggregator selection policies. 
Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250902064501.360822-2-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/net/bond_3ad.h | 1 + include/net/bond_options.h | 1 + include/uapi/linux/if_link.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index dba369a2cf27..e9188646e22e 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -274,6 +274,7 @@ struct ad_slave_info { struct port port; /* 802.3ad port structure */ struct bond_3ad_stats stats; u16 id; + u16 port_priority; }; static inline const char *bond_3ad_churn_desc(churn_state_t state) diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 022b122a9fb6..e6eedf23aea1 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -78,6 +78,7 @@ enum { BOND_OPT_PRIO, BOND_OPT_COUPLED_CONTROL, BOND_OPT_BROADCAST_NEIGH, + BOND_OPT_ACTOR_PORT_PRIO, BOND_OPT_LAST }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 784ace3a519c..45f56c9f95d9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1564,6 +1564,7 @@ enum { IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, IFLA_BOND_SLAVE_PRIO, + IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, __IFLA_BOND_SLAVE_MAX, }; -- cgit v1.2.3 From e5a6643435fa4ad1e104323ec7d3e6215e2d832c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 2 Sep 2025 06:45:00 +0000 Subject: bonding: support aggregator selection based on port priority Add a new ad_select policy 'port_priority' that uses the per-port actor priority values (set via ad_actor_port_prio) to determine aggregator selection. This allows administrators to influence which ports are preferred for aggregation by assigning different priority values, providing more flexible load balancing control in LACP configurations. 
Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250902064501.360822-3-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/net/bond_3ad.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index e9188646e22e..c92d4a976246 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -26,6 +26,7 @@ enum { BOND_AD_STABLE = 0, BOND_AD_BANDWIDTH = 1, BOND_AD_COUNT = 2, + BOND_AD_PRIO = 3, }; /* rx machine states(43.4.11 in the 802.3ad standard) */ -- cgit v1.2.3 From ed7240444e82aaaa2245a3cc9b040e4db894a665 Mon Sep 17 00:00:00 2001 From: Ryan Chen Date: Mon, 8 Sep 2025 09:18:11 +0800 Subject: dt-bindings: interrupt-controller: aspeed: Add AST2700 SCU IC compatibles Add compatible strings for the four SCU interrupt controller instances on the AST2700 SoC (scu-ic0 to 3), following the multi-instance model used on AST2600. Also define interrupt indices in the binding header. Signed-off-by: Ryan Chen Signed-off-by: Thomas Gleixner Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/all/20250908011812.1033858-4-ryan_chen@aspeedtech.com --- include/dt-bindings/interrupt-controller/aspeed-scu-ic.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h b/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h index f315d5a7f5ee..7dd04424afcc 100644 --- a/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h +++ b/include/dt-bindings/interrupt-controller/aspeed-scu-ic.h @@ -20,4 +20,18 @@ #define ASPEED_AST2600_SCU_IC1_LPC_RESET_LO_TO_HI 0 #define ASPEED_AST2600_SCU_IC1_LPC_RESET_HI_TO_LO 1 +#define ASPEED_AST2700_SCU_IC0_PCIE_PERST_LO_TO_HI 3 +#define ASPEED_AST2700_SCU_IC0_PCIE_PERST_HI_TO_LO 2 + +#define ASPEED_AST2700_SCU_IC1_PCIE_RCRST_LO_TO_HI 3 +#define ASPEED_AST2700_SCU_IC1_PCIE_RCRST_HI_TO_LO 2 + +#define ASPEED_AST2700_SCU_IC2_PCIE_PERST_LO_TO_HI 3 +#define 
ASPEED_AST2700_SCU_IC2_PCIE_PERST_HI_TO_LO 2 +#define ASPEED_AST2700_SCU_IC2_LPC_RESET_LO_TO_HI 1 +#define ASPEED_AST2700_SCU_IC2_LPC_RESET_HI_TO_LO 0 + +#define ASPEED_AST2700_SCU_IC3_LPC_RESET_LO_TO_HI 1 +#define ASPEED_AST2700_SCU_IC3_LPC_RESET_HI_TO_LO 0 + #endif /* _DT_BINDINGS_INTERRUPT_CONTROLLER_ASPEED_SCU_IC_H_ */ -- cgit v1.2.3 From cdea7cdae26995992a766443e1ea862923f2443d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:14 +0200 Subject: hrtimer: Use hrtimer_cb_get_time() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various other helpers contain open-coded implementations of hrtimer_cb_get_time(). This prevents refactoring the implementation. Reuse the existing helper. For this to work, move hrtimer_cb_get_time() a bit up in the file and also make its argument 'const'. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-7-3ae822e5bfbd@linutronix.de --- include/linux/hrtimer.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1ef867bb8c44..e655502b14e6 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -154,14 +154,14 @@ static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) return ktime_to_ns(timer->node.expires); } -static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) +static inline ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) { - return ktime_sub(timer->node.expires, timer->base->get_time()); + return timer->base->get_time(); } -static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) +static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { - return timer->base->get_time(); + return ktime_sub(timer->node.expires, 
hrtimer_cb_get_time(timer)); } static inline int hrtimer_is_hres_active(struct hrtimer *timer) @@ -200,8 +200,7 @@ __hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now) static inline ktime_t hrtimer_expires_remaining_adjusted(const struct hrtimer *timer) { - return __hrtimer_expires_remaining_adjusted(timer, - timer->base->get_time()); + return __hrtimer_expires_remaining_adjusted(timer, hrtimer_cb_get_time(timer)); } #ifdef CONFIG_TIMERFD @@ -363,7 +362,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) { - return hrtimer_forward(timer, timer->base->get_time(), interval); + return hrtimer_forward(timer, hrtimer_cb_get_time(timer), interval); } /* Precise sleep: */ -- cgit v1.2.3 From 009eb5da29a91016e3ebb988e6401e79411be7a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:15 +0200 Subject: hrtimer: Remove hrtimer_clock_base::get_time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get_time() callbacks always need to match the base's clockid. Instead of maintaining that association twice in hrtimer_bases, use a helper. 
Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-8-3ae822e5bfbd@linutronix.de --- include/linux/hrtimer.h | 5 +---- include/linux/hrtimer_defs.h | 2 -- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e655502b14e6..2cf1bf65b225 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -154,10 +154,7 @@ static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) return ktime_to_ns(timer->node.expires); } -static inline ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) -{ - return timer->base->get_time(); -} +ktime_t hrtimer_cb_get_time(const struct hrtimer *timer); static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index 84a5045f80f3..aa49ffa130e5 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -41,7 +41,6 @@ * @seq: seqcount around __run_hrtimer * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers - * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { @@ -51,7 +50,6 @@ struct hrtimer_clock_base { seqcount_raw_spinlock_t seq; struct hrtimer *running; struct timerqueue_head active; - ktime_t (*get_time)(void); ktime_t offset; } __hrtimer_clock_base_align; -- cgit v1.2.3 From 267b9cdee522d03f95acf7c77de91056a4e004b3 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 9 Sep 2025 12:30:35 +0100 Subject: ASoC: cs-amp-lib: Add handling for Lenovo and HP UEFI speaker ID Add handling of the Lenovo-specific and HP-specific EFI variables for speaker ID. Future Lenovo and HP models will not give the codec driver access to the speaker detect GPIO. 
Instead, the BIOS will read the GPIO and create an EFI variable with a value indicating the state of the GPIO. The Lenovo and HP EFI variables are both defined to have only two valid values. But the variable name, GUID and values are different. This adds a new exported function cs_amp_get_vendor_spkid(). Signed-off-by: Richard Fitzgerald Message-ID: <20250909113039.922065-3-rf@opensource.cirrus.com> Signed-off-by: Mark Brown --- include/sound/cs-amp-lib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/cs-amp-lib.h b/include/sound/cs-amp-lib.h index 5459c221badf..43a87a39110c 100644 --- a/include/sound/cs-amp-lib.h +++ b/include/sound/cs-amp-lib.h @@ -49,6 +49,7 @@ int cs_amp_write_cal_coeffs(struct cs_dsp *dsp, const struct cirrus_amp_cal_data *data); int cs_amp_get_efi_calibration_data(struct device *dev, u64 target_uid, int amp_index, struct cirrus_amp_cal_data *out_data); +int cs_amp_get_vendor_spkid(struct device *dev); struct cs_amp_test_hooks { efi_status_t (*get_efi_variable)(efi_char16_t *name, -- cgit v1.2.3 From 70a6f71b1a77decfc5b1db426ccbe914b58adb38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Sep 2025 12:56:38 +0200 Subject: block: add a bio_init_inline helper Just a simpler wrapper around bio_init for callers that want to initialize a bio with inline bvecs. 
Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Yu Kuai Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 46ffac5caab7..eb7f4fbd8aa9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -405,6 +405,11 @@ struct request_queue; void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf); +static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, + unsigned short max_vecs, blk_opf_t opf) +{ + bio_init(bio, bdev, bio->bi_inline_vecs, max_vecs, opf); +} extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); -- cgit v1.2.3 From d86eaa0f3c56da286853b698b45c8ce404291082 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Sep 2025 12:56:39 +0200 Subject: block: remove the bi_inline_vecs variable sized array from struct bio Bios are embedded into other structures, and at least sparse is unhappy about embedding structures with variable sized arrays. There's no real need for the array anyway, we can replace it with a helper pointing to the memory just behind the bio, and with the previous cleanups there are very few sites doing anything special with it. 
Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blk_types.h | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index eb7f4fbd8aa9..27cbff5b0356 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,7 +408,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, unsigned short max_vecs, blk_opf_t opf) { - bio_init(bio, bdev, bio->bi_inline_vecs, max_vecs, opf); + bio_init(bio, bdev, bio_inline_vecs(bio), max_vecs, opf); } extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 930daff207df..bbb7893e0542 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -269,18 +269,16 @@ struct bio { struct bio_vec *bi_io_vec; /* the actual vec list */ struct bio_set *bi_pool; - - /* - * We can inline a number of vecs at the end of the bio, to avoid - * double allocations for a small number of bio_vecs. This member - * MUST obviously be kept at the very end of the bio. - */ - struct bio_vec bi_inline_vecs[]; }; #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) #define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT) +static inline struct bio_vec *bio_inline_vecs(struct bio *bio) +{ + return (struct bio_vec *)(bio + 1); +} + /* * bio flags */ -- cgit v1.2.3 From 199c9a8d26638845f509b76e3c176c27e7baafd7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Sep 2025 20:33:10 +0800 Subject: blk-mq: Document tags_srcu member in blk_mq_tag_set structure Add missing documentation for the tags_srcu member that was introduced to defer freeing of tags page_list to prevent use-after-free when iterating tags. 
Fixes htmldocs warning: WARNING: include/linux/blk-mq.h:536 struct member 'tags_srcu' not described in 'blk_mq_tag_set' Reported-by: Stephen Rothwell Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1325ceeb743a..b25d12545f46 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -507,6 +507,8 @@ enum hctx_type { * request_queue.tag_set_list. * @srcu: Use as lock when type of the request queue is blocking * (BLK_MQ_F_BLOCKING). + * @tags_srcu: SRCU used to defer freeing of tags page_list to prevent + * use-after-free when iterating tags. * @update_nr_hwq_lock: * Synchronize updating nr_hw_queues with add/del disk & * switching elevator. -- cgit v1.2.3 From ce4c356d760f6fb22d69f0c5091542891ba6f394 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 24 Jun 2025 08:36:16 +0200 Subject: media: update Hans Verkuil's email address Replace hansverk@cisco.com by hverkuil@kernel.org. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/i2c/ths7303.h | 2 +- include/media/media-request.h | 2 +- include/uapi/linux/v4l2-dv-timings.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/i2c/ths7303.h b/include/media/i2c/ths7303.h index fc937025cdb4..7eda467b6725 100644 --- a/include/media/i2c/ths7303.h +++ b/include/media/i2c/ths7303.h @@ -5,7 +5,7 @@ * Copyright 2013 Cisco Systems, Inc. and/or its affiliates. * * Contributors: - * Hans Verkuil + * Hans Verkuil * Lad, Prabhakar * Martin Bugge */ diff --git a/include/media/media-request.h b/include/media/media-request.h index d4ac557678a7..bb500b2f9da4 100644 --- a/include/media/media-request.h +++ b/include/media/media-request.h @@ -5,7 +5,7 @@ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
* Copyright (C) 2018 Intel Corporation * - * Author: Hans Verkuil + * Author: Hans Verkuil * Author: Sakari Ailus */ diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index 44a16e0e5a12..58f478f98a35 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -2,7 +2,7 @@ /* * V4L2 DV timings header. * - * Copyright (C) 2012-2016 Hans Verkuil + * Copyright (C) 2012-2016 Hans Verkuil */ #ifndef _V4L2_DV_TIMINGS_H -- cgit v1.2.3 From e8c8d961d8ad53d8d104d7474c08d8e4626c7767 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 24 Jun 2025 08:41:15 +0200 Subject: media: include: update Hans Verkuil's email address Replace hverkuil@xs4all.nl by hverkuil@kernel.org. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/dt-bindings/media/tvp5150.h | 2 +- include/linux/videodev2.h | 2 +- include/media/drv-intf/cx25840.h | 2 +- include/media/drv-intf/msp3400.h | 2 +- include/media/i2c/bt819.h | 2 +- include/media/i2c/cs5345.h | 2 +- include/media/i2c/cs53l32a.h | 2 +- include/media/i2c/m52790.h | 2 +- include/media/i2c/mt9v011.h | 2 +- include/media/i2c/saa7115.h | 2 +- include/media/i2c/saa7127.h | 2 +- include/media/i2c/tvaudio.h | 2 +- include/media/i2c/upd64031a.h | 2 +- include/media/i2c/upd64083.h | 2 +- include/media/i2c/wm8775.h | 2 +- include/media/v4l2-common.h | 2 +- include/media/v4l2-ctrls.h | 2 +- include/media/v4l2-device.h | 2 +- include/media/v4l2-subdev.h | 2 +- include/uapi/linux/ivtv.h | 2 +- include/uapi/linux/videodev2.h | 2 +- 21 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/media/tvp5150.h b/include/dt-bindings/media/tvp5150.h index dda00c038530..ba34c420c303 100644 --- a/include/dt-bindings/media/tvp5150.h +++ b/include/dt-bindings/media/tvp5150.h @@ -2,7 +2,7 @@ /* tvp5150.h - definition for tvp5150 inputs - Copyright (C) 2006 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2006 Hans 
Verkuil (hverkuil@kernel.org) */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 219037f4c08d..9609cf365e8e 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -50,7 +50,7 @@ * * Author: Bill Dirks * Justin Schoeman - * Hans Verkuil + * Hans Verkuil * et al. */ #ifndef __LINUX_VIDEODEV2_H diff --git a/include/media/drv-intf/cx25840.h b/include/media/drv-intf/cx25840.h index ba69bc525382..8b455d9dd5ca 100644 --- a/include/media/drv-intf/cx25840.h +++ b/include/media/drv-intf/cx25840.h @@ -3,7 +3,7 @@ /* * cx25840.h - definition for cx25840/1/2/3 inputs * - * Copyright (C) 2006 Hans Verkuil (hverkuil@xs4all.nl) + * Copyright (C) 2006 Hans Verkuil (hverkuil@kernel.org) */ #ifndef _CX25840_H_ diff --git a/include/media/drv-intf/msp3400.h b/include/media/drv-intf/msp3400.h index d6dfae104a6f..853258ee6bbd 100644 --- a/include/media/drv-intf/msp3400.h +++ b/include/media/drv-intf/msp3400.h @@ -2,7 +2,7 @@ /* msp3400.h - definition for msp3400 inputs and outputs - Copyright (C) 2006 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2006 Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/i2c/bt819.h b/include/media/i2c/bt819.h index 70aa46bd5182..2277a7eb9548 100644 --- a/include/media/i2c/bt819.h +++ b/include/media/i2c/bt819.h @@ -2,7 +2,7 @@ /* bt819.h - bt819 notifications - Copyright (C) 2009 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2009 Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/i2c/cs5345.h b/include/media/i2c/cs5345.h index d41e4dca3fcc..39e1cf6c1a2f 100644 --- a/include/media/i2c/cs5345.h +++ b/include/media/i2c/cs5345.h @@ -2,7 +2,7 @@ /* cs5345.h - definition for cs5345 inputs and outputs - Copyright (C) 2007 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2007 Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/i2c/cs53l32a.h b/include/media/i2c/cs53l32a.h index 52ceb2f916d3..777f667855cb 100644 --- a/include/media/i2c/cs53l32a.h +++ 
b/include/media/i2c/cs53l32a.h @@ -2,7 +2,7 @@ /* cs53l32a.h - definition for cs53l32a inputs and outputs - Copyright (C) 2006 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2006 Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/i2c/m52790.h b/include/media/i2c/m52790.h index 3f214fa9bc64..cedaaf215273 100644 --- a/include/media/i2c/m52790.h +++ b/include/media/i2c/m52790.h @@ -2,7 +2,7 @@ /* m52790.h - definition for m52790 inputs and outputs - Copyright (C) 2007 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2007 Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/i2c/mt9v011.h b/include/media/i2c/mt9v011.h index 41c00b3e7184..552839756e64 100644 --- a/include/media/i2c/mt9v011.h +++ b/include/media/i2c/mt9v011.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* mt9v011 sensor * - * Copyright (C) 2011 Hans Verkuil + * Copyright (C) 2011 Hans Verkuil */ #ifndef __MT9V011_H__ diff --git a/include/media/i2c/saa7115.h b/include/media/i2c/saa7115.h index 0cd6080d7cb1..a607c91ef5f3 100644 --- a/include/media/i2c/saa7115.h +++ b/include/media/i2c/saa7115.h @@ -2,7 +2,7 @@ /* saa7115.h - definition for saa7111/3/4/5 inputs and frequency flags - Copyright (C) 2006 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2006 Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/i2c/saa7127.h b/include/media/i2c/saa7127.h index 53ee999e6090..c81ee1743df1 100644 --- a/include/media/i2c/saa7127.h +++ b/include/media/i2c/saa7127.h @@ -2,7 +2,7 @@ /* saa7127.h - definition for saa7126/7/8/9 inputs/outputs - Copyright (C) 2006 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2006 Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/i2c/tvaudio.h b/include/media/i2c/tvaudio.h index 42cd3206fb6c..206f42ed4e69 100644 --- a/include/media/i2c/tvaudio.h +++ b/include/media/i2c/tvaudio.h @@ -2,7 +2,7 @@ /* tvaudio.h - definition for tvaudio inputs - Copyright (C) 2006 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2006 
Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/i2c/upd64031a.h b/include/media/i2c/upd64031a.h index b6570abc84ef..d39b2b7f0cf3 100644 --- a/include/media/i2c/upd64031a.h +++ b/include/media/i2c/upd64031a.h @@ -2,7 +2,7 @@ /* * upd64031a - NEC Electronics Ghost Reduction input defines * - * 2006 by Hans Verkuil (hverkuil@xs4all.nl) + * 2006 by Hans Verkuil (hverkuil@kernel.org) */ #ifndef _UPD64031A_H_ diff --git a/include/media/i2c/upd64083.h b/include/media/i2c/upd64083.h index 17fb7b5201cc..72cf547c25fc 100644 --- a/include/media/i2c/upd64083.h +++ b/include/media/i2c/upd64083.h @@ -2,7 +2,7 @@ /* * upd6408x - NEC Electronics 3-Dimensional Y/C separation input defines * - * 2006 by Hans Verkuil (hverkuil@xs4all.nl) + * 2006 by Hans Verkuil (hverkuil@kernel.org) */ #ifndef _UPD64083_H_ diff --git a/include/media/i2c/wm8775.h b/include/media/i2c/wm8775.h index 6ccdeb3817ab..a02695ee3a58 100644 --- a/include/media/i2c/wm8775.h +++ b/include/media/i2c/wm8775.h @@ -2,7 +2,7 @@ /* wm8775.h - definition for wm8775 inputs and outputs - Copyright (C) 2006 Hans Verkuil (hverkuil@xs4all.nl) + Copyright (C) 2006 Hans Verkuil (hverkuil@kernel.org) */ diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index d8e23991a656..594314459333 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -7,7 +7,7 @@ Each ioctl begins with VIDIOC_INT_ to clearly mark that it is an internal define, - Copyright (C) 2005 Hans Verkuil + Copyright (C) 2005 Hans Verkuil */ diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 4a294a5c7bdd..31fc1bee3797 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -2,7 +2,7 @@ /* * V4L2 controls support header. 
* - * Copyright (C) 2010 Hans Verkuil + * Copyright (C) 2010 Hans Verkuil */ #ifndef _V4L2_CTRLS_H diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h index dd897a362f36..25f69b1b8db0 100644 --- a/include/media/v4l2-device.h +++ b/include/media/v4l2-device.h @@ -2,7 +2,7 @@ /* V4L2 device support header. - Copyright (C) 2008 Hans Verkuil + Copyright (C) 2008 Hans Verkuil */ diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 4b28086808c9..e0bb58cb6d04 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -2,7 +2,7 @@ /* * V4L2 sub-device support header. * - * Copyright (C) 2008 Hans Verkuil + * Copyright (C) 2008 Hans Verkuil */ #ifndef _V4L2_SUBDEV_H diff --git a/include/uapi/linux/ivtv.h b/include/uapi/linux/ivtv.h index e74f18642b11..c9241f7271c4 100644 --- a/include/uapi/linux/ivtv.h +++ b/include/uapi/linux/ivtv.h @@ -2,7 +2,7 @@ /* Public ivtv API header Copyright (C) 2003-2004 Kevin Thayer - Copyright (C) 2004-2007 Hans Verkuil + Copyright (C) 2004-2007 Hans Verkuil This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 64943f1a6149..becd08fdbddb 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -51,7 +51,7 @@ * * Author: Bill Dirks * Justin Schoeman - * Hans Verkuil + * Hans Verkuil * et al. */ #ifndef _UAPI__LINUX_VIDEODEV2_H -- cgit v1.2.3 From 55e3c86887ddbd1c676a06dcc9c3cceeb5380008 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 13 Aug 2025 00:45:13 +0300 Subject: media: i2c: mt9v022: Drop unused mt9v022.h header The mt9v022 driver got removed in commit e7eab49132ba ("media: staging/media/soc_camera: remove this driver"), but its platform header file got left behind. Remove it. 
Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Reviewed-by: Mehdi Djait Signed-off-by: Hans Verkuil --- include/media/i2c/mt9v022.h | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 include/media/i2c/mt9v022.h (limited to 'include') diff --git a/include/media/i2c/mt9v022.h b/include/media/i2c/mt9v022.h deleted file mode 100644 index 6966eb538165..000000000000 --- a/include/media/i2c/mt9v022.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * mt9v022 sensor - */ - -#ifndef __MT9V022_H__ -#define __MT9V022_H__ - -struct mt9v022_platform_data { - unsigned short y_skip_top; /* Lines to skip at the top */ -}; - -#endif -- cgit v1.2.3 From b4dd3bbd2eeb60702a8251c50cadd098257c5bfe Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 13 Aug 2025 00:45:15 +0300 Subject: media: i2c: mt9v032: Drop support for platform data The last user of the mt9v032 driver through board files and platform data has long been removed. Drop support for platform data from the driver. 
Signed-off-by: Laurent Pinchart Signed-off-by: Sakari Ailus Reviewed-by: Mehdi Djait Signed-off-by: Hans Verkuil --- include/media/i2c/mt9v032.h | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 include/media/i2c/mt9v032.h (limited to 'include') diff --git a/include/media/i2c/mt9v032.h b/include/media/i2c/mt9v032.h deleted file mode 100644 index 83a37ccfb649..000000000000 --- a/include/media/i2c/mt9v032.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MEDIA_MT9V032_H -#define _MEDIA_MT9V032_H - -struct mt9v032_platform_data { - unsigned int clk_pol:1; - - const s64 *link_freqs; - s64 link_def_freq; -}; - -#endif -- cgit v1.2.3 From dd235b07b65e123c0fdadc2883b9c16aa4749164 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 13 Aug 2025 00:45:53 +0300 Subject: media: v4l2-common: Add legacy camera sensor clock helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recently introduced devm_v4l2_sensor_clk_get() helper aims at simplifying sensor drivers by centralizing clock handling code, as well as reducing cargo-cult and deprecated behaviour. A set of drivers implement external clock handling in a non-standard way. This can't be changed as there is a high risk of breaking existing platforms, but keeping the code as-is creates a risk of new drivers copying deprecated behaviour. To fix this, introduce a new devm_v4l2_sensor_clk_get_legacy() helper and use it in those drivers. Compared to devm_v4l2_sensor_clk_get(), the new helper takes the "clock-frequency" property into account and sets the external clock rate on OF platforms, and adds the ability to specify a fixed default or fallback clock rate in case the "clock-frequency" property is not present.
Signed-off-by: Laurent Pinchart Reviewed-by: Niklas Söderlund Signed-off-by: Sakari Ailus Reviewed-by: Mehdi Djait Signed-off-by: Hans Verkuil --- include/media/v4l2-common.h | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index 594314459333..5c0a7f6b5bb6 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -612,6 +612,10 @@ int v4l2_link_freq_to_bitmap(struct device *dev, const u64 *fw_link_freqs, unsigned int num_of_driver_link_freqs, unsigned long *bitmap); +struct clk *__devm_v4l2_sensor_clk_get(struct device *dev, const char *id, + bool legacy, bool fixed_rate, + unsigned long clk_rate); + /** * devm_v4l2_sensor_clk_get - lookup and obtain a reference to a clock producer * for a camera sensor @@ -644,7 +648,42 @@ int v4l2_link_freq_to_bitmap(struct device *dev, const u64 *fw_link_freqs, * * Returns a pointer to a struct clk on success or an error pointer on failure. */ -struct clk *devm_v4l2_sensor_clk_get(struct device *dev, const char *id); +static inline struct clk * +devm_v4l2_sensor_clk_get(struct device *dev, const char *id) +{ + return __devm_v4l2_sensor_clk_get(dev, id, false, false, 0); +} + +/** + * devm_v4l2_sensor_clk_get_legacy - lookup and obtain a reference to a clock + * producer for a camera sensor. + * + * @dev: device for v4l2 sensor clock "consumer" + * @id: clock consumer ID + * @fixed_rate: interpret the @clk_rate as a fixed rate or default rate + * @clk_rate: the clock rate + * + * This function behaves the same way as devm_v4l2_sensor_clk_get() except that + * it extends the behaviour on ACPI platforms to all platforms. 
+ * + * The function also provides the ability to set the clock rate to a fixed + * frequency by setting @fixed_rate to true and specifying the fixed frequency + * in @clk_rate, or to use a default clock rate when the "clock-frequency" + * property is absent by setting @fixed_rate to false and specifying the default + * frequency in @clk_rate. Setting @fixed_rate to true and @clk_rate to 0 is an + * error. + * + * This function is meant to support legacy behaviour in existing drivers only. + * It must not be used in any new driver. + * + * Returns a pointer to a struct clk on success or an error pointer on failure. + */ +static inline struct clk * +devm_v4l2_sensor_clk_get_legacy(struct device *dev, const char *id, + bool fixed_rate, unsigned long clk_rate) +{ + return __devm_v4l2_sensor_clk_get(dev, id, true, fixed_rate, clk_rate); +} static inline u64 v4l2_buffer_get_timestamp(const struct v4l2_buffer *buf) { -- cgit v1.2.3 From 86a9fe82e9b1f43e6d2bc867bf96bb40660d8719 Mon Sep 17 00:00:00 2001 From: Luiz Otavio Mello Date: Mon, 8 Sep 2025 09:15:09 -0400 Subject: drm/i915: Move struct_mutex to drm_i915_private Move legacy BKL struct_mutex from drm_device to drm_i915_private, which is the last remaining user. Signed-off-by: Luiz Otavio Mello Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250908131518.36625-2-luiz.mello@estudante.ufscar.br Acked-by: Thomas Zimmermann Signed-off-by: Rodrigo Vivi --- include/drm/drm_device.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 08b3b2467c4c..cb16fd47e075 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -186,16 +186,6 @@ struct drm_device { /** @unique: Unique name of the device */ char *unique; - /** - * @struct_mutex: - * - * Lock for others (not &drm_minor.master and &drm_file.is_master) - * - * TODO: This lock used to be the BKL of the DRM subsystem. 
Move the - * lock into i915, which is the only remaining user. - */ - struct mutex struct_mutex; - /** * @master_mutex: * -- cgit v1.2.3 From eddb5ba91b289faa15117d4fc1c2fb223f3493c2 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 30 May 2025 15:38:09 +0200 Subject: PM / devfreq: rockchip-dfi: add support for LPDDR5 The Rockchip RK3588 SoC can also support LPDDR5 memory. This type of memory needs some special case handling in the rockchip-dfi driver. Add support for it in rockchip-dfi, as well as the needed GRF register definitions. This has been tested as returning both the right cycle count and bandwidth on a LPDDR5 board where the CKR bit is 1. I couldn't test whether the values are correct on a system where CKR is 0, as I'm not savvy enough with the Rockchip tooling to know whether this can be set in the DDR init blob. Downstream has some special case handling for a hardware version where not just the control bits differ, but also the register. Since I don't know whether that hardware version is in any production silicon, it's left unimplemented for now, with an error message urging users to report if they have such a system. There is a slight change of behaviour for non-LPDDR5 systems: instead of writing 0 as the control flags to the control register and pretending everything is alright if the memory type is unknown, we now explicitly return an error. 
Signed-off-by: Nicolas Frattaroli Reviewed-by: Sascha Hauer Acked-by: Heiko Stuebner Signed-off-by: Chanwoo Choi Link: https://patchwork.kernel.org/project/linux-pm/patch/20250530-rk3588-dfi-improvements-v1-2-6e077c243a95@collabora.com/ --- include/soc/rockchip/rk3588_grf.h | 8 ++++++-- include/soc/rockchip/rockchip_grf.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/soc/rockchip/rk3588_grf.h b/include/soc/rockchip/rk3588_grf.h index 630b35a55064..02a7b2432d99 100644 --- a/include/soc/rockchip/rk3588_grf.h +++ b/include/soc/rockchip/rk3588_grf.h @@ -12,7 +12,11 @@ #define RK3588_PMUGRF_OS_REG3_DRAMTYPE_INFO_V3 GENMASK(13, 12) #define RK3588_PMUGRF_OS_REG3_SYSREG_VERSION GENMASK(31, 28) -#define RK3588_PMUGRF_OS_REG4 0x210 -#define RK3588_PMUGRF_OS_REG5 0x214 +#define RK3588_PMUGRF_OS_REG4 0x210 +#define RK3588_PMUGRF_OS_REG5 0x214 +#define RK3588_PMUGRF_OS_REG6 0x218 +#define RK3588_PMUGRF_OS_REG6_LP5_BANK_MODE GENMASK(2, 1) +/* Whether the LPDDR5 is in 2:1 (= 0) or 4:1 (= 1) CKR a.k.a. DQS mode */ +#define RK3588_PMUGRF_OS_REG6_LP5_CKR BIT(0) #endif /* __SOC_RK3588_GRF_H */ diff --git a/include/soc/rockchip/rockchip_grf.h b/include/soc/rockchip/rockchip_grf.h index e46fd72aea8d..41c7bb26fd53 100644 --- a/include/soc/rockchip/rockchip_grf.h +++ b/include/soc/rockchip/rockchip_grf.h @@ -13,6 +13,7 @@ enum { ROCKCHIP_DDRTYPE_LPDDR3 = 6, ROCKCHIP_DDRTYPE_LPDDR4 = 7, ROCKCHIP_DDRTYPE_LPDDR4X = 8, + ROCKCHIP_DDRTYPE_LPDDR5 = 9, }; #endif /* __SOC_ROCKCHIP_GRF_H */ -- cgit v1.2.3 From fec2e705729dc93de5399d8b139e4746805c3d81 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 2025 07:12:51 -0700 Subject: block: check for valid bio while splitting We're already iterating every segment, so check these for valid IO lengths at the same time. Individual segment lengths will not be checked on passthrough commands. The read/write command segments must be sized to the dma alignment.
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++-- include/linux/blkdev.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 27cbff5b0356..13d1df02656a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -322,8 +322,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); -int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, - unsigned *segs, unsigned max_bytes); +int bio_split_io_at(struct bio *bio, const struct queue_limits *lim, + unsigned *segs, unsigned max_bytes, unsigned len_align); /** * bio_next_split - get next @sectors from a bio, splitting if necessary diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7709d55adc23..9efacabaa2f7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1870,6 +1870,13 @@ bdev_atomic_write_unit_max_bytes(struct block_device *bdev) return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev)); } +static inline int bio_split_rw_at(struct bio *bio, + const struct queue_limits *lim, + unsigned *segs, unsigned max_bytes) +{ + return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 743bf2e0c49c835cb7c4e4ac7d5a2610587047be Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 2025 07:12:52 -0700 Subject: block: add size alignment to bio_iov_iter_get_pages The block layer tries to align bio vectors to the block device's logical block size. Some cases don't have a block device, or we may need to align to something larger, which we can't derive from the queue limits.
Have the caller specify what they want, or allow any length alignment if nothing was specified. Since the most common use case relies on the block device's limits, a helper function is provided. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 ++++++++- include/linux/blkdev.h | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 13d1df02656a..a64a30131031 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -446,7 +446,14 @@ int submit_bio_wait(struct bio *bio); int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); -int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); +int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, + unsigned len_align_mask); + +static inline int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) +{ + return bio_iov_iter_get_pages_aligned(bio, iter, 0); +} + void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9efacabaa2f7..44e1066f7446 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1877,6 +1877,13 @@ static inline int bio_split_rw_at(struct bio *bio, return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); } +static inline int bio_iov_iter_get_bdev_pages(struct bio *bio, + struct iov_iter *iter, struct block_device *bdev) +{ + return bio_iov_iter_get_pages_aligned(bio, iter, + bdev_logical_block_size(bdev) - 1); +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 9eab1d4e0d15b633adc170c458c51e8be3b1c553 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 
2025 07:12:56 -0700 Subject: block: remove bdev_iter_is_aligned No more callers. Signed-off-by: Keith Busch Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 44e1066f7446..c8cb08b2ed29 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1590,13 +1590,6 @@ static inline unsigned int bdev_dma_alignment(struct block_device *bdev) return queue_dma_alignment(bdev_get_queue(bdev)); } -static inline bool bdev_iter_is_aligned(struct block_device *bdev, - struct iov_iter *iter) -{ - return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev), - bdev_logical_block_size(bdev) - 1); -} - static inline unsigned int blk_lim_dma_alignment_and_pad(struct queue_limits *lim) { -- cgit v1.2.3 From b475272f03ca5d0c437c8f899ff229b21010ec83 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 2025 07:12:58 -0700 Subject: iov_iter: remove iov_iter_is_aligned No more callers. Signed-off-by: Keith Busch Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Mike Snitzer Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/uio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 2e86c653186c..5b127043a151 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -286,8 +286,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); -bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, - unsigned len_mask); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, -- cgit v1.2.3 From d57447ffb5fadffdba920f2fb933296fb6c5ff57 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 3 Sep 2025 12:33:17 -0700 Subject: blk-mq-dma: bring back p2p request flags We only need to consider data and metadata dma mapping types separately. The request and bio integrity payload have enough flag bits to internally track the mapping type for each. Use these so the caller doesn't need to track them, and provide separate request and integrity helpers to the common code. This will make it easier to scale new mappings, like the proposed MMIO attribute, without burdening the caller to track such things. Reviewed-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Reviewed-by: Leon Romanovsky Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/bio-integrity.h | 1 + include/linux/blk-integrity.h | 15 +++++++++++++++ include/linux/blk-mq-dma.h | 11 +++++++++-- include/linux/blk_types.h | 2 ++ 4 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index 0a25716820fe..851254f36eb3 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -13,6 +13,7 @@ enum bip_flags { BIP_CHECK_GUARD = 1 << 5, /* guard check */ BIP_CHECK_REFTAG = 1 << 6, /* reftag check */ BIP_CHECK_APPTAG = 1 << 7, /* apptag check */ + BIP_P2P_DMA = 1 << 8, /* using P2P address */ }; struct bio_integrity_payload { diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 78fe2459e661..b659373788f6 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -27,6 +27,15 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, #ifdef CONFIG_BLK_DEV_INTEGRITY int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); + +static inline bool blk_rq_integrity_dma_unmap(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + size_t mapped_len) +{ + return blk_dma_unmap(req, dma_dev, state, mapped_len, + bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA); +} + int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes); @@ -115,6 +124,12 @@ static inline int blk_rq_map_integrity_sg(struct request *q, { return 0; } +static inline bool blk_rq_integrity_dma_unmap(struct request *req, + struct device *dma_dev, struct dma_iova_state *state, + size_t mapped_len) +{ + return false; +} static inline int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, ssize_t bytes) diff --git a/include/linux/blk-mq-dma.h 
b/include/linux/blk-mq-dma.h index 0f45ea110ca1..51829958d872 100644 --- a/include/linux/blk-mq-dma.h +++ b/include/linux/blk-mq-dma.h @@ -43,7 +43,7 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) } /** - * blk_rq_dma_unmap - try to DMA unmap a request + * blk_dma_unmap - try to DMA unmap a request * @req: request to unmap * @dma_dev: device to unmap from * @state: DMA IOVA state @@ -53,7 +53,7 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state) * Returns %false if the callers need to manually unmap every DMA segment * mapped using @iter or %true if no work is left to be done. */ -static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, +static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev, struct dma_iova_state *state, size_t mapped_len, bool is_p2p) { if (is_p2p) @@ -68,4 +68,11 @@ static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, return !dma_need_unmap(dma_dev); } +static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev, + struct dma_iova_state *state, size_t mapped_len) +{ + return blk_dma_unmap(req, dma_dev, state, mapped_len, + req->cmd_flags & REQ_P2PDMA); +} + #endif /* BLK_MQ_DMA_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bbb7893e0542..4bd098fd61cb 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -384,6 +384,7 @@ enum req_flag_bits { __REQ_DRV, /* for driver use */ __REQ_FS_PRIVATE, /* for file system (submitter) use */ __REQ_ATOMIC, /* for atomic write operations */ + __REQ_P2PDMA, /* contains P2P DMA pages */ /* * Command specific flags, keep last: */ @@ -416,6 +417,7 @@ enum req_flag_bits { #define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE) #define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC) +#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA) #define 
REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) -- cgit v1.2.3 From 60df8a5d8f6505974784f7290fdfa94e2aa4e255 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 26 Aug 2025 15:49:20 +0530 Subject: drm/ttm: Bump TTM_NUM_MEM_TYPES to 9 (Prep for AMDGPU_PL_MMIO_REMAP) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase TTM_NUM_MEM_TYPES from 8 to 9 to accommodate the upcoming AMDGPU_PL_MMIO_REMAP placement. Cc: Alex Deucher Suggested-by: Christian König Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/drm/ttm/ttm_resource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index e52bba15012f..f49daa504c36 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -36,7 +36,7 @@ #include #define TTM_MAX_BO_PRIORITY 4U -#define TTM_NUM_MEM_TYPES 8 +#define TTM_NUM_MEM_TYPES 9 struct dmem_cgroup_device; struct ttm_device; -- cgit v1.2.3 From 056132483724a1ba1ff8823914dace71f8e8938c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 20 Aug 2025 12:33:29 +0530 Subject: drm/amdgpu/uapi: Introduce AMDGPU_GEM_DOMAIN_MMIO_REMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new GEM domain bit AMDGPU_GEM_DOMAIN_MMIO_REMAP to allow userspace to request the MMIO remap (HDP flush) page via GEM_CREATE. - include/uapi/drm/amdgpu_drm.h: * define AMDGPU_GEM_DOMAIN_MMIO_REMAP * include the bit in AMDGPU_GEM_DOMAIN_MASK v2: Add early reject in amdgpu_gem_create_ioctl() (Alex). 
Cc: Christian König Suggested-by: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 9cebd072a042..85b3ca14f81e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -105,6 +105,8 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for * signalling user mode queues. + * + * %AMDGPU_GEM_DOMAIN_MMIO_REMAP MMIO remap page (special mapping for HDP flushing). */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -113,13 +115,15 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 #define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 +#define AMDGPU_GEM_DOMAIN_MMIO_REMAP 0x80 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ - AMDGPU_GEM_DOMAIN_OA | \ - AMDGPU_GEM_DOMAIN_DOORBELL) + AMDGPU_GEM_DOMAIN_OA | \ + AMDGPU_GEM_DOMAIN_DOORBELL | \ + AMDGPU_GEM_DOMAIN_MMIO_REMAP) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) -- cgit v1.2.3 From ce0b015e2619ae64b7d33fb24a6b6cadcd70c317 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Sat, 6 Sep 2025 18:29:43 -0700 Subject: devlink: Add 'total_vfs' generic device param NICs are typically configured with total_vfs=0, forcing users to rely on external tools to enable SR-IOV (a widely used and essential feature). Add total_vfs parameter to devlink for SR-IOV max VF configurability. Enables standard kernel tools to manage SR-IOV, addressing the need for flexible VF configuration. 
Signed-off-by: Vlad Dumitrescu Tested-by: Kamal Heib Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250907012953.301746-2-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 5f44e702c25c..8d4362f010e4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -530,6 +530,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, + DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -594,6 +595,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_CLOCK_ID_NAME "clock_id" #define DEVLINK_PARAM_GENERIC_CLOCK_ID_TYPE DEVLINK_PARAM_TYPE_U64 +#define DEVLINK_PARAM_GENERIC_TOTAL_VFS_NAME "total_vfs" +#define DEVLINK_PARAM_GENERIC_TOTAL_VFS_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From bf2da4799fdb6eb58d9c9541b7dc1096c260499d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 6 Sep 2025 18:29:44 -0700 Subject: net/mlx5: Implement cqe_compress_type via devlink params Selects which algorithm should be used by the NIC in order to decide rate of CQE compression depending on PCIe bus conditions. Supported values: 1) balanced, merges fewer CQEs, resulting in a moderate compression ratio but maintaining a balance between bandwidth savings and performance 2) aggressive, merges more CQEs into a single entry, achieving a higher compression rate and maximizing performance, particularly under high traffic loads.
Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250907012953.301746-3-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c0858af0e854..fcfc18bfeba9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -137,6 +137,7 @@ enum { MLX5_REG_MTCAP = 0x9009, MLX5_REG_MTMP = 0x900A, MLX5_REG_MCIA = 0x9014, + MLX5_REG_MNVDA = 0x9024, MLX5_REG_MFRL = 0x9028, MLX5_REG_MLCR = 0x902b, MLX5_REG_MRTC = 0x902d, -- cgit v1.2.3 From e096a7cc0be126d9376e549a10d71cf16b1a1c1c Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 5 Sep 2025 11:07:09 +0800 Subject: ptp: add debugfs interfaces to loop back the periodic output signal For some PTP devices, they have the capability to loop back the periodic output signal for debugging, such as the ptp_qoriq device. So add the generic interfaces to set the periodic output signal loopback, rather than each vendor having a different implementation. 
Show how many channels support the periodic output signal loopback: $ cat /sys/kernel/debug/ptp/n_perout_loopback Enable the loopback of the periodic output signal of channel X: $ echo 1 > /sys/kernel/debug/ptp/perout_loopback Disable the loopback of the periodic output signal of channel X: $ echo 0 > /sys/kernel/debug/ptp/perout_loopback Suggested-by: Andrew Lunn Signed-off-by: Wei Fang Link: https://patch.msgid.link/20250905030711.1509648-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/ptp_clock_kernel.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 7dd7951b23d5..884364596dd3 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -67,6 +67,8 @@ struct ptp_system_timestamp { * @n_ext_ts: The number of external time stamp channels. * @n_per_out: The number of programmable periodic signals. * @n_pins: The number of programmable pins. + * @n_per_lp: The number of channels that support loopback the periodic + * output signal. * @pps: Indicates whether the clock supports a PPS callback. * * @supported_perout_flags: The set of flags the driver supports for the @@ -175,6 +177,11 @@ struct ptp_system_timestamp { * scheduling time (>=0) or negative value in case further * scheduling is not required. * + * @perout_loopback: Request driver to enable or disable the periodic output + * signal loopback. + * parameter index: index of the periodic output signal channel. + * parameter on: caller passes one to enable or zero to disable. + * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of(). 
* @@ -189,6 +196,7 @@ struct ptp_clock_info { int n_ext_ts; int n_per_out; int n_pins; + int n_per_lp; int pps; unsigned int supported_perout_flags; unsigned int supported_extts_flags; @@ -213,6 +221,8 @@ struct ptp_clock_info { int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan); long (*do_aux_work)(struct ptp_clock_info *ptp); + int (*perout_loopback)(struct ptp_clock_info *ptp, unsigned int index, + int on); }; struct ptp_clock; -- cgit v1.2.3 From f3164840a136b123c8348ca5af4d83d99aa86eb7 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 5 Sep 2025 11:07:11 +0800 Subject: ptp: qoriq: convert to use generic interfaces to set loopback mode Since the generic debugfs interfaces for setting the periodic pulse signal loopback have been added to the ptp_clock driver, convert the vendor-defined debugfs interfaces to the generic interfaces. Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://patch.msgid.link/20250905030711.1509648-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- include/linux/fsl/ptp_qoriq.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index b301bf7199d3..3601e25779ba 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -145,7 +145,6 @@ struct ptp_qoriq { struct ptp_clock *clock; struct ptp_clock_info caps; struct resource *rsrc; - struct dentry *debugfs_root; struct device *dev; bool extts_fifo_support; bool fiper3_support; @@ -195,14 +194,5 @@ int ptp_qoriq_settime(struct ptp_clock_info *ptp, int ptp_qoriq_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on); int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event); -#ifdef CONFIG_DEBUG_FS -void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq); -void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq); -#else -static inline
void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq) -{ } -static inline void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq) -{ } -#endif #endif -- cgit v1.2.3 From faac32d4ece30609f1a0930ca0ae951cf6dc1786 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 3 Sep 2025 10:44:37 +0800 Subject: scsi: ufs: host: mediatek: Enhance recovery on hibernation exit failure Improve the recovery process for hibernation exit failures. Trigger the error handler and break the suspend operation to ensure effective recovery from hibernation errors. Activate the error handling mechanism by ufshcd_force_error_recovery and scheduling the error handler work. Signed-off-by: Peter Wang Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 1d3943777584..219935b3a76f 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1508,5 +1508,6 @@ int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask); int ufshcd_write_ee_control(struct ufs_hba *hba); int ufshcd_update_ee_control(struct ufs_hba *hba, u16 *mask, const u16 *other_mask, u16 set, u16 clr); +void ufshcd_force_error_recovery(struct ufs_hba *hba); #endif /* End of Header */ -- cgit v1.2.3 From bc5dbf7739594b05c673ab3905471257be9921e7 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Wed, 3 Sep 2025 13:18:15 +0530 Subject: scsi: ufs: ufs-qcom: Refactor MCQ register dump logic Refactor MCQ register dump to align with the new resource mapping. 
As part of refactor, below changes are done: - Update ufs_qcom_dump_regs() function signature to accept direct base address instead of resource ID enum - Modify ufs_qcom_dump_mcq_hci_regs() to use hba->mcq_base and calculated addresses from MCQ operation info - Replace enum ufshcd_res with direct memory-mapped I/O addresses Additionally remove the ufshcd_res_info structure and associated enum ufshcd_res definitions from the UFS host controller header. These were previously used for MCQ resource mapping but are no longer needed following recent refactoring to use direct base addresses instead of multiple separate resource regions. Signed-off-by: Nitin Rawat Reviewed-by: Bart Van Assche Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 1d3943777584..a7bcf7c7a1af 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -794,30 +794,6 @@ struct ufs_hba_monitor { bool enabled; }; -/** - * struct ufshcd_res_info_t - MCQ related resource regions - * - * @name: resource name - * @resource: pointer to resource region - * @base: register base address - */ -struct ufshcd_res_info { - const char *name; - struct resource *resource; - void __iomem *base; -}; - -enum ufshcd_res { - RES_UFS, - RES_MCQ, - RES_MCQ_SQD, - RES_MCQ_SQIS, - RES_MCQ_CQD, - RES_MCQ_CQIS, - RES_MCQ_VS, - RES_MAX, -}; - /** * struct ufshcd_mcq_opr_info_t - Operation and Runtime registers * @@ -1127,7 +1103,6 @@ struct ufs_hba { bool lsdb_sup; bool mcq_enabled; bool mcq_esi_enabled; - struct ufshcd_res_info res[RES_MAX]; void __iomem *mcq_base; struct ufs_hw_queue *uhq; struct ufs_hw_queue *dev_cmd_queue; -- cgit v1.2.3 From 5f5598d945e2a69f764aa5c2074dad73e23bcfcb Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Sat, 6 Sep 2025 15:16:53 -0500 Subject: dt-bindings: memory: tegra210: Add memory client IDs Each 
memory client has unique hardware ID, add these IDs. Signed-off-by: Aaron Kling Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/memory/tegra210-mc.h | 74 ++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/memory/tegra210-mc.h b/include/dt-bindings/memory/tegra210-mc.h index 5e082547f179..881bf78aa8b2 100644 --- a/include/dt-bindings/memory/tegra210-mc.h +++ b/include/dt-bindings/memory/tegra210-mc.h @@ -75,4 +75,78 @@ #define TEGRA210_MC_RESET_ETR 28 #define TEGRA210_MC_RESET_TSECB 29 +#define TEGRA210_MC_PTCR 0 +#define TEGRA210_MC_DISPLAY0A 1 +#define TEGRA210_MC_DISPLAY0AB 2 +#define TEGRA210_MC_DISPLAY0B 3 +#define TEGRA210_MC_DISPLAY0BB 4 +#define TEGRA210_MC_DISPLAY0C 5 +#define TEGRA210_MC_DISPLAY0CB 6 +#define TEGRA210_MC_AFIR 14 +#define TEGRA210_MC_AVPCARM7R 15 +#define TEGRA210_MC_DISPLAYHC 16 +#define TEGRA210_MC_DISPLAYHCB 17 +#define TEGRA210_MC_HDAR 21 +#define TEGRA210_MC_HOST1XDMAR 22 +#define TEGRA210_MC_HOST1XR 23 +#define TEGRA210_MC_NVENCSRD 28 +#define TEGRA210_MC_PPCSAHBDMAR 29 +#define TEGRA210_MC_PPCSAHBSLVR 30 +#define TEGRA210_MC_SATAR 31 +#define TEGRA210_MC_MPCORER 39 +#define TEGRA210_MC_NVENCSWR 43 +#define TEGRA210_MC_AFIW 49 +#define TEGRA210_MC_AVPCARM7W 50 +#define TEGRA210_MC_HDAW 53 +#define TEGRA210_MC_HOST1XW 54 +#define TEGRA210_MC_MPCOREW 57 +#define TEGRA210_MC_PPCSAHBDMAW 59 +#define TEGRA210_MC_PPCSAHBSLVW 60 +#define TEGRA210_MC_SATAW 61 +#define TEGRA210_MC_ISPRA 68 +#define TEGRA210_MC_ISPWA 70 +#define TEGRA210_MC_ISPWB 71 +#define TEGRA210_MC_XUSB_HOSTR 74 +#define TEGRA210_MC_XUSB_HOSTW 75 +#define TEGRA210_MC_XUSB_DEVR 76 +#define TEGRA210_MC_XUSB_DEVW 77 +#define TEGRA210_MC_ISPRAB 78 +#define TEGRA210_MC_ISPWAB 80 +#define TEGRA210_MC_ISPWBB 81 +#define TEGRA210_MC_TSECSRD 84 +#define TEGRA210_MC_TSECSWR 85 +#define TEGRA210_MC_A9AVPSCR 86 +#define TEGRA210_MC_A9AVPSCW 87 +#define TEGRA210_MC_GPUSRD 88 +#define TEGRA210_MC_GPUSWR 89 
+#define TEGRA210_MC_DISPLAYT 90 +#define TEGRA210_MC_SDMMCRA 96 +#define TEGRA210_MC_SDMMCRAA 97 +#define TEGRA210_MC_SDMMCR 98 +#define TEGRA210_MC_SDMMCRAB 99 +#define TEGRA210_MC_SDMMCWA 100 +#define TEGRA210_MC_SDMMCWAA 101 +#define TEGRA210_MC_SDMMCW 102 +#define TEGRA210_MC_SDMMCWAB 103 +#define TEGRA210_MC_VICSRD 108 +#define TEGRA210_MC_VICSWR 109 +#define TEGRA210_MC_VIW 114 +#define TEGRA210_MC_DISPLAYD 115 +#define TEGRA210_MC_NVDECSRD 120 +#define TEGRA210_MC_NVDECSWR 121 +#define TEGRA210_MC_APER 122 +#define TEGRA210_MC_APEW 123 +#define TEGRA210_MC_NVJPGRD 126 +#define TEGRA210_MC_NVJPGWR 127 +#define TEGRA210_MC_SESRD 128 +#define TEGRA210_MC_SESWR 129 +#define TEGRA210_MC_AXIAPR 130 +#define TEGRA210_MC_AXIAPW 131 +#define TEGRA210_MC_ETRR 132 +#define TEGRA210_MC_ETRW 133 +#define TEGRA210_MC_TSECSRDB 134 +#define TEGRA210_MC_TSECSWRB 135 +#define TEGRA210_MC_GPUSRD2 136 +#define TEGRA210_MC_GPUSWR2 137 + #endif -- cgit v1.2.3 From 7d6ca84aa985fc940f5544ed7feedb1b4a82b96b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 5 Sep 2025 03:05:26 -0700 Subject: KVM: arm64: vgic: Drop stale comment on IRQ active state While LPIs lack an active state, KVM unconditionally folds the active state from the LR into the vgic_irq struct meaning this field cannot be 'creatively' reused for something else. Drop the misleading comment to reflect this. Link: https://lore.kernel.org/r/20250905100531.282980-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_vgic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 404883c7af6e..9f8a116925ca 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -139,7 +139,7 @@ struct vgic_irq { bool pending_latch; /* The pending latch state used to calculate * the pending state for both level * and edge triggered IRQs. 
*/ - bool active; /* not used for LPIs */ + bool active; bool enabled; bool hw; /* Tied to HW IRQ */ struct kref refcount; /* Used for LPIs */ -- cgit v1.2.3 From 3a08a6ca7c373198c84e2a8c025c395ee966ff8a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 5 Sep 2025 03:05:27 -0700 Subject: KVM: arm64: vgic-v3: Use bare refcount for VGIC LPIs KVM's use of krefs to manage LPIs isn't adding much, especially considering vgic_irq_release() is a noop due to the lack of sufficient context. Switch to using a regular refcount in anticipation of adding a meaningful release concept for LPIs. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250905100531.282980-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_vgic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 9f8a116925ca..640555ff5b54 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -8,8 +8,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -142,7 +142,7 @@ struct vgic_irq { bool active; bool enabled; bool hw; /* Tied to HW IRQ */ - struct kref refcount; /* Used for LPIs */ + refcount_t refcount; /* Used for LPIs */ u32 hwintid; /* HW INTID number */ unsigned int host_irq; /* linux irq corresponding to hwintid */ union { -- cgit v1.2.3 From d54594accf732d17891d276aa1f545ef25606555 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 5 Sep 2025 03:05:29 -0700 Subject: KVM: arm64: vgic-v3: Erase LPIs from xarray outside of raw spinlocks syzkaller has caught us red-handed once more, this time nesting regular spinlocks behind raw spinlocks: ============================= [ BUG: Invalid wait context ] 6.16.0-rc3-syzkaller-g7b8346bd9fce #0 Not tainted ----------------------------- syz.0.29/3743 is trying to lock: a3ff80008e2e9e18 (&xa->xa_lock#20){....}-{3:3}, at: vgic_put_irq+0xb4/0x190 arch/arm64/kvm/vgic/vgic.c:137 other info that 
might help us debug this: context-{5:5} 3 locks held by syz.0.29/3743: #0: a3ff80008e2e90a8 (&kvm->slots_lock){+.+.}-{4:4}, at: kvm_vgic_destroy+0x50/0x624 arch/arm64/kvm/vgic/vgic-init.c:499 #1: a3ff80008e2e9fa0 (&kvm->arch.config_lock){+.+.}-{4:4}, at: kvm_vgic_destroy+0x5c/0x624 arch/arm64/kvm/vgic/vgic-init.c:500 #2: 58f0000021be1428 (&vgic_cpu->ap_list_lock){....}-{2:2}, at: vgic_flush_pending_lpis+0x3c/0x31c arch/arm64/kvm/vgic/vgic.c:150 stack backtrace: CPU: 0 UID: 0 PID: 3743 Comm: syz.0.29 Not tainted 6.16.0-rc3-syzkaller-g7b8346bd9fce #0 PREEMPT Hardware name: linux,dummy-virt (DT) Call trace: show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:466 (C) __dump_stack+0x30/0x40 lib/dump_stack.c:94 dump_stack_lvl+0xd8/0x12c lib/dump_stack.c:120 dump_stack+0x1c/0x28 lib/dump_stack.c:129 print_lock_invalid_wait_context kernel/locking/lockdep.c:4833 [inline] check_wait_context kernel/locking/lockdep.c:4905 [inline] __lock_acquire+0x978/0x299c kernel/locking/lockdep.c:5190 lock_acquire+0x14c/0x2e0 kernel/locking/lockdep.c:5871 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x5c/0x7c kernel/locking/spinlock.c:162 vgic_put_irq+0xb4/0x190 arch/arm64/kvm/vgic/vgic.c:137 vgic_flush_pending_lpis+0x24c/0x31c arch/arm64/kvm/vgic/vgic.c:158 __kvm_vgic_vcpu_destroy+0x44/0x500 arch/arm64/kvm/vgic/vgic-init.c:455 kvm_vgic_destroy+0x100/0x624 arch/arm64/kvm/vgic/vgic-init.c:505 kvm_arch_destroy_vm+0x80/0x138 arch/arm64/kvm/arm.c:244 kvm_destroy_vm virt/kvm/kvm_main.c:1308 [inline] kvm_put_kvm+0x800/0xff8 virt/kvm/kvm_main.c:1344 kvm_vm_release+0x58/0x78 virt/kvm/kvm_main.c:1367 __fput+0x4ac/0x980 fs/file_table.c:465 ____fput+0x20/0x58 fs/file_table.c:493 task_work_run+0x1bc/0x254 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] do_notify_resume+0x1b4/0x270 arch/arm64/kernel/entry-common.c:151 exit_to_user_mode_prepare arch/arm64/kernel/entry-common.c:169 [inline] exit_to_user_mode 
arch/arm64/kernel/entry-common.c:178 [inline] el0_svc+0xb4/0x160 arch/arm64/kernel/entry-common.c:768 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 This is of course no good, but is at odds with how LPI refcounts are managed. Solve the locking mess by deferring the release of unreferenced LPIs after the ap_list_lock is released. Mark these to-be-released LPIs specially to avoid racing with vgic_put_irq() and causing a double-free. Since references can only be taken on LPIs with a nonzero refcount, extending the lifetime of freed LPIs is still safe. Reviewed-by: Marc Zyngier Reported-by: syzbot+cef594105ac7e60c6d93@syzkaller.appspotmail.com Closes: https://lore.kernel.org/kvmarm/68acd0d9.a00a0220.33401d.048b.GAE@google.com/ Link: https://lore.kernel.org/r/20250905100531.282980-5-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- include/kvm/arm_vgic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 640555ff5b54..4000ff16f295 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,6 +140,9 @@ struct vgic_irq { * the pending state for both level * and edge triggered IRQs. */ bool active; + bool pending_release; /* Used for LPIs only, unreferenced IRQ + * pending a release */ + bool enabled; bool hw; /* Tied to HW IRQ */ refcount_t refcount; /* Used for LPIs */ -- cgit v1.2.3 From e0423541477dfb684fbc6e6b5386054bc650f264 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Sep 2025 15:44:45 +0200 Subject: PM: EM: Add function for registering a PD without capacity update The intel_pstate driver manages CPU capacity changes itself and it does not need an update of the capacity of all CPUs in the system to be carried out after registering a PD. 
Moreover, in some configurations (for instance, an SMT-capable hybrid x86 system booted with nosmt in the kernel command line) the em_check_capacity_update() call at the end of em_dev_register_perf_domain() always fails and reschedules itself to run once again in 1 s, so effectively it runs in vain every 1 s forever. To address this, introduce a new variant of em_dev_register_perf_domain(), called em_dev_register_pd_no_update(), that does not invoke em_check_capacity_update(), and make intel_pstate use it instead of the original. Fixes: 7b010f9b9061 ("cpufreq: intel_pstate: EAS support for hybrid platforms") Closes: https://lore.kernel.org/linux-pm/40212796-734c-4140-8a85-854f72b8144d@panix.com/ Reported-by: Kenneth R. Crudup Tested-by: Kenneth R. Crudup Cc: 6.16+ # 6.16+ Signed-off-by: Rafael J. Wysocki --- include/linux/energy_model.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 7fa1eb3cc823..61d50571ad88 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -171,6 +171,9 @@ int em_dev_update_perf_domain(struct device *dev, int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, const struct em_data_callback *cb, const cpumask_t *cpus, bool microwatts); +int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states, + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); struct em_perf_table *em_table_alloc(struct em_perf_domain *pd); void em_table_free(struct em_perf_table *table); @@ -350,6 +353,13 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, { return -EINVAL; } +static inline +int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states, + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) +{ + return -EINVAL; +} static inline void 
em_dev_unregister_perf_domain(struct device *dev) { } -- cgit v1.2.3 From 1733e88874838ddebf7774440c285700865e6b08 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 10 Sep 2025 14:30:41 +0800 Subject: block: cleanup bio_issue Now that bio->bi_issue is only used by blk-iolatency to get bio issue time, replace bio_issue with u64 time directly and remove bio_issue to make code cleaner. Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 4bd098fd61cb..8e8d1cc8b06c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -198,10 +198,6 @@ static inline bool blk_path_error(blk_status_t error) return true; } -struct bio_issue { - u64 value; -}; - typedef __u32 __bitwise blk_opf_t; typedef unsigned int blk_qc_t; @@ -242,7 +238,8 @@ struct bio { * on release of the bio. */ struct blkcg_gq *bi_blkg; - struct bio_issue bi_issue; + /* Time that this bio was issued. */ + u64 issue_time_ns; #ifdef CONFIG_BLK_CGROUP_IOCOST u64 bi_iocost_cost; #endif -- cgit v1.2.3 From ea3d1f104db60f9d5074b33819ccea3c216e0bee Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 10 Sep 2025 14:30:43 +0800 Subject: blk-mq: add QUEUE_FLAG_BIO_ISSUE_TIME bio->issue_time_ns is initialized for every bio, however, it's only used by blk-iolatency. Add a new queue_flag and only set this flag when blk-iolatency is enabled, so that extra blk_time_get_ns() can be saved for disks that blk-iolatency is not enabled. 
Signed-off-by: Yu Kuai Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8cb08b2ed29..7c542b1851fa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -657,6 +657,7 @@ enum { QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ + QUEUE_FLAG_BIO_ISSUE_TIME, /* record bio->issue_time_ns */ QUEUE_FLAG_MAX }; -- cgit v1.2.3 From e37b5596a19be9a150cb194ec32e78f295a3574b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 10 Sep 2025 14:30:46 +0800 Subject: block: factor out a helper bio_submit_split_bioset() No functional changes are intended, some drivers like mdraid will split bio by internal processing, prepare to unify bio split codes. Signed-off-by: Yu Kuai Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7c542b1851fa..066e5309bd45 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1000,6 +1000,8 @@ extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); void submit_bio_noacct(struct bio *bio); struct bio *bio_split_to_limits(struct bio *bio); +struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors, + struct bio_set *bs); extern int blk_lld_busy(struct request_queue *q); extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); -- cgit v1.2.3 From 448097bbd3836d2ee46fa6eabd18661e9a3c8be8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Fran=C3=A7ois=20Lessard?= Date: Tue, 2 Sep 2025 15:04:39 -0400 Subject: device property: Add scoped fwnode child node iterators MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add scoped versions of fwnode child node iterators that automatically handle reference counting cleanup using the __free() attribute: - fwnode_for_each_child_node_scoped() - fwnode_for_each_available_child_node_scoped() These macros follow the same pattern as existing scoped iterators in the kernel, ensuring fwnode references are automatically released when the iterator variable goes out of scope. This prevents resource leaks and eliminates the need for manual cleanup in error paths. The implementation mirrors the non-scoped variants but uses __free(fwnode_handle) for automatic resource management, providing a safer and more convenient interface for drivers iterating over firmware node children. Signed-off-by: Jean-François Lessard Acked-by: Danilo Krummrich Reviewed-by: Andy Shevchenko Reviewed-by: Sakari Ailus Signed-off-by: Wolfram Sang --- include/linux/property.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 82f0cb3abd1e..862e208133f3 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -176,6 +176,16 @@ struct fwnode_handle *fwnode_get_next_available_child_node( for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\ child = fwnode_get_next_available_child_node(fwnode, child)) +#define fwnode_for_each_child_node_scoped(fwnode, child) \ + for (struct fwnode_handle *child __free(fwnode_handle) = \ + fwnode_get_next_child_node(fwnode, NULL); \ + child; child = fwnode_get_next_child_node(fwnode, child)) + +#define fwnode_for_each_available_child_node_scoped(fwnode, child) \ + for (struct fwnode_handle *child __free(fwnode_handle) = \ + fwnode_get_next_available_child_node(fwnode, NULL); \ + child; child = fwnode_get_next_available_child_node(fwnode, child)) + struct fwnode_handle *device_get_next_child_node(const struct device *dev, struct fwnode_handle *child); -- cgit 
v1.2.3 From c4272905c37930c19b54fa3549b22899122ce69e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Sep 2025 09:00:34 -0700 Subject: cxl/acpi: Rename CFMW coherency restrictions ACPICA commit 710745713ad3a2543dbfb70e84764f31f0e46bdc This has been renamed in more recent CXL specs, as type3 (memory expanders) can also use HDM-DB for device coherent memory. Link: https://github.com/acpica/acpica/commit/710745713ad3a2543dbfb70e84764f31f0e46bdc Acked-by: Rafael J. Wysocki (Intel) Signed-off-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Gregory Price Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20250908160034.86471-1-dave@stgolabs.net Signed-off-by: Dave Jiang --- include/acpi/actbl1.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 99fd1588ff38..eb787dfbd2fa 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -560,8 +560,8 @@ struct acpi_cedt_cfmws_target_element { /* Values for Restrictions field above */ -#define ACPI_CEDT_CFMWS_RESTRICT_TYPE2 (1) -#define ACPI_CEDT_CFMWS_RESTRICT_TYPE3 (1<<1) +#define ACPI_CEDT_CFMWS_RESTRICT_DEVMEM (1) +#define ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM (1<<1) #define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE (1<<2) #define ACPI_CEDT_CFMWS_RESTRICT_PMEM (1<<3) #define ACPI_CEDT_CFMWS_RESTRICT_FIXED (1<<4) -- cgit v1.2.3 From a1ffc8ad3165fa1cf6a60c6a4b4e00dfd6603cf2 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Wed, 10 Sep 2025 14:59:33 +0800 Subject: cgroup: refactor the cgroup_attach_lock code to make it clearer Dynamic cgroup migration involving threadgroup locks can be in one of two states: no lock held, or holding the global lock. Explicitly declaring the different lock modes to make the code easier to understand and facilitates future extensions of the lock modes. 
Signed-off-by: Yi Tao Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 92ed6d18266d..ff3c7d0e3e01 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -140,6 +140,14 @@ enum { __CFTYPE_ADDED = (1 << 18), }; +enum cgroup_attach_lock_mode { + /* Default */ + CGRP_ATTACH_LOCK_GLOBAL, + + /* When pid=0 && threadgroup=false, see comments in cgroup_procs_write_start */ + CGRP_ATTACH_LOCK_NONE, +}; + /* * cgroup_file is the handle for a file instance created in a cgroup which * is used, for example, to generate file changed notifications. This can -- cgit v1.2.3 From 0568f89d4fb82d98001baeb870e92f43cd1f7317 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Wed, 10 Sep 2025 14:59:35 +0800 Subject: cgroup: replace global percpu_rwsem with per threadgroup rwsem when writing to cgroup.procs The static usage pattern of creating a cgroup, enabling controllers, and then seeding it with CLONE_INTO_CGROUP doesn't require write locking cgroup_threadgroup_rwsem and thus doesn't benefit from this patch. To avoid affecting other users, the per threadgroup rwsem is only used when favordynmods is enabled. As computer hardware advances, modern systems are typically equipped with many CPU cores and large amounts of memory, enabling the deployment of numerous applications. On such systems, container creation and deletion become frequent operations, making cgroup process migration no longer a cold path. This leads to noticeable contention with common process operations such as fork, exec, and exit. To alleviate the contention between cgroup process migration and operations like process fork, this patch modifies lock to take the write lock on signal_struct->group_rwsem when writing pid to cgroup.procs/threads instead of holding a global write lock. 
Cgroup process migration has historically relied on signal_struct->group_rwsem to protect thread group integrity. In commit <1ed1328792ff> ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem"), this was changed to a global cgroup_threadgroup_rwsem. The advantage of using a global lock was simplified handling of process group migrations. This patch retains the use of the global lock for protecting process group migration, while reducing contention by using per thread group lock during cgroup.procs/threads writes. The locking behavior is as follows: write cgroup.procs/threads | process fork,exec,exit | process group migration ------------------------------------------------------------------------------ cgroup_lock() | down_read(&g_rwsem) | cgroup_lock() down_write(&p_rwsem) | down_read(&p_rwsem) | down_write(&g_rwsem) critical section | critical section | critical section up_write(&p_rwsem) | up_read(&p_rwsem) | up_write(&g_rwsem) cgroup_unlock() | up_read(&g_rwsem) | cgroup_unlock() g_rwsem denotes cgroup_threadgroup_rwsem, p_rwsem denotes signal_struct->group_rwsem. This patch eliminates contention between cgroup migration and fork operations for threads that belong to different thread groups, thereby reducing the long-tail latency of cgroup migrations and lowering system load. With this patch, under heavy fork and exec interference, the long-tail latency of cgroup migration has been reduced from milliseconds to microseconds. Under heavy cgroup migration interference, the multi-CPU score of the spawn test case in UnixBench increased by 9%. tj: Update comment in cgroup_favor_dynmods() and switch WARN_ONCE() to pr_warn_once(). 
Signed-off-by: Yi Tao Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 17 ++++++++++++++++- include/linux/sched/signal.h | 4 ++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ff3c7d0e3e01..93318fce31f3 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -91,6 +91,12 @@ enum { * cgroup_threadgroup_rwsem. This makes hot path operations such as * forks and exits into the slow path and more expensive. * + * Alleviate the contention between fork, exec, exit operations and + * writing to cgroup.procs by taking a per threadgroup rwsem instead of + * the global cgroup_threadgroup_rwsem. Fork and other operations + * from threads in different thread groups no longer contend with + * writing to cgroup.procs. + * * The static usage pattern of creating a cgroup, enabling controllers, * and then seeding it with CLONE_INTO_CGROUP doesn't require write * locking cgroup_threadgroup_rwsem and thus doesn't benefit from @@ -146,6 +152,9 @@ enum cgroup_attach_lock_mode { /* When pid=0 && threadgroup=false, see comments in cgroup_procs_write_start */ CGRP_ATTACH_LOCK_NONE, + + /* When favordynmods is on, see comments above CGRP_ROOT_FAVOR_DYNMODS */ + CGRP_ATTACH_LOCK_PER_THREADGROUP, }; /* @@ -846,6 +855,7 @@ struct cgroup_subsys { }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; +extern bool cgroup_enable_per_threadgroup_rwsem; struct cgroup_of_peak { unsigned long value; @@ -857,11 +867,14 @@ struct cgroup_of_peak { * @tsk: target task * * Allows cgroup operations to synchronize against threadgroup changes - * using a percpu_rw_semaphore. + * using a global percpu_rw_semaphore and a per threadgroup rw_semaphore when + * favordynmods is on. See the comment above CGRP_ROOT_FAVOR_DYNMODS definition. 
*/ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { percpu_down_read(&cgroup_threadgroup_rwsem); + if (cgroup_enable_per_threadgroup_rwsem) + down_read(&tsk->signal->cgroup_threadgroup_rwsem); } /** @@ -872,6 +885,8 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) */ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { + if (cgroup_enable_per_threadgroup_rwsem) + up_read(&tsk->signal->cgroup_threadgroup_rwsem); percpu_up_read(&cgroup_threadgroup_rwsem); } diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1ef1edbaaf79..7d6449982822 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -226,6 +226,10 @@ struct signal_struct { struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + struct rw_semaphore cgroup_threadgroup_rwsem; +#endif + /* * Thread is the potential origin of an oom condition; kill first on * oom -- cgit v1.2.3 From 64102d9bbc3d41dac5188b8fba75b1344c438970 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Sep 2025 10:02:20 +0200 Subject: netfilter: nf_tables: place base_seq in struct net This will soon be read from packet path around same time as the gencursor. Both gencursor and base_seq get incremented almost at the same time, so it makes sense to place them in the same structure. This doesn't increase struct net size on 64bit due to padding. 
Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 1 - include/net/netns/nftables.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 891e43a01bdc..3faa80f5d801 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1912,7 +1912,6 @@ struct nftables_pernet { struct mutex commit_mutex; u64 table_handle; u64 tstamp; - unsigned int base_seq; unsigned int gc_seq; u8 validate_state; struct work_struct destroy_work; diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index cc8060c017d5..99dd166c5d07 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -3,6 +3,7 @@ #define _NETNS_NFTABLES_H_ struct netns_nftables { + unsigned int base_seq; u8 gencursor; }; -- cgit v1.2.3 From 11fe5a82e53ac3581a80c88e0e35fb8a80e15f48 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Sep 2025 10:02:21 +0200 Subject: netfilter: nf_tables: make nft_set_do_lookup available unconditionally This function was added for retpoline mitigation and is replaced by a static inline helper if mitigations are not enabled. Enable this helper function unconditionally so next patch can add a lookup restart mechanism to fix possible false negatives while transactions are in progress. Adding lookup restarts in nft_lookup_eval doesn't work as nft_objref would then need the same copypaste loop. This patch is separate to ease review of the actual bug fix. 
Suggested-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables_core.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 6c2f483d9828..656e784714f3 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -109,17 +109,11 @@ nft_hash_lookup_fast(const struct net *net, const struct nft_set *set, const struct nft_set_ext * nft_hash_lookup(const struct net *net, const struct nft_set *set, const u32 *key); +#endif + const struct nft_set_ext * nft_set_do_lookup(const struct net *net, const struct nft_set *set, const u32 *key); -#else -static inline const struct nft_set_ext * -nft_set_do_lookup(const struct net *net, const struct nft_set *set, - const u32 *key) -{ - return set->ops->lookup(net, set, key); -} -#endif /* called from nft_pipapo_avx2.c */ const struct nft_set_ext * -- cgit v1.2.3 From 1dfdf4527fd391f653c53b634af9122613c58904 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 30 Aug 2025 18:48:32 +0200 Subject: iio: adc: exynos_adc: Drop platform data support There are no Samsung Exynos SoC ADC driver users which bind via platform ID, thus platform data is never set and can be dropped. 
Reviewed-by: Andy Shevchenko Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250830-s3c-cleanup-adc-v2-3-4f8299343d32@linaro.org Signed-off-by: Jonathan Cameron --- include/linux/platform_data/touchscreen-s3c2410.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 include/linux/platform_data/touchscreen-s3c2410.h (limited to 'include') diff --git a/include/linux/platform_data/touchscreen-s3c2410.h b/include/linux/platform_data/touchscreen-s3c2410.h deleted file mode 100644 index bf8d3b9d7c6a..000000000000 --- a/include/linux/platform_data/touchscreen-s3c2410.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2005 Arnaud Patard -*/ - -#ifndef __TOUCHSCREEN_S3C2410_H -#define __TOUCHSCREEN_S3C2410_H - -struct s3c2410_ts_mach_info { - int delay; - int presc; - int oversampling_shift; - void (*cfg_gpio)(struct platform_device *dev); -}; - -extern void s3c24xx_ts_set_platdata(struct s3c2410_ts_mach_info *); -extern void s3c64xx_ts_set_platdata(struct s3c2410_ts_mach_info *); - -/* defined by architecture to configure gpio */ -extern void s3c24xx_ts_cfg_gpio(struct platform_device *dev); - -#endif /*__TOUCHSCREEN_S3C2410_H */ -- cgit v1.2.3 From cec1aec9c46305743a2d4a1bfb06f6b0374d5ed0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 31 Aug 2025 12:48:22 +0200 Subject: iio: consumers: Add an iio_multiply_value() helper function The channel-scale handling in iio_convert_raw_to_processed() in essence does the following: processed = raw * caller-provided-scale * channel-scale Which can also be written as: multiplier = raw * caller-provided-scale iio-value = channel-scale processed = multiplier * iio-value Where iio-value is a set of IIO_VAL_* type + val + val2 integers, being able to handle multiplication of iio-values like this is something which is useful to have in general and, as previous bugfixes to iio_convert_raw_to_processed() have shown, also tricky to 
implement. Split the iio-value multiplication code from iio_convert_raw_to_processed() out into a new iio_multiply_value() helper. This serves multiple purposes: 1. Having this split out allows writing a KUnit test for this. 2. Having this split out allows re-use to get better precision when scaling values in iio_read_channel_processed_scale(). Reviewed-by: Andy Shevchenko Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20250831104825.15097-4-hansg@kernel.org Signed-off-by: Jonathan Cameron --- include/linux/iio/consumer.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 6a4479616479..a38b277c2c02 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -381,6 +381,24 @@ int iio_read_channel_offset(struct iio_channel *chan, int *val, int iio_read_channel_scale(struct iio_channel *chan, int *val, int *val2); +/** + * iio_multiply_value() - Multiply an IIO value + * @result: Destination pointer for the multiplication result + * @multiplier: Multiplier. + * @type: One of the IIO_VAL_* constants. This decides how the @val and + * @val2 parameters are interpreted. + * @val: Value being multiplied. + * @val2: Value being multiplied. @val2 use depends on type. + * + * Multiply an IIO value with a s64 multiplier storing the result as + * IIO_VAL_INT. This is typically used for scaling. + * + * Returns: + * IIO_VAL_INT on success or a negative error-number on failure. + */ +int iio_multiply_value(int *result, s64 multiplier, + unsigned int type, int val, int val2); + /** * iio_convert_raw_to_processed() - Converts a raw value to a processed value * @chan: The channel being queried -- cgit v1.2.3 From 50243079865ae7c150bc54ea3ed59077cdf3da03 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 4 Sep 2025 12:16:39 +1000 Subject: ttm/bo: add an API to populate a bo before exporting. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While discussing cgroups we noticed a problem where you could export a BO to a dma-buf without it ever being backed or accounted for. This meant in low memory situations or eventually with cgroups, a lower privileged process might cause the compositor to try and allocate a lot of memory on its behalf and this could fail. At least make sure the exporter has managed to allocate the RAM at least once before exporting the object. This only applies currently to TTM_PL_SYSTEM objects, because GTT objects get populated on first validate, and VRAM doesn't use TT. Reviewed-by: Christian Koenig Cc: Thomas Hellström Cc: Simona Vetter Signed-off-by: Dave Airlie Reviewed-by: Thomas Hellström Signed-off-by: Dave Airlie Link: https://lore.kernel.org/r/20250904021643.2050497-1-airlied@gmail.com --- include/drm/ttm/ttm_bo.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 479b7ed075c0..e664a96540eb 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -466,6 +466,8 @@ pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res, void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); int ttm_bo_populate(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx); +int ttm_bo_setup_export(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx); /* Driver LRU walk helpers initially targeted for shrinking. */ -- cgit v1.2.3 From 6fef6ae764be8a77f61ad3b6937ba82fe8358045 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 7 Sep 2025 21:43:20 +0100 Subject: net: ethtool: fix wrong type used in struct kernel_ethtool_ts_info In C, enumerated types do not have a defined size, apart from being compatible with one of the standard types. 
This allows an ABI / compiler to choose the type of an enum depending on the values it needs to store, and storing larger values in it can lead to undefined behaviour. The tx_types and rx_filters members of struct kernel_ethtool_ts_info are defined as enumerated types, but are bit arrays, where each bit is defined by the enumerated type. This means they typically store values in excess of the maximum value of the enumerated type, in fact (1 << max_value) and thus must not be declared using the enumerated type. Fix both of these to use u32, as per the corresponding __u32 UAPI type. Fixes: 2111375b85ad ("net: Add struct kernel_ethtool_ts_info") Signed-off-by: Russell King (Oracle) Reviewed-by: Kory Maincent Link: https://patch.msgid.link/E1uvMEK-00000003Amd-2pWR@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index de5bd76a400c..d7d757e72554 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -856,8 +856,8 @@ struct kernel_ethtool_ts_info { enum hwtstamp_provider_qualifier phc_qualifier; enum hwtstamp_source phc_source; int phc_phyindex; - enum hwtstamp_tx_types tx_types; - enum hwtstamp_rx_filters rx_filters; + u32 tx_types; + u32 rx_filters; }; /** -- cgit v1.2.3 From b2461e20fa9ac18b1305bba5bc7e22ebf644ea01 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 25 Aug 2025 15:00:30 +0800 Subject: firmware: imx: Add stub functions for SCMI MISC API To ensure successful builds when CONFIG_IMX_SCMI_MISC_DRV is not enabled, this patch adds static inline stub implementations for the following functions: - scmi_imx_misc_ctrl_get() - scmi_imx_misc_ctrl_set() These stubs return -EOPNOTSUPP to indicate that the functionality is not supported in the current configuration. 
This avoids potential build or link errors in code that conditionally calls these functions based on feature availability. This patch also drops the changes in commit 540c830212ed ("firmware: imx: remove duplicate scmi_imx_misc_ctrl_get()"). The original change aimed to simplify the handling of optional features by removing conditional stubs. However, the use of conditional stubs is necessary when CONFIG_IMX_SCMI_MISC_DRV is n, while consumer driver is set to y. This is not a matter of preserving legacy patterns, but rather to ensure that there is no link error whether for module or built-in. Fixes: 0b4f8a68b292 ("firmware: imx: Add i.MX95 MISC driver") Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/sm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index d4212bc42b2c..99c15bbb46aa 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -26,8 +26,20 @@ #define SCMI_IMX94_CTRL_SAI3_MCLK 5U /*!< WAKE SAI3 MCLK */ #define SCMI_IMX94_CTRL_SAI4_MCLK 6U /*!< WAKE SAI4 MCLK */ +#if IS_ENABLED(CONFIG_IMX_SCMI_MISC_DRV) int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); int scmi_imx_misc_ctrl_set(u32 id, u32 val); +#else +static inline int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val) +{ + return -EOPNOTSUPP; +} +#endif int scmi_imx_cpu_start(u32 cpuid, bool start); int scmi_imx_cpu_started(u32 cpuid, bool *started); -- cgit v1.2.3 From 3fb91b5c86d0fb5ff6f65c30a4f20193166e22fe Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 25 Aug 2025 15:00:31 +0800 Subject: firmware: imx: Add stub functions for SCMI LMM API To ensure successful builds when CONFIG_IMX_SCMI_LMM_DRV is not enabled, this patch adds static inline stub implementations for the following functions: - 
scmi_imx_lmm_operation() - scmi_imx_lmm_info() - scmi_imx_lmm_reset_vector_set() These stubs return -EOPNOTSUPP to indicate that the functionality is not supported in the current configuration. This avoids potential build or link errors in code that conditionally calls these functions based on feature availability. Fixes: 7242bbf418f0 ("firmware: imx: Add i.MX95 SCMI LMM driver") Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/sm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index 99c15bbb46aa..f2a72177bb37 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -56,7 +56,24 @@ enum scmi_imx_lmm_op { #define SCMI_IMX_LMM_OP_FORCEFUL 0 #define SCMI_IMX_LMM_OP_GRACEFUL BIT(0) +#if IS_ENABLED(CONFIG_IMX_SCMI_LMM_DRV) int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags); int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info); int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector); +#else +static inline int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector) +{ + return -EOPNOTSUPP; +} +#endif #endif -- cgit v1.2.3 From 222accf05fc42f68ae02065d9c1542c20315118b Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 25 Aug 2025 15:00:32 +0800 Subject: firmware: imx: Add stub functions for SCMI CPU API To ensure successful builds when CONFIG_IMX_SCMI_CPU_DRV is not enabled, this patch adds static inline stub implementations for the following functions: - scmi_imx_cpu_start() - scmi_imx_cpu_started() - scmi_imx_cpu_reset_vector_set() These stubs return -EOPNOTSUPP to indicate that 
the functionality is not supported in the current configuration. This avoids potential build or link errors in code that conditionally calls these functions based on feature availability. Fixes: 1055faa5d660 ("firmware: imx: Add i.MX95 SCMI CPU driver") Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- include/linux/firmware/imx/sm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index f2a72177bb37..a33b45027356 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -41,10 +41,28 @@ static inline int scmi_imx_misc_ctrl_set(u32 id, u32 val) } #endif +#if IS_ENABLED(CONFIG_IMX_SCMI_CPU_DRV) int scmi_imx_cpu_start(u32 cpuid, bool start); int scmi_imx_cpu_started(u32 cpuid, bool *started); int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, bool boot, bool resume); +#else +static inline int scmi_imx_cpu_start(u32 cpuid, bool start) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_cpu_started(u32 cpuid, bool *started) +{ + return -EOPNOTSUPP; +} + +static inline int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, + bool boot, bool resume) +{ + return -EOPNOTSUPP; +} +#endif enum scmi_imx_lmm_op { SCMI_IMX_LMM_BOOT, -- cgit v1.2.3 From 83597c841ed53807a99a2ee837a8cbc3541ce62a Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:45:59 +0530 Subject: RDMA: Add IONIC to rdma_driver_id definition Define RDMA_DRIVER_IONIC in enum rdma_driver_id. 
Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-8-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/ib_user_ioctl_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index fe15bc7e9f70..89e6a3f13191 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -255,6 +255,7 @@ enum rdma_driver_id { RDMA_DRIVER_SIW, RDMA_DRIVER_ERDMA, RDMA_DRIVER_MANA, + RDMA_DRIVER_IONIC, }; enum ib_uverbs_gid_type { -- cgit v1.2.3 From e8521822c733c6deab0f339843cd37cd62c12795 Mon Sep 17 00:00:00 2001 From: Abhijit Gangurde Date: Wed, 3 Sep 2025 11:46:02 +0530 Subject: RDMA/ionic: Register device ops for control path Implement device supported verb APIs for control path. Co-developed-by: Andrew Boyer Signed-off-by: Andrew Boyer Co-developed-by: Allen Hubbe Signed-off-by: Allen Hubbe Signed-off-by: Abhijit Gangurde Link: https://patch.msgid.link/20250903061606.4139957-11-abhijit.gangurde@amd.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/ionic-abi.h | 115 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 include/uapi/rdma/ionic-abi.h (limited to 'include') diff --git a/include/uapi/rdma/ionic-abi.h b/include/uapi/rdma/ionic-abi.h new file mode 100644 index 000000000000..7b589d3e9728 --- /dev/null +++ b/include/uapi/rdma/ionic-abi.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc */ + +#ifndef IONIC_ABI_H +#define IONIC_ABI_H + +#include + +#define IONIC_ABI_VERSION 1 + +#define IONIC_EXPDB_64 1 +#define IONIC_EXPDB_128 2 +#define IONIC_EXPDB_256 4 +#define IONIC_EXPDB_512 8 + +#define IONIC_EXPDB_SQ 1 +#define IONIC_EXPDB_RQ 2 + +#define IONIC_CMB_ENABLE 1 +#define IONIC_CMB_REQUIRE 2 +#define IONIC_CMB_EXPDB 4 +#define 
IONIC_CMB_WC 8 +#define IONIC_CMB_UC 16 + +struct ionic_ctx_req { + __u32 rsvd[2]; +}; + +struct ionic_ctx_resp { + __u32 rsvd; + __u32 page_shift; + + __aligned_u64 dbell_offset; + + __u16 version; + __u8 qp_opcodes; + __u8 admin_opcodes; + + __u8 sq_qtype; + __u8 rq_qtype; + __u8 cq_qtype; + __u8 admin_qtype; + + __u8 max_stride; + __u8 max_spec; + __u8 udma_count; + __u8 expdb_mask; + __u8 expdb_qtypes; + + __u8 rsvd2[3]; +}; + +struct ionic_qdesc { + __aligned_u64 addr; + __u32 size; + __u16 mask; + __u8 depth_log2; + __u8 stride_log2; +}; + +struct ionic_ah_resp { + __u32 ahid; + __u32 pad; +}; + +struct ionic_cq_req { + struct ionic_qdesc cq[2]; + __u8 udma_mask; + __u8 rsvd[7]; +}; + +struct ionic_cq_resp { + __u32 cqid[2]; + __u8 udma_mask; + __u8 rsvd[7]; +}; + +struct ionic_qp_req { + struct ionic_qdesc sq; + struct ionic_qdesc rq; + __u8 sq_spec; + __u8 rq_spec; + __u8 sq_cmb; + __u8 rq_cmb; + __u8 udma_mask; + __u8 rsvd[3]; +}; + +struct ionic_qp_resp { + __u32 qpid; + __u8 sq_cmb; + __u8 rq_cmb; + __u8 udma_idx; + __u8 rsvd[1]; + __aligned_u64 sq_cmb_offset; + __aligned_u64 rq_cmb_offset; +}; + +struct ionic_srq_req { + struct ionic_qdesc rq; + __u8 rq_spec; + __u8 rq_cmb; + __u8 udma_mask; + __u8 rsvd[5]; +}; + +struct ionic_srq_resp { + __u32 qpid; + __u8 rq_cmb; + __u8 udma_idx; + __u8 rsvd[2]; + __aligned_u64 rq_cmb_offset; +}; + +#endif /* IONIC_ABI_H */ -- cgit v1.2.3 From c924c65f52c300ba36373e140a43a8e723c3abdd Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 13 Aug 2025 08:02:52 +0200 Subject: tee: implement protected DMA-heap Implement DMA heap for protected DMA-buf allocation in the TEE subsystem. Protected memory refers to memory buffers behind a hardware enforced firewall. It is not accessible to the kernel during normal circumstances but rather only accessible to certain hardware IPs or CPUs executing in higher or differently privileged mode than the kernel itself. 
This interface allows allocating and managing such protected memory buffers via interaction with a TEE implementation. The protected memory is allocated for a specific use-case, like Secure Video Playback, Trusted UI, or Secure Video Recording where certain hardware devices can access the memory. The DMA-heaps are enabled explicitly by the TEE backend driver. The TEE backend driver needs to implement a protected memory pool to manage the protected memory. Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index a38494d6b5f4..28b65010b9ed 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -8,9 +8,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -30,6 +32,12 @@ #define TEE_DEVICE_FLAG_REGISTERED 0x1 #define TEE_MAX_DEV_NAME_LEN 32 +enum tee_dma_heap_id { + TEE_DMA_HEAP_SECURE_VIDEO_PLAY = 1, + TEE_DMA_HEAP_TRUSTED_UI, + TEE_DMA_HEAP_SECURE_VIDEO_RECORD, +}; + /** * struct tee_device - TEE Device representation * @name: name of device @@ -116,6 +124,36 @@ struct tee_desc { u32 flags; }; +/** + * struct tee_protmem_pool - protected memory pool + * @ops: operations + * + * This is an abstract interface where this struct is expected to be + * embedded in another struct specific to the implementation. 
+ */ +struct tee_protmem_pool { + const struct tee_protmem_pool_ops *ops; +}; + +/** + * struct tee_protmem_pool_ops - protected memory pool operations + * @alloc: called when allocating protected memory + * @free: called when freeing protected memory + * @update_shm: called when registering a dma-buf to update the @shm + * with physical address of the buffer or to return the + * @parent_shm of the memory pool + * @destroy_pool: called when destroying the pool + */ +struct tee_protmem_pool_ops { + int (*alloc)(struct tee_protmem_pool *pool, struct sg_table *sgt, + size_t size, size_t *offs); + void (*free)(struct tee_protmem_pool *pool, struct sg_table *sgt); + int (*update_shm)(struct tee_protmem_pool *pool, struct sg_table *sgt, + size_t offs, struct tee_shm *shm, + struct tee_shm **parent_shm); + void (*destroy_pool)(struct tee_protmem_pool *pool); +}; + /** * tee_device_alloc() - Allocate a new struct tee_device instance * @teedesc: Descriptor for this driver @@ -154,6 +192,11 @@ int tee_device_register(struct tee_device *teedev); */ void tee_device_unregister(struct tee_device *teedev); +int tee_device_register_dma_heap(struct tee_device *teedev, + enum tee_dma_heap_id id, + struct tee_protmem_pool *pool); +void tee_device_put_all_dma_heaps(struct tee_device *teedev); + /** * tee_device_set_dev_groups() - Set device attribute groups * @teedev: Device to register @@ -229,6 +272,16 @@ static inline void tee_shm_pool_free(struct tee_shm_pool *pool) pool->ops->destroy_pool(pool); } +/** + * tee_protmem_static_pool_alloc() - Create a protected memory manager + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + * + * @returns pointer to a 'struct tee_protmem_pool' or an ERR_PTR on failure. + */ +struct tee_protmem_pool *tee_protmem_static_pool_alloc(phys_addr_t paddr, + size_t size); + /** * tee_get_drvdata() - Return driver_data pointer * @returns the driver_data pointer supplied to tee_register(). 
-- cgit v1.2.3 From 146bf4e75ecab9759ed78c9d167e860042d627fb Mon Sep 17 00:00:00 2001 From: Etienne Carriere Date: Wed, 13 Aug 2025 08:02:54 +0200 Subject: tee: new ioctl to register a tee_shm from a dmabuf file descriptor Add a userspace API to create a tee_shm object that refers to a dmabuf reference. Userspace registers the dmabuf file descriptor in a tee_shm object. The registration is completed with a tee_shm returned file descriptor. Userspace is free to close the dmabuf file descriptor after it has been registered since all the resources are now held via the new tee_shm object. Closing the tee_shm file descriptor will eventually release all resources used by the tee_shm object when all references are released. The new IOCTL, TEE_IOC_SHM_REGISTER_FD, supports dmabuf references to physically contiguous memory buffers. Dmabuf references acquired from the TEE DMA-heap can be used as protected memory for Secure Video Path and such use cases. It depends on the TEE and the TEE driver if dmabuf references acquired by other means can be used. A new tee_shm flag is added to identify tee_shm objects built from a registered dmabuf, TEE_SHM_DMA_BUF. 
Signed-off-by: Etienne Carriere Signed-off-by: Olivier Masse Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 1 + include/linux/tee_drv.h | 10 ++++++++++ include/uapi/linux/tee.h | 31 +++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'include') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index 28b65010b9ed..b6c54b34a8b5 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -28,6 +28,7 @@ #define TEE_SHM_USER_MAPPED BIT(1) /* Memory mapped in user space */ #define TEE_SHM_POOL BIT(2) /* Memory allocated from pool */ #define TEE_SHM_PRIV BIT(3) /* Memory private to TEE driver */ +#define TEE_SHM_DMA_BUF BIT(4) /* Memory with dma-buf handle */ #define TEE_DEVICE_FLAG_REGISTERED 0x1 #define TEE_MAX_DEV_NAME_LEN 32 diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a54c203000ed..824f1251de60 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -116,6 +116,16 @@ struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length); +/** + * tee_shm_register_fd() - Register shared memory from file descriptor + * + * @ctx: Context that allocates the shared memory + * @fd: Shared memory file descriptor reference + * + * @returns a pointer to 'struct tee_shm' on success, and ERR_PTR on failure + */ +struct tee_shm *tee_shm_register_fd(struct tee_context *ctx, int fd); + /** * tee_shm_free() - Free shared memory * @shm: Handle to shared memory to free diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index d0430bee8292..d843cf980d98 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -378,6 +378,37 @@ struct tee_ioctl_shm_register_data { __s32 id; }; +/** + * struct tee_ioctl_shm_register_fd_data - Shared memory registering argument + * @fd: [in] File descriptor identifying dmabuf reference + * @size: 
[out] Size of referenced memory + * @flags: [in] Flags to/from allocation. + * @id: [out] Identifier of the shared memory + * + * The flags field should currently be zero as input. Updated by the call + * with actual flags as defined by TEE_IOCTL_SHM_* above. + * This structure is used as argument for TEE_IOC_SHM_REGISTER_FD below. + */ +struct tee_ioctl_shm_register_fd_data { + __s64 fd; + __u64 size; + __u32 flags; + __s32 id; +}; + +/** + * TEE_IOC_SHM_REGISTER_FD - register a shared memory from a file descriptor + * + * Returns a file descriptor on success or < 0 on failure + * + * The returned file descriptor refers to the shared memory object in the + * kernel. The supplied file deccriptor can be closed if it's not needed + * for other purposes. The shared memory is freed when the descriptor is + * closed. + */ +#define TEE_IOC_SHM_REGISTER_FD _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 8, \ + struct tee_ioctl_shm_register_fd_data) + /** * TEE_IOC_SHM_REGISTER - Register shared memory argument * -- cgit v1.2.3 From ab09dd6d9201af9930efd5a5a0cb56a0fea6a169 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 13 Aug 2025 08:02:55 +0200 Subject: tee: add tee_shm_alloc_dma_mem() Add tee_shm_alloc_dma_mem() to allocate DMA memory. The memory is represented by a tee_shm object using the new flag TEE_SHM_DMA_MEM to identify it as DMA memory. The allocated memory will later be lent to the TEE to be used as protected memory. 
Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index b6c54b34a8b5..7b0c1da2ca6c 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -29,6 +29,8 @@ #define TEE_SHM_POOL BIT(2) /* Memory allocated from pool */ #define TEE_SHM_PRIV BIT(3) /* Memory private to TEE driver */ #define TEE_SHM_DMA_BUF BIT(4) /* Memory with dma-buf handle */ +#define TEE_SHM_DMA_MEM BIT(5) /* Memory allocated with */ + /* dma_alloc_pages() */ #define TEE_DEVICE_FLAG_REGISTERED 0x1 #define TEE_MAX_DEV_NAME_LEN 32 @@ -298,6 +300,9 @@ void *tee_get_drvdata(struct tee_device *teedev); */ struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size); +struct tee_shm *tee_shm_alloc_dma_mem(struct tee_context *ctx, + size_t page_count); + int tee_dyn_shm_alloc_helper(struct tee_shm *shm, size_t size, size_t align, int (*shm_register)(struct tee_context *ctx, struct tee_shm *shm, -- cgit v1.2.3 From 1827f773e416842bb0a1be93f313e02591e0b0c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 5 Sep 2025 15:15:38 -0700 Subject: net: xdp: pass full flags to xdp_update_skb_shared_info() xdp_update_skb_shared_info() needs to update skb state which was maintained in xdp_buff / frame. Pass full flags into it, instead of breaking it out bit by bit. We will need to add a bit for unreadable frags (even tho XDP doesn't support those the driver paths may be common), at which point almost all call sites would become: xdp_update_skb_shared_info(skb, num_frags, sinfo->xdp_frags_size, MY_PAGE_SIZE * num_frags, xdp_buff_is_frag_pfmemalloc(xdp), xdp_buff_is_frag_unreadable(xdp)); Keep a helper for accessing the flags, in case we need to transform them somehow in the future (e.g. to cover up xdp_buff vs xdp_frame differences). 
While we are touching all callers - rename the helper to xdp_update_skb_frags_info(), previous name may have implied that it's shinfo that's updated. We are updating flags in struct sk_buff based on frags that got attached. Signed-off-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20250905221539.2930285-2-kuba@kernel.org Acked-by: Stanislav Fomichev Reviewed-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/net/xdp.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index af60e11b336c..976cfd2f113c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -116,15 +116,14 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; } -static __always_inline bool -xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp) +static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) { - return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); + xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; } -static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) +static __always_inline u32 xdp_buff_get_skb_flags(const struct xdp_buff *xdp) { - xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; + return xdp->flags; } static __always_inline void @@ -294,10 +293,10 @@ static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame) return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); } -static __always_inline bool -xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) +static __always_inline u32 +xdp_frame_get_skb_flags(const struct xdp_frame *frame) { - return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); + return frame->flags; } #define XDP_BULK_QUEUE_SIZE 16 @@ -334,9 +333,9 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) } static inline void -xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, - unsigned int size,
unsigned int truesize, - bool pfmemalloc) +xdp_update_skb_frags_info(struct sk_buff *skb, u8 nr_frags, + unsigned int size, unsigned int truesize, + u32 xdp_flags) { struct skb_shared_info *sinfo = skb_shinfo(skb); @@ -350,7 +349,7 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, skb->len += size; skb->data_len += size; skb->truesize += truesize; - skb->pfmemalloc |= pfmemalloc; + skb->pfmemalloc |= !!(xdp_flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); } /* Avoids inlining WARN macro in fast-path */ -- cgit v1.2.3 From 6bffdc0f88f85cd15b261286be4dc3c62ddea7d3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 5 Sep 2025 15:15:39 -0700 Subject: net: xdp: handle frags with unreadable memory We don't expect frags with unreadable memory to be presented to XDP programs today, but the XDP helpers are designed to be usable whether XDP is enabled or not. Support handling frags with unreadable memory. Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250905221539.2930285-3-kuba@kernel.org Acked-by: Stanislav Fomichev Reviewed-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- include/net/xdp.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 976cfd2f113c..6fd294fa6841 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -76,6 +76,11 @@ enum xdp_buff_flags { XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under * pressure */ + /* frags have unreadable mem, this can't be true for real XDP packets, + * but drivers may use XDP helpers to construct Rx pkt state even when + * XDP program is not attached. 
+ */ + XDP_FLAGS_FRAGS_UNREADABLE = BIT(2), }; struct xdp_buff { @@ -121,6 +126,11 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; } +static __always_inline void xdp_buff_set_frag_unreadable(struct xdp_buff *xdp) +{ + xdp->flags |= XDP_FLAGS_FRAGS_UNREADABLE; +} + static __always_inline u32 xdp_buff_get_skb_flags(const struct xdp_buff *xdp) { return xdp->flags; @@ -270,6 +280,8 @@ static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem, if (unlikely(netmem_is_pfmemalloc(netmem))) xdp_buff_set_frag_pfmemalloc(xdp); + if (unlikely(netmem_is_net_iov(netmem))) + xdp_buff_set_frag_unreadable(xdp); return true; } @@ -350,6 +362,7 @@ xdp_update_skb_frags_info(struct sk_buff *skb, u8 nr_frags, skb->data_len += size; skb->truesize += truesize; skb->pfmemalloc |= !!(xdp_flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); + skb->unreadable |= !!(xdp_flags & XDP_FLAGS_FRAGS_UNREADABLE); } /* Avoids inlining WARN macro in fast-path */ -- cgit v1.2.3 From 0d3c4a441686663ad34aa3d6abe8c5317d21e707 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 8 Sep 2025 10:32:32 +0300 Subject: ipv4: icmp: Pass IPv4 control block structure as an argument to __icmp_send() __icmp_send() is used to generate ICMP error messages in response to various situations such as MTU errors (i.e., "Fragmentation Required") and too many hops (i.e., "Time Exceeded"). The skb that generated the error does not necessarily come from the IPv4 layer and does not always have a valid IPv4 control block in skb->cb. Therefore, commit 9ef6b42ad6fd ("net: Add __icmp_send helper.") changed the function to take the IP options structure as argument instead of deriving it from the skb's control block. Some callers of this function such as icmp_send() pass the IP options structure from the skb's control block as in these call paths the control block is known to be valid, but other callers simply pass a zeroed structure. 
A subsequent patch will need __icmp_send() to access more information from the IPv4 control block (specifically, the ifindex of the input interface). As a preparation for this change, change the function to take the IPv4 control block structure as an argument instead of the IP options structure. This makes the function similar to its IPv6 counterpart that already takes the IPv6 control block structure as an argument. No functional changes intended. Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250908073238.119240-3-idosch@nvidia.com Signed-off-by: Paolo Abeni --- include/net/icmp.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/icmp.h b/include/net/icmp.h index caddf4a59ad1..935ee13d9ae9 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -37,10 +37,10 @@ struct sk_buff; struct net; void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, - const struct ip_options *opt); + const struct inet_skb_parm *parm); static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { - __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); + __icmp_send(skb_in, type, code, info, IPCB(skb_in)); } #if IS_ENABLED(CONFIG_NF_NAT) @@ -48,8 +48,10 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); #else static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) { - struct ip_options opts = { 0 }; - __icmp_send(skb_in, type, code, info, &opts); + struct inet_skb_parm parm; + + memset(&parm, 0, sizeof(parm)); + __icmp_send(skb_in, type, code, info, &parm); } #endif -- cgit v1.2.3 From 002ebddd695a53999550e241b71950f1aa0e1ac4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 9 Sep 2025 13:11:20 +0200 Subject: pmdomain: core: Restore behaviour for disabling unused PM domains Recent changes to genpd prevents those PM 
domains being powered-on during initialization from being powered-off during the boot sequence. Based upon whether CONFIG_PM_GENERIC_DOMAINS_OF is set or not, genpd relies on the sync_state mechanism or the genpd_power_off_unused() (which is a late_initcall_sync), to understand when it's okay to allow these PM domains to be powered-off. This new behaviour in genpd has led to problems on different platforms. Let's therefore restore the behavior of genpd_power_off_unused(). Moreover, let's introduce GENPD_FLAG_NO_STAY_ON, to allow genpd OF providers to opt-out from the new behaviour. Link: https://lore.kernel.org/all/20250701114733.636510-1-ulf.hansson@linaro.org/ Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/20250902-rk3576-lockup-regression-v1-1-c4a0c9daeb00@collabora.com/ Reported-by: Nicolas Frattaroli Fixes: 0e789b491ba0 ("pmdomain: core: Leave powered-on genpds on until sync_state") Fixes: 13a4b7fb6260 ("pmdomain: core: Leave powered-on genpds on until late_initcall_sync") Tested-by: Heiko Stuebner Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson --- include/linux/pm_domain.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index c84edf217819..f67a2cb7d781 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -115,6 +115,12 @@ struct dev_pm_domain_list { * genpd provider specific way, likely through a * parent device node. This flag makes genpd to * skip its internal support for this. + * + * GENPD_FLAG_NO_STAY_ON: For genpd OF providers a powered-on PM domain at + * initialization is prevented from being + * powered-off until the ->sync_state() callback is + * invoked. This flag informs genpd to allow a + * power-off without waiting for ->sync_state().
*/ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -126,6 +132,7 @@ struct dev_pm_domain_list { #define GENPD_FLAG_OPP_TABLE_FW (1U << 7) #define GENPD_FLAG_DEV_NAME_FW (1U << 8) #define GENPD_FLAG_NO_SYNC_STATE (1U << 9) +#define GENPD_FLAG_NO_STAY_ON (1U << 10) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ -- cgit v1.2.3 From f0addd325ef692c92c522a2ba4d9db13fc90e664 Mon Sep 17 00:00:00 2001 From: Alexander Kurz Date: Mon, 11 Aug 2025 06:43:58 +0000 Subject: mfd: input: rtc: mc13783: Remove deprecated mc13xxx_irq_ack() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mc13xxx_irq_ack() got deprecated and became dead code with commit 10f9edaeaa30 ("mfd: mc13xxx: Use regmap irq framework for interrupts"). It should be safe to remove it now. Signed-off-by: Alexander Kurz Acked-by: Alexandre Belloni Acked-by: Uwe Kleine-König Acked-by: Dmitry Torokhov # for input Link: https://lore.kernel.org/r/20250811064358.1659-1-akurz@blala.de Signed-off-by: Lee Jones --- include/linux/mfd/mc13xxx.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index f372926d5894..dd46fe424a80 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -31,12 +31,6 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, unsigned int channel, u8 ato, bool atox, unsigned int *sample); -/* Deprecated calls */ -static inline int mc13xxx_irq_ack(struct mc13xxx *mc13xxx, int irq) -{ - return 0; -} - static inline int mc13xxx_irq_request_nounmask(struct mc13xxx *mc13xxx, int irq, irq_handler_t handler, const char *name, void *dev) -- cgit v1.2.3 From cbd2257dc96e3e46217540fcb095a757ffa20d96 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 2 Sep 2025 13:28:08 +0200 Subject: netfilter: nft_meta_bridge: introduce NFT_META_BRI_IIFHWADDR support Expose the input bridge interface 
ethernet address so it can be used to redirect the packet to the receiving physical device for processing. Tested with nft command line tool. table bridge nat { chain PREROUTING { type filter hook prerouting priority 0; policy accept; ether daddr de:ad:00:00:be:ef meta pkttype set host ether daddr set meta ibrhwdr accept } } Joint work with Pablo Neira. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 8e0eb832bc01..7c0c915f0306 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -959,6 +959,7 @@ enum nft_exthdr_attributes { * @NFT_META_SDIF: slave device interface index * @NFT_META_SDIFNAME: slave device interface name * @NFT_META_BRI_BROUTE: packet br_netfilter_broute bit + * @NFT_META_BRI_IIFHWADDR: packet input bridge interface ethernet address */ enum nft_meta_keys { NFT_META_LEN, @@ -999,6 +1000,7 @@ enum nft_meta_keys { NFT_META_SDIFNAME, NFT_META_BRI_BROUTE, __NFT_META_IIFTYPE, + NFT_META_BRI_IIFHWADDR, }; /** -- cgit v1.2.3 From b8cf8fda522d5a37f8948ad8a19a1113cc38710f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 9 Sep 2025 16:30:47 +0200 Subject: fanotify: add watchdog for permission events This is to make it easier to debug issues with AV software, which time and again deadlocks with no indication of where the issue comes from, and the kernel being blamed for the deadlock. Then we need to analyze dumps to prove that the kernel is not in fact at fault. The deadlock comes from recursion: handling the event triggers another permission event, in some roundabout way, obviously, otherwise it would have been found in testing. 
With this patch a warning is printed when permission event is received by userspace but not answered for more than the timeout specified in /proc/sys/fs/fanotify/watchdog_timeout. The watchdog can be turned off by setting the timeout to zero (which is the default). The timeout is very coarse (T <= t < 2T) but I guess it's good enough for the purpose. Overhead should be minimal. Signed-off-by: Miklos Szeredi Reviewed-by: Amir Goldstein Link: https://patch.msgid.link/20250909143053.112171-1-mszeredi@redhat.com Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d4034ddaf392..0d954ea7b179 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -273,6 +273,8 @@ struct fsnotify_group { int f_flags; /* event_f_flags from fanotify_init() */ struct ucounts *ucounts; mempool_t error_events_pool; + /* chained on perm_group_list */ + struct list_head perm_grp_list; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; -- cgit v1.2.3 From 3b1bbfb5fce3ca9fffc92ac1b053b0cfbb1f322b Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Thu, 4 Sep 2025 11:05:27 -0500 Subject: mfd: bq257xx: Add support for BQ25703A core driver The Texas Instruments BQ25703A is an integrated charger manager and boost converter. The MFD driver initializes the device for the regulator driver and power supply driver. 
Signed-off-by: Chris Morgan Link: https://lore.kernel.org/r/20250904160530.66178-3-macroalpha82@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/bq257xx.h | 104 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 include/linux/mfd/bq257xx.h (limited to 'include') diff --git a/include/linux/mfd/bq257xx.h b/include/linux/mfd/bq257xx.h new file mode 100644 index 000000000000..1d6ddc7fb09f --- /dev/null +++ b/include/linux/mfd/bq257xx.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Register definitions for TI BQ257XX + * Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com/ + */ + +#define BQ25703_CHARGE_OPTION_0 0x00 +#define BQ25703_CHARGE_CURRENT 0x02 +#define BQ25703_MAX_CHARGE_VOLT 0x04 +#define BQ25703_OTG_VOLT 0x06 +#define BQ25703_OTG_CURRENT 0x08 +#define BQ25703_INPUT_VOLTAGE 0x0a +#define BQ25703_MIN_VSYS 0x0c +#define BQ25703_IIN_HOST 0x0e +#define BQ25703_CHARGER_STATUS 0x20 +#define BQ25703_PROCHOT_STATUS 0x22 +#define BQ25703_IIN_DPM 0x24 +#define BQ25703_ADCIBAT_CHG 0x28 +#define BQ25703_ADCIINCMPIN 0x2a +#define BQ25703_ADCVSYSVBAT 0x2c +#define BQ25703_MANUFACT_DEV_ID 0x2e +#define BQ25703_CHARGE_OPTION_1 0x30 +#define BQ25703_CHARGE_OPTION_2 0x32 +#define BQ25703_CHARGE_OPTION_3 0x34 +#define BQ25703_ADC_OPTION 0x3a + +#define BQ25703_EN_LWPWR BIT(15) +#define BQ25703_WDTMR_ADJ_MASK GENMASK(14, 13) +#define BQ25703_WDTMR_DISABLE 0 +#define BQ25703_WDTMR_5_SEC 1 +#define BQ25703_WDTMR_88_SEC 2 +#define BQ25703_WDTMR_175_SEC 3 + +#define BQ25703_ICHG_MASK GENMASK(12, 6) +#define BQ25703_ICHG_STEP_UA 64000 +#define BQ25703_ICHG_MIN_UA 64000 +#define BQ25703_ICHG_MAX_UA 8128000 + +#define BQ25703_MAX_CHARGE_VOLT_MASK GENMASK(15, 4) +#define BQ25703_VBATREG_STEP_UV 16000 +#define BQ25703_VBATREG_MIN_UV 1024000 +#define BQ25703_VBATREG_MAX_UV 19200000 + +#define BQ25703_OTG_VOLT_MASK GENMASK(13, 6) +#define BQ25703_OTG_VOLT_STEP_UV 64000 +#define 
BQ25703_OTG_VOLT_MIN_UV 4480000 +#define BQ25703_OTG_VOLT_MAX_UV 20800000 +#define BQ25703_OTG_VOLT_NUM_VOLT 256 + +#define BQ25703_OTG_CUR_MASK GENMASK(14, 8) +#define BQ25703_OTG_CUR_STEP_UA 50000 +#define BQ25703_OTG_CUR_MAX_UA 6350000 + +#define BQ25703_MINVSYS_MASK GENMASK(13, 8) +#define BQ25703_MINVSYS_STEP_UV 256000 +#define BQ25703_MINVSYS_MIN_UV 1024000 +#define BQ25703_MINVSYS_MAX_UV 16128000 + +#define BQ25703_STS_AC_STAT BIT(15) +#define BQ25703_STS_IN_FCHRG BIT(10) +#define BQ25703_STS_IN_PCHRG BIT(9) +#define BQ25703_STS_FAULT_ACOV BIT(7) +#define BQ25703_STS_FAULT_BATOC BIT(6) +#define BQ25703_STS_FAULT_ACOC BIT(5) + +#define BQ25703_IINDPM_MASK GENMASK(14, 8) +#define BQ25703_IINDPM_STEP_UA 50000 +#define BQ25703_IINDPM_MIN_UA 50000 +#define BQ25703_IINDPM_MAX_UA 6400000 +#define BQ25703_IINDPM_DEFAULT_UA 3300000 +#define BQ25703_IINDPM_OFFSET_UA 50000 + +#define BQ25703_ADCIBAT_DISCHG_MASK GENMASK(6, 0) +#define BQ25703_ADCIBAT_CHG_MASK GENMASK(14, 8) +#define BQ25703_ADCIBAT_CHG_STEP_UA 64000 +#define BQ25703_ADCIBAT_DIS_STEP_UA 256000 + +#define BQ25703_ADCIIN GENMASK(15, 8) +#define BQ25703_ADCIINCMPIN_STEP 50000 + +#define BQ25703_ADCVSYS_MASK GENMASK(15, 8) +#define BQ25703_ADCVBAT_MASK GENMASK(7, 0) +#define BQ25703_ADCVSYSVBAT_OFFSET_UV 2880000 +#define BQ25703_ADCVSYSVBAT_STEP 64000 + +#define BQ25703_ADC_CH_MASK GENMASK(7, 0) +#define BQ25703_ADC_CONV_EN BIT(15) +#define BQ25703_ADC_START BIT(14) +#define BQ25703_ADC_FULL_SCALE BIT(13) +#define BQ25703_ADC_CMPIN_EN BIT(7) +#define BQ25703_ADC_VBUS_EN BIT(6) +#define BQ25703_ADC_PSYS_EN BIT(5) +#define BQ25703_ADC_IIN_EN BIT(4) +#define BQ25703_ADC_IDCHG_EN BIT(3) +#define BQ25703_ADC_ICHG_EN BIT(2) +#define BQ25703_ADC_VSYS_EN BIT(1) +#define BQ25703_ADC_VBAT_EN BIT(0) + +#define BQ25703_EN_OTG_MASK BIT(12) + +struct bq257xx_device { + struct i2c_client *client; + struct regmap *regmap; +}; -- cgit v1.2.3 From 948cb194bcb4c01fb4cd029936f0c02b10780394 Mon Sep 17 00:00:00 2001 From: Arnd 
Bergmann Date: Wed, 30 Jul 2025 15:59:24 +0200 Subject: mtd: map: add back asm/barrier.h inclusion The mb() macro is used in this header: In file included from include/linux/mtd/qinfo.h:5, from include/linux/mtd/pfow.h:8, from drivers/mtd/lpddr/lpddr_cmds.c:14: include/linux/mtd/map.h: In function 'inline_map_write': include/linux/mtd/map.h:428:9: error: implicit declaration of function 'mb' [-Wimplicit-function-declaration] Fixes: 56eb7c13b97c ("mtd: map: Don't use "proxy" headers") Signed-off-by: Arnd Bergmann Acked-by: Andy Shevchenko Signed-off-by: Miquel Raynal --- include/linux/mtd/map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 288ef765a44e..75b0b2abc880 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -14,6 +14,7 @@ #include #include #include +#include struct device_node; struct module; -- cgit v1.2.3 From fc02f529a8dbf617f6d211cb693f56a842b6dbe5 Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Sat, 6 Sep 2025 16:53:23 +0300 Subject: dt-bindings: clock: tegra30: Add IDs for CSI pad clocks Tegra30 has CSI pad clock enable bits embedded into PLLD/PLLD2 registers. Add ids for these clocks. Additionally, move TEGRA30_CLK_CLK_MAX into clk-tegra30 source. 
Signed-off-by: Svyatoslav Ryhel Acked-by: Krzysztof Kozlowski Signed-off-by: Thierry Reding --- include/dt-bindings/clock/tegra30-car.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/tegra30-car.h b/include/dt-bindings/clock/tegra30-car.h index f193663e6f28..763b81f80908 100644 --- a/include/dt-bindings/clock/tegra30-car.h +++ b/include/dt-bindings/clock/tegra30-car.h @@ -271,6 +271,7 @@ #define TEGRA30_CLK_AUDIO3_MUX 306 #define TEGRA30_CLK_AUDIO4_MUX 307 #define TEGRA30_CLK_SPDIF_MUX 308 -#define TEGRA30_CLK_CLK_MAX 309 +#define TEGRA30_CLK_CSIA_PAD 309 +#define TEGRA30_CLK_CSIB_PAD 310 #endif /* _DT_BINDINGS_CLOCK_TEGRA30_CAR_H */ -- cgit v1.2.3 From 7526e6db4703d0fe81b5397939c2aefd5fe8d9bc Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Fri, 29 Aug 2025 15:22:31 +0300 Subject: dt-bindings: reset: Add Tegra114 CAR header The way that resets are handled on these Tegra devices is that there is a set of peripheral clocks & resets which are paired up. This is because they are laid out in banks within the CAR (clock and reset) controller. In most cases we're referring to those resets, so you'll often see a clock ID used in conjunction with the same reset ID for a given IP block. In addition to those peripheral resets, there are a number of extra resets that don't have a corresponding clock and which are exposed in registers outside of the peripheral banks, but still part of the CAR. To support those "special" registers, the TEGRA*_RESET() is used to denote resets outside of the regular peripheral resets. Essentially it defines the offset within the CAR at which special resets start. In the above case, Tegra114 has 5 banks with 32 peripheral resets each. The first special reset, TEGRA114_RESET(0), therefore gets ID 5 * 32 + 0 = 160.
Signed-off-by: Svyatoslav Ryhel Reviewed-by: Mikko Perttunen Acked-by: Krzysztof Kozlowski Signed-off-by: Thierry Reding --- include/dt-bindings/reset/nvidia,tegra114-car.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/dt-bindings/reset/nvidia,tegra114-car.h (limited to 'include') diff --git a/include/dt-bindings/reset/nvidia,tegra114-car.h b/include/dt-bindings/reset/nvidia,tegra114-car.h new file mode 100644 index 000000000000..9b8c320402db --- /dev/null +++ b/include/dt-bindings/reset/nvidia,tegra114-car.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * This header provides Tegra114-specific constants for binding + * nvidia,tegra114-car. + */ + +#ifndef _DT_BINDINGS_RESET_NVIDIA_TEGRA114_CAR_H +#define _DT_BINDINGS_RESET_NVIDIA_TEGRA114_CAR_H + +#define TEGRA114_RESET(x) (5 * 32 + (x)) +#define TEGRA114_RST_DFLL_DVCO TEGRA114_RESET(0) + +#endif /* _DT_BINDINGS_RESET_NVIDIA_TEGRA114_CAR_H */ -- cgit v1.2.3 From 67a529b7d3c50a56c162476509361f4fe11350dd Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 15 Aug 2025 12:40:02 -0500 Subject: include: adi-axi-common: add version check function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a version check function for checking ADI AXI IP core versions. These cores use a semantic versioning scheme, so it is useful to have a version check function that can check the minor version to enable features in driver while maintaining backward compatibility. 
Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250815-spi-axi-spi-enigne-improve-version-checks-v1-1-13bde357d5b6@baylibre.com Signed-off-by: Mark Brown --- include/linux/adi-axi-common.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/adi-axi-common.h b/include/linux/adi-axi-common.h index f64f4ad4beda..37962ba530df 100644 --- a/include/linux/adi-axi-common.h +++ b/include/linux/adi-axi-common.h @@ -8,6 +8,8 @@ * https://wiki.analog.com/resources/fpga/docs/hdl/regmap */ +#include + #ifndef ADI_AXI_COMMON_H_ #define ADI_AXI_COMMON_H_ @@ -21,6 +23,25 @@ #define ADI_AXI_PCORE_VER_MINOR(version) (((version) >> 8) & 0xff) #define ADI_AXI_PCORE_VER_PATCH(version) ((version) & 0xff) +/** + * adi_axi_pcore_ver_gteq() - check if a version is satisfied + * @version: the full version read from the hardware + * @major: the major version to compare against + * @minor: the minor version to compare against + * + * ADI AXI IP Cores use semantic versioning, so this can be used to check for + * feature availability. + * + * Return: true if the version is greater than or equal to the specified + * major and minor version, false otherwise. + */ +static inline bool adi_axi_pcore_ver_gteq(u32 version, u32 major, u32 minor) +{ + return ADI_AXI_PCORE_VER_MAJOR(version) > (major) || + (ADI_AXI_PCORE_VER_MAJOR(version) == (major) && + ADI_AXI_PCORE_VER_MINOR(version) >= (minor)); +} + #define ADI_AXI_INFO_FPGA_TECH(info) (((info) >> 24) & 0xff) #define ADI_AXI_INFO_FPGA_FAMILY(info) (((info) >> 16) & 0xff) #define ADI_AXI_INFO_FPGA_SPEED_GRADE(info) (((info) >> 8) & 0xff) -- cgit v1.2.3 From a24cd110e664396061b0a72930734bf419bf88c4 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 9 Sep 2025 19:07:46 +0100 Subject: dt-bindings: clock: renesas,r9a09g047-cpg: Add USB3.0 core clocks Add definitions for USB3.0 core clocks in the R9A09G047 CPG DT bindings header file. 
Reviewed-by: Geert Uytterhoeven Acked-by: Conor Dooley Signed-off-by: Biju Das Link: https://patch.msgid.link/20250909180803.140939-2-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/renesas,r9a09g047-cpg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/renesas,r9a09g047-cpg.h b/include/dt-bindings/clock/renesas,r9a09g047-cpg.h index a27132f9a6c8..f165df8a6f5a 100644 --- a/include/dt-bindings/clock/renesas,r9a09g047-cpg.h +++ b/include/dt-bindings/clock/renesas,r9a09g047-cpg.h @@ -20,5 +20,7 @@ #define R9A09G047_SPI_CLK_SPI 9 #define R9A09G047_GBETH_0_CLK_PTP_REF_I 10 #define R9A09G047_GBETH_1_CLK_PTP_REF_I 11 +#define R9A09G047_USB3_0_REF_ALT_CLK_P 12 +#define R9A09G047_USB3_0_CLKCORE 13 #endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G047_CPG_H__ */ -- cgit v1.2.3 From 413cf5db2fee00fdd69bc62debdbf655f97f4c08 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Aug 2025 06:23:31 +0200 Subject: libie, ice: move fwlog admin queue to libie Copy the code and: - change ICE_AQC to LIBIE_AQC - change ice_aqc to libie_aqc - move definitions outside the structures Reviewed-by: Przemek Kitszel Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 89 ++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index ba62f703df43..ca2ac88b5709 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -222,6 +222,94 @@ struct libie_aqc_list_caps_elem { }; LIBIE_CHECK_STRUCT_LEN(32, libie_aqc_list_caps_elem); +/* Admin Queue command opcodes */ +enum libie_adminq_opc { + /* FW Logging Commands */ + libie_aqc_opc_fw_logs_config = 0xFF30, + libie_aqc_opc_fw_logs_register = 0xFF31, + 
libie_aqc_opc_fw_logs_query = 0xFF32, + libie_aqc_opc_fw_logs_event = 0xFF33, +}; + +enum libie_aqc_fw_logging_mod { + LIBIE_AQC_FW_LOG_ID_GENERAL = 0, + LIBIE_AQC_FW_LOG_ID_CTRL, + LIBIE_AQC_FW_LOG_ID_LINK, + LIBIE_AQC_FW_LOG_ID_LINK_TOPO, + LIBIE_AQC_FW_LOG_ID_DNL, + LIBIE_AQC_FW_LOG_ID_I2C, + LIBIE_AQC_FW_LOG_ID_SDP, + LIBIE_AQC_FW_LOG_ID_MDIO, + LIBIE_AQC_FW_LOG_ID_ADMINQ, + LIBIE_AQC_FW_LOG_ID_HDMA, + LIBIE_AQC_FW_LOG_ID_LLDP, + LIBIE_AQC_FW_LOG_ID_DCBX, + LIBIE_AQC_FW_LOG_ID_DCB, + LIBIE_AQC_FW_LOG_ID_XLR, + LIBIE_AQC_FW_LOG_ID_NVM, + LIBIE_AQC_FW_LOG_ID_AUTH, + LIBIE_AQC_FW_LOG_ID_VPD, + LIBIE_AQC_FW_LOG_ID_IOSF, + LIBIE_AQC_FW_LOG_ID_PARSER, + LIBIE_AQC_FW_LOG_ID_SW, + LIBIE_AQC_FW_LOG_ID_SCHEDULER, + LIBIE_AQC_FW_LOG_ID_TXQ, + LIBIE_AQC_FW_LOG_ID_RSVD, + LIBIE_AQC_FW_LOG_ID_POST, + LIBIE_AQC_FW_LOG_ID_WATCHDOG, + LIBIE_AQC_FW_LOG_ID_TASK_DISPATCH, + LIBIE_AQC_FW_LOG_ID_MNG, + LIBIE_AQC_FW_LOG_ID_SYNCE, + LIBIE_AQC_FW_LOG_ID_HEALTH, + LIBIE_AQC_FW_LOG_ID_TSDRV, + LIBIE_AQC_FW_LOG_ID_PFREG, + LIBIE_AQC_FW_LOG_ID_MDLVER, + LIBIE_AQC_FW_LOG_ID_MAX, +}; + +/* Set FW Logging configuration (indirect 0xFF30) + * Register for FW Logging (indirect 0xFF31) + * Query FW Logging (indirect 0xFF32) + * FW Log Event (indirect 0xFF33) + */ +#define LIBIE_AQC_FW_LOG_CONF_UART_EN BIT(0) +#define LIBIE_AQC_FW_LOG_CONF_AQ_EN BIT(1) +#define LIBIE_AQC_FW_LOG_QUERY_REGISTERED BIT(2) +#define LIBIE_AQC_FW_LOG_CONF_SET_VALID BIT(3) +#define LIBIE_AQC_FW_LOG_AQ_REGISTER BIT(0) +#define LIBIE_AQC_FW_LOG_AQ_QUERY BIT(2) + +#define LIBIE_AQC_FW_LOG_MIN_RESOLUTION (1) +#define LIBIE_AQC_FW_LOG_MAX_RESOLUTION (128) + +struct libie_aqc_fw_log { + u8 cmd_flags; + + u8 rsp_flag; + __le16 fw_rt_msb; + union { + struct { + __le32 fw_rt_lsb; + } sync; + struct { + __le16 log_resolution; + __le16 mdl_cnt; + } cfg; + } ops; + __le32 addr_high; + __le32 addr_low; +}; + +/* Response Buffer for: + * Set Firmware Logging Configuration (0xFF30) + * Query FW Logging (0xFF32) + */ +struct 
libie_aqc_fw_log_cfg_resp { + __le16 module_identifier; + u8 log_level; + u8 rsvd0; +}; + /** * struct libie_aq_desc - Admin Queue (AQ) descriptor * @flags: LIBIE_AQ_FLAG_* flags @@ -253,6 +341,7 @@ struct libie_aq_desc { struct libie_aqc_driver_ver driver_ver; struct libie_aqc_req_res res_owner; struct libie_aqc_list_caps get_cap; + struct libie_aqc_fw_log fw_log; } params; }; LIBIE_CHECK_STRUCT_LEN(32, libie_aq_desc); -- cgit v1.2.3 From 02f44dac8930dc7cc43aa3eba872ce35382f6332 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Aug 2025 06:23:33 +0200 Subject: ice: prepare for moving file to libie s/ice/libie There is no function for filling default descriptor in libie. Zero descriptor structure and set opcode without calling the function. Make functions that are called only in ice_fwlog.c static. Reviewed-by: Przemek Kitszel Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index ca2ac88b5709..29420193889a 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -9,6 +9,7 @@ #define LIBIE_CHECK_STRUCT_LEN(n, X) \ static_assert((n) == sizeof(struct X)) +#define LIBIE_AQ_MAX_BUF_LEN 4096 /** * struct libie_aqc_generic - Generic structure used in adminq communication -- cgit v1.2.3 From f3b3fc1ff0823b73f6f66b6340e6ebc4b00d2ed3 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 12 Aug 2025 06:23:35 +0200 Subject: ice, libie: move fwlog code to libie Move whole code from ice_fwlog.c/h to libie/fwlog.c/h. 
Reviewed-by: Przemek Kitszel Signed-off-by: Michal Swiatkowski Tested-by: Rinitha S (A Contingent worker at Intel) Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- include/linux/net/intel/libie/adminq.h | 6 +-- include/linux/net/intel/libie/fwlog.h | 85 ++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 include/linux/net/intel/libie/fwlog.h (limited to 'include') diff --git a/include/linux/net/intel/libie/adminq.h b/include/linux/net/intel/libie/adminq.h index 29420193889a..ab13bd777a28 100644 --- a/include/linux/net/intel/libie/adminq.h +++ b/include/linux/net/intel/libie/adminq.h @@ -265,7 +265,7 @@ enum libie_aqc_fw_logging_mod { LIBIE_AQC_FW_LOG_ID_TSDRV, LIBIE_AQC_FW_LOG_ID_PFREG, LIBIE_AQC_FW_LOG_ID_MDLVER, - LIBIE_AQC_FW_LOG_ID_MAX, + LIBIE_AQC_FW_LOG_ID_MAX }; /* Set FW Logging configuration (indirect 0xFF30) @@ -280,8 +280,8 @@ enum libie_aqc_fw_logging_mod { #define LIBIE_AQC_FW_LOG_AQ_REGISTER BIT(0) #define LIBIE_AQC_FW_LOG_AQ_QUERY BIT(2) -#define LIBIE_AQC_FW_LOG_MIN_RESOLUTION (1) -#define LIBIE_AQC_FW_LOG_MAX_RESOLUTION (128) +#define LIBIE_AQC_FW_LOG_MIN_RESOLUTION 1 +#define LIBIE_AQC_FW_LOG_MAX_RESOLUTION 128 struct libie_aqc_fw_log { u8 cmd_flags; diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h new file mode 100644 index 000000000000..36b13fabca9e --- /dev/null +++ b/include/linux/net/intel/libie/fwlog.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022, Intel Corporation. */ + +#ifndef _LIBIE_FWLOG_H_ +#define _LIBIE_FWLOG_H_ + +#include + +/* Only a single log level should be set and all log levels under the set value + * are enabled, e.g. 
if log level is set to LIBIE_FW_LOG_LEVEL_VERBOSE, then all + * other log levels are included (except LIBIE_FW_LOG_LEVEL_NONE) + */ +enum libie_fwlog_level { + LIBIE_FWLOG_LEVEL_NONE = 0, + LIBIE_FWLOG_LEVEL_ERROR = 1, + LIBIE_FWLOG_LEVEL_WARNING = 2, + LIBIE_FWLOG_LEVEL_NORMAL = 3, + LIBIE_FWLOG_LEVEL_VERBOSE = 4, + LIBIE_FWLOG_LEVEL_INVALID, /* all values >= this entry are invalid */ +}; + +struct libie_fwlog_module_entry { + /* module ID for the corresponding firmware logging event */ + u16 module_id; + /* verbosity level for the module_id */ + u8 log_level; +}; + +struct libie_fwlog_cfg { + /* list of modules for configuring log level */ + struct libie_fwlog_module_entry module_entries[LIBIE_AQC_FW_LOG_ID_MAX]; + /* options used to configure firmware logging */ + u16 options; +#define LIBIE_FWLOG_OPTION_ARQ_ENA BIT(0) +#define LIBIE_FWLOG_OPTION_UART_ENA BIT(1) + /* set before calling libie_fwlog_init() so the PF registers for + * firmware logging on initialization + */ +#define LIBIE_FWLOG_OPTION_REGISTER_ON_INIT BIT(2) + /* set in the libie_aq_fwlog_get() response if the PF is registered for + * FW logging events over ARQ + */ +#define LIBIE_FWLOG_OPTION_IS_REGISTERED BIT(3) + + /* minimum number of log events sent per Admin Receive Queue event */ + u16 log_resolution; +}; + +struct libie_fwlog_data { + u16 data_size; + u8 *data; +}; + +struct libie_fwlog_ring { + struct libie_fwlog_data *rings; + u16 index; + u16 size; + u16 head; + u16 tail; +}; + +#define LIBIE_FWLOG_RING_SIZE_INDEX_DFLT 3 +#define LIBIE_FWLOG_RING_SIZE_DFLT 256 +#define LIBIE_FWLOG_RING_SIZE_MAX 512 + +struct libie_fwlog { + struct libie_fwlog_cfg cfg; + bool supported; /* does hardware support FW logging? 
*/ + struct libie_fwlog_ring ring; + struct dentry *debugfs; + /* keep track of all the dentrys for FW log modules */ + struct dentry **debugfs_modules; + struct_group_tagged(libie_fwlog_api, api, + struct pci_dev *pdev; + int (*send_cmd)(void *, struct libie_aq_desc *, void *, u16); + void *priv; + struct dentry *debugfs_root; + ); +}; + +int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api); +void libie_fwlog_deinit(struct libie_fwlog *fwlog); +void libie_fwlog_reregister(struct libie_fwlog *fwlog); +void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len); +#endif /* _LIBIE_FWLOG_H_ */ -- cgit v1.2.3 From 70f23546d246563da648baedbb0432ba1d6bb357 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 11 Sep 2025 14:58:01 +0000 Subject: bpf: core: introduce main_prog_aux for stream access BPF streams are only valid for the main programs, to make it easier to access streams from subprogs, introduce main_prog_aux in struct bpf_prog_aux. prog->aux->main_prog_aux = prog->aux, for main programs and prog->aux->main_prog_aux = main_prog->aux, for subprograms. Make bpf_prog_find_from_stack() use the added main_prog_aux to return the mainprog when a subprog is found on the stack. Signed-off-by: Puranjay Mohan Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250911145808.58042-3-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8f6e87f0f3a8..d133171c4d2a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1633,6 +1633,7 @@ struct bpf_prog_aux { /* function name for valid attach_btf_id */ const char *attach_func_name; struct bpf_prog **func; + struct bpf_prog_aux *main_prog_aux; void *jit_data; /* JIT specific data. 
arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; struct bpf_kfunc_desc_tab *kfunc_tab; -- cgit v1.2.3 From 5c5240d020615f13331f4e2c559186125eddc7d3 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 11 Sep 2025 14:58:02 +0000 Subject: bpf: Report arena faults to BPF stderr Begin reporting arena page faults and the faulting address to BPF program's stderr, this patch adds support in the arm64 and x86-64 JITs, support for other archs can be added later. The fault handlers receive the 32 bit address in the arena region so the upper 32 bits of user_vm_start are added to it before printing the address. This is what the user would expect to see as this is what is printed by bpf_printk() if you pass it an address returned by bpf_arena_alloc_pages(). Signed-off-by: Puranjay Mohan Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250911145808.58042-4-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d133171c4d2a..41f776071ff5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2881,6 +2881,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); +void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) @@ -3168,6 +3169,11 @@ static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr) { } + +static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr, + unsigned long fault_ip) +{ +} #endif /* CONFIG_BPF_SYSCALL */ static __always_inline int -- cgit v1.2.3 From eadaa8b255f36ee39ca97d0815c25eeeb1f5d674 
Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:29 +0300 Subject: dma-mapping: introduce new DMA attribute to indicate MMIO memory This patch introduces the DMA_ATTR_MMIO attribute to mark DMA buffers that reside in memory-mapped I/O (MMIO) regions, such as device BARs exposed through the host bridge, which are accessible for peer-to-peer (P2P) DMA. This attribute is especially useful for exporting device memory to other devices for DMA without CPU involvement, and avoids unnecessary or potentially detrimental CPU cache maintenance calls. DMA_ATTR_MMIO is supposed to provide dma_map_resource() functionality without the need for callers to call a special function and perform branching when processing generic containers like bio_vec. Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/6f058ec395c5348014860dbc2eed348c17975843.1757423202.git.leonro@nvidia.com --- include/linux/dma-mapping.h | 20 ++++++++++++++++++++ include/trace/events/dma.h | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 55c03e5fe8cb..4254fd9bdf5d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -58,6 +58,26 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) +/* + * DMA_ATTR_MMIO - Indicates memory-mapped I/O (MMIO) region for DMA mapping + * + * This attribute indicates the physical address is not normal system + * memory. It may not be used with kmap*()/phys_to_virt()/phys_to_page() + * functions, it may not be cacheable, and access using CPU load/store + * instructions may not be allowed. + * + * Usually this will be used to describe MMIO addresses, or other non-cacheable + * register addresses. When DMA mapping this sort of address we call + * the operation Peer to Peer as a one device is DMA'ing to another device. 
+ * For PCI devices the p2pdma APIs must be used to determine if DMA_ATTR_MMIO + * is appropriate. + * + * For architectures that require cache flushing for DMA coherence + * DMA_ATTR_MMIO will not perform any cache flushing. The address + * provided must never be mapped cacheable into the CPU. + */ +#define DMA_ATTR_MMIO (1UL << 10) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index d8ddc27b6a7c..ee90d6f1dcf3 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -31,7 +31,8 @@ TRACE_DEFINE_ENUM(DMA_NONE); { DMA_ATTR_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS" }, \ { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ { DMA_ATTR_NO_WARN, "NO_WARN" }, \ - { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }) + { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }, \ + { DMA_ATTR_MMIO, "MMIO" }) DECLARE_EVENT_CLASS(dma_map, TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, -- cgit v1.2.3 From e9e81d86fee63c6d5757841ab557019ddf73786f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:31 +0300 Subject: dma-debug: refactor to use physical addresses for page mapping Convert the DMA debug infrastructure from page-based to physical address-based mapping as a preparation to rely on physical address for DMA mapping routines. The refactoring renames debug_dma_map_page() to debug_dma_map_phys() and changes its signature to accept a phys_addr_t parameter instead of struct page and offset. Similarly, debug_dma_unmap_page() becomes debug_dma_unmap_phys(). A new dma_debug_phy type is introduced to distinguish physical address mappings from other debug entry types. All callers throughout the codebase are updated to pass physical addresses directly, eliminating the need for page-to-physical conversion in the debug layer. 
This refactoring eliminates the need to convert between page pointers and physical addresses in the debug layer, making the code more efficient and consistent with the DMA mapping API's physical address focus. Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky [mszyprow: added a fixup] Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/56d1a6769b68dfcbf8b26a75a7329aeb8e3c3b6a.1757423202.git.leonro@nvidia.com Link: https://lore.kernel.org/all/20250910052618.GH341237@unreal/ --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8d3fa3a91ce4..2a1f34617802 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -618,6 +618,7 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) #else PAGEFLAG_FALSE(HighMem, highmem) #endif +#define PhysHighMem(__p) (PageHighMem(phys_to_page(__p))) /* Does kmap_local_folio() only allow access to one page of the folio? */ #ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP -- cgit v1.2.3 From 76bb7c49f50ce7687f98eb35e78798584652dd0e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:32 +0300 Subject: dma-mapping: rename trace_dma_*map_page to trace_dma_*map_phys As a preparation for following map_page -> map_phys API conversion, let's rename trace_dma_*map_page() to be trace_dma_*map_phys(). 
Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/c0c02d7d8bd4a148072d283353ba227516a76682.1757423202.git.leonro@nvidia.com --- include/trace/events/dma.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index ee90d6f1dcf3..84416c7d6bfa 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -72,7 +72,7 @@ DEFINE_EVENT(dma_map, name, \ size_t size, enum dma_data_direction dir, unsigned long attrs), \ TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)) -DEFINE_MAP_EVENT(dma_map_page); +DEFINE_MAP_EVENT(dma_map_phys); DEFINE_MAP_EVENT(dma_map_resource); DECLARE_EVENT_CLASS(dma_unmap, @@ -110,7 +110,7 @@ DEFINE_EVENT(dma_unmap, name, \ enum dma_data_direction dir, unsigned long attrs), \ TP_ARGS(dev, addr, size, dir, attrs)) -DEFINE_UNMAP_EVENT(dma_unmap_page); +DEFINE_UNMAP_EVENT(dma_unmap_phys); DEFINE_UNMAP_EVENT(dma_unmap_resource); DECLARE_EVENT_CLASS(dma_alloc_class, -- cgit v1.2.3 From 513559f73700966ded094b090c3ecc6dff877ef9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:33 +0300 Subject: iommu/dma: rename iommu_dma_*map_page to iommu_dma_*map_phys Rename the IOMMU DMA mapping functions to better reflect their actual calling convention. The functions iommu_dma_map_page() and iommu_dma_unmap_page() are renamed to iommu_dma_map_phys() and iommu_dma_unmap_phys() respectively, as they already operate on physical addresses rather than page structures. The calling convention changes from accepting (struct page *page, unsigned long offset) to (phys_addr_t phys), which eliminates the need for page-to-physical address conversion within the functions. This renaming prepares for the broader DMA API conversion from page-based to physical address-based mapping throughout the kernel. 
All callers are updated to pass physical addresses directly, including dma_map_page_attrs(), scatterlist mapping functions, and DMA page allocation helpers. The change simplifies the code by removing the page_to_phys() + offset calculation that was previously done inside the IOMMU functions. Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/ed172f95f8f57782beae04f782813366894e98df.1757423202.git.leonro@nvidia.com --- include/linux/iommu-dma.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 508beaa44c39..485bdffed988 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -21,10 +21,9 @@ static inline bool use_dma_iommu(struct device *dev) } #endif /* CONFIG_IOMMU_DMA */ -dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs); -void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, +dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, unsigned long attrs); int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); -- cgit v1.2.3 From e53d29f957b36ba1666331956c6ccb047bb157d2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:35 +0300 Subject: dma-mapping: convert dma_direct_*map_page to be phys_addr_t based Convert the DMA direct mapping functions to accept physical addresses directly instead of page+offset parameters. The functions were already operating on physical addresses internally, so this change eliminates the redundant page-to-physical conversion at the API boundary. 
The functions dma_direct_map_page() and dma_direct_unmap_page() are renamed to dma_direct_map_phys() and dma_direct_unmap_phys() respectively, with their calling convention changed from (struct page *page, unsigned long offset) to (phys_addr_t phys). Architecture-specific functions arch_dma_map_page_direct() and arch_dma_unmap_page_direct() are similarly renamed to arch_dma_map_phys_direct() and arch_dma_unmap_phys_direct(). The is_pci_p2pdma_page() checks are replaced with DMA_ATTR_MMIO checks to allow integration with dma_direct_map_resource(), and dma_direct_map_phys() is extended to support the MMIO path as well. Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/bb15a22f76dc2e26683333ff54e789606cfbfcf0.1757423202.git.leonro@nvidia.com --- include/linux/dma-map-ops.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 332b80c42b6f..10882d00cb17 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -395,15 +395,15 @@ void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); #ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT -bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr); -bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle); +bool arch_dma_map_phys_direct(struct device *dev, phys_addr_t addr); +bool arch_dma_unmap_phys_direct(struct device *dev, dma_addr_t dma_handle); bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, int nents); bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, int nents); #else -#define arch_dma_map_page_direct(d, a) (false) -#define arch_dma_unmap_page_direct(d, a) (false) +#define arch_dma_map_phys_direct(d, a) (false) +#define arch_dma_unmap_phys_direct(d, a) (false) #define arch_dma_map_sg_direct(d, s, n) (false) 
#define arch_dma_unmap_sg_direct(d, s, n) (false) #endif -- cgit v1.2.3 From 6eb1e769b2c13a33cb2ca694454a7561d3d72c0a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:36 +0300 Subject: kmsan: convert kmsan_handle_dma to use physical addresses Convert the KMSAN DMA handling function from page-based to physical address-based interface. The refactoring renames kmsan_handle_dma() parameters from accepting (struct page *page, size_t offset, size_t size) to (phys_addr_t phys, size_t size). The existing semantics where callers are expected to provide only kmap memory is continued here. Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/3557cbaf66e935bc794f37d2b891ef75cbf2c80c.1757423202.git.leonro@nvidia.com --- include/linux/kmsan.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h index 2b1432cc16d5..f2fd221107bb 100644 --- a/include/linux/kmsan.h +++ b/include/linux/kmsan.h @@ -182,8 +182,7 @@ void kmsan_iounmap_page_range(unsigned long start, unsigned long end); /** * kmsan_handle_dma() - Handle a DMA data transfer. - * @page: first page of the buffer. - * @offset: offset of the buffer within the first page. + * @phys: physical address of the buffer. * @size: buffer size. * @dir: one of possible dma_data_direction values. * @@ -192,7 +191,7 @@ void kmsan_iounmap_page_range(unsigned long start, unsigned long end); * * initializes the buffer, if it is copied from device; * * does both, if this is a DMA_BIDIRECTIONAL transfer. 
*/ -void kmsan_handle_dma(struct page *page, size_t offset, size_t size, +void kmsan_handle_dma(phys_addr_t phys, size_t size, enum dma_data_direction dir); /** @@ -372,8 +371,8 @@ static inline void kmsan_iounmap_page_range(unsigned long start, { } -static inline void kmsan_handle_dma(struct page *page, size_t offset, - size_t size, enum dma_data_direction dir) +static inline void kmsan_handle_dma(phys_addr_t phys, size_t size, + enum dma_data_direction dir) { } -- cgit v1.2.3 From f7326196a781622b33bfbdabb00f5e72b5fb5679 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 9 Sep 2025 16:27:39 +0300 Subject: dma-mapping: export new dma_*map_phys() interface Introduce new DMA mapping functions dma_map_phys() and dma_unmap_phys() that operate directly on physical addresses instead of page+offset parameters. This provides a more efficient interface for drivers that already have physical addresses available. The new functions are implemented as the primary mapping layer, with the existing dma_map_page_attrs()/dma_map_resource() and dma_unmap_page_attrs()/dma_unmap_resource() functions converted to simple wrappers around the phys-based implementations. In case dma_map_page_attrs(), the struct page is converted to physical address with help of page_to_phys() function and dma_map_resource() provides physical address as is together with addition of DMA_ATTR_MMIO attribute. The old page-based API is preserved in mapping.c to ensure that existing code won't be affected by changing EXPORT_SYMBOL to EXPORT_SYMBOL_GPL variant for dma_*map_phys(). 
Reviewed-by: Jason Gunthorpe Reviewed-by: Keith Busch Signed-off-by: Leon Romanovsky Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/54cc52af91777906bbe4a386113437ba0bcfba9c.1757423202.git.leonro@nvidia.com --- include/linux/dma-direct.h | 2 -- include/linux/dma-mapping.h | 13 +++++++++++++ include/linux/iommu-dma.h | 4 ---- include/trace/events/dma.h | 2 -- 4 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index f3bc0bcd7098..c249912456f9 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -149,7 +149,5 @@ void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir); int dma_direct_supported(struct device *dev, u64 mask); -dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir, unsigned long attrs); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4254fd9bdf5d..8248ff9363ee 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -138,6 +138,10 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, unsigned long attrs); void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); +dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs); unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, @@ -192,6 +196,15 @@ static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum 
dma_data_direction dir, unsigned long attrs) { } +static inline dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void dma_unmap_phys(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ +} static inline unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index 485bdffed988..a92b3ff9b934 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -42,10 +42,6 @@ size_t iommu_dma_opt_mapping_size(void); size_t iommu_dma_max_mapping_size(struct device *dev); void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, unsigned long attrs); -dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs); -void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs); struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); void iommu_dma_free_noncontiguous(struct device *dev, size_t size, diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 84416c7d6bfa..5da59fd8121d 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -73,7 +73,6 @@ DEFINE_EVENT(dma_map, name, \ TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)) DEFINE_MAP_EVENT(dma_map_phys); -DEFINE_MAP_EVENT(dma_map_resource); DECLARE_EVENT_CLASS(dma_unmap, TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, @@ -111,7 +110,6 @@ DEFINE_EVENT(dma_unmap, name, \ TP_ARGS(dev, addr, size, dir, attrs)) DEFINE_UNMAP_EVENT(dma_unmap_phys); -DEFINE_UNMAP_EVENT(dma_unmap_resource); 
DECLARE_EVENT_CLASS(dma_alloc_class, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, -- cgit v1.2.3 From 5f790208d68fe1526c751dc2af366c7b552b8631 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 10 Sep 2025 22:42:47 +0200 Subject: net: phy: fixed_phy: remove two function stubs Remove stubs for fixed_phy_set_link_update() and fixed_phy_change_carrier() because all callers (actually just one per function) select config symbol FIXED_PHY. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/8729170d-cf39-48d9-aabc-c9aa4acda070@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy_fixed.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 6227a1bdefec..d17ff750c708 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -37,16 +37,6 @@ fixed_phy_register(const struct fixed_phy_status *status, static inline void fixed_phy_unregister(struct phy_device *phydev) { } -static inline int fixed_phy_set_link_update(struct phy_device *phydev, - int (*link_update)(struct net_device *, - struct fixed_phy_status *)) -{ - return -ENODEV; -} -static inline int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier) -{ - return -EINVAL; -} #endif /* CONFIG_FIXED_PHY */ #endif /* __PHY_FIXED_H */ -- cgit v1.2.3 From 21446c06b441b9c993870efae71aef4e9aa72ec7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 4 Sep 2025 19:07:23 +0200 Subject: net: bridge: Introduce UAPI for BR_BOOLOPT_FDB_LOCAL_VLAN_0 The previous patches introduced a new option, BR_BOOLOPT_FDB_LOCAL_VLAN_0. When enabled, it has local FDB entries installed only on VLAN 0, instead of duplicating them across all VLANs. In this patch, add the corresponding UAPI toggle, and the code for turning the feature on and off. 
Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/ea99bfb10f687fa58091e6e1c2f8acc33f47ca45.1757004393.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 73876c0e2bba..e52f8207ab27 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -823,6 +823,8 @@ struct br_mcast_stats { /* bridge boolean options * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping + * BR_BOOLOPT_FDB_LOCAL_VLAN_0 - local FDB entries installed by the bridge + * driver itself should only be added on VLAN 0 * * IMPORTANT: if adding a new option do not forget to handle * it in br_boolopt_toggle/get and bridge sysfs @@ -832,6 +834,7 @@ enum br_boolopt_id { BR_BOOLOPT_MCAST_VLAN_SNOOPING, BR_BOOLOPT_MST_ENABLE, BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION, + BR_BOOLOPT_FDB_LOCAL_VLAN_0, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From 9e472d9e84b11e9f3c429eba97c2a9e74461a884 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 9 Sep 2025 02:18:50 +0100 Subject: tcp: Destroy TCP-AO, TCP-MD5 keys in .sk_destruct() Currently there are a couple of minor issues with destroying the keys tcp_v4_destroy_sock(): 1. The socket is yet in TCP bind buckets, making it reachable for incoming segments [on another CPU core], potentially available to send late FIN/ACK/RST replies. 2. There is at least one code path, where tcp_done() is called before sending RST [kudos to Bob for investigation]. This is a case of a server, that finished sending its data and just called close(). 
The socket is in TCP_FIN_WAIT2 and has RCV_SHUTDOWN (set by __tcp_close()) tcp_v4_do_rcv()/tcp_v6_do_rcv() tcp_rcv_state_process() /* LINUX_MIB_TCPABORTONDATA */ tcp_reset() tcp_done_with_error() tcp_done() inet_csk_destroy_sock() /* Destroys AO/MD5 keys */ /* tcp_rcv_state_process() returns SKB_DROP_REASON_TCP_ABORT_ON_DATA */ tcp_v4_send_reset() /* Sends an unsigned RST segment */ tcpdump: > 22:53:15.399377 00:00:b2:1f:00:00 > 00:00:01:01:00:00, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 64, id 33929, offset 0, flags [DF], proto TCP (6), length 60) > 1.0.0.1.34567 > 1.0.0.2.49848: Flags [F.], seq 2185658590, ack 3969644355, win 502, options [nop,nop,md5 valid], length 0 > 22:53:15.399396 00:00:01:01:00:00 > 00:00:b2:1f:00:00, ethertype IPv4 (0x0800), length 86: (tos 0x0, ttl 64, id 51951, offset 0, flags [DF], proto TCP (6), length 72) > 1.0.0.2.49848 > 1.0.0.1.34567: Flags [.], seq 3969644375, ack 2185658591, win 128, options [nop,nop,md5 valid,nop,nop,sack 1 {2185658590:2185658591}], length 0 > 22:53:16.429588 00:00:b2:1f:00:00 > 00:00:01:01:00:00, ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 40) > 1.0.0.1.34567 > 1.0.0.2.49848: Flags [R], seq 2185658590, win 0, length 0 > 22:53:16.664725 00:00:b2:1f:00:00 > 00:00:01:01:00:00, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60) > 1.0.0.1.34567 > 1.0.0.2.49848: Flags [R], seq 2185658591, win 0, options [nop,nop,md5 valid], length 0 > 22:53:17.289832 00:00:b2:1f:00:00 > 00:00:01:01:00:00, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60) > 1.0.0.1.34567 > 1.0.0.2.49848: Flags [R], seq 2185658591, win 0, options [nop,nop,md5 valid], length 0 Note the signed RSTs later in the dump - those are sent by the server when the fin-wait socket gets removed from hash buckets, by the listener socket. 
Instead of destroying AO/MD5 info and their keys in inet_csk_destroy_sock(), slightly delay it until the actual socket .sk_destruct(). As a shutdown socket can still send non-data replies, they should be signed in order for the peer to process them. Now it also matches how AO/MD5 gets destructed for TIME-WAIT sockets (in tcp_twsk_destructor()). This seems optimal for TCP-MD5, while for TCP-AO it seems to have an open problem: once the RST gets sent and the socket actually gets destructed, there is no information on the initial sequence numbers. So, in case this last RST gets lost in the network, the server's listener socket won't be able to properly sign another RST. Nothing in RFC 1122 prescribes keeping any local state after non-graceful reset. Luckily, BGP peers are known to use keepalives. While the issue is quite minor/cosmetic, these days monitoring network counters is a common practice and getting invalid signed segments from a trusted BGP peer can get customers worried. Investigated-by: Bob Gilligan Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Dmitry Safonov Link: https://patch.msgid.link/20250909-b4-tcp-ao-md5-rst-finwait2-v5-1-9ffaaaf8b236@arista.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0fb7923b8367..277914c4d067 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1941,6 +1941,7 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, } #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) +void tcp_md5_destruct_sock(struct sock *sk); #else static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, @@ -1957,6 +1958,9 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, } #define tcp_twsk_md5_key(twsk) NULL +static inline void tcp_md5_destruct_sock(struct sock *sk) +{ +} #endif int tcp_md5_alloc_sigpool(void); -- cgit v1.2.3 From 51e547e8c89c661f6fbede4a28b1d33b13625683
Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 9 Sep 2025 02:18:51 +0100 Subject: tcp: Free TCP-AO/TCP-MD5 info/keys without RCU Now that the destruction of info/keys is delayed until the socket destructor, it's safe to use kfree() without an RCU callback. The socket is in TCP_CLOSE state either because it never left it, or it's already closed and the refcounter is zero. In any way, no one can discover it anymore, it's safe to release memory straight away. Similar thing was possible for twsk already. Reviewed-by: Kuniyuki Iwashima Signed-off-by: Dmitry Safonov Link: https://patch.msgid.link/20250909-b4-tcp-ao-md5-rst-finwait2-v5-2-9ffaaaf8b236@arista.com Signed-off-by: Jakub Kicinski --- include/net/tcp_ao.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index df655ce6987d..1e9e27d6e06b 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -130,7 +130,6 @@ struct tcp_ao_info { u32 snd_sne; u32 rcv_sne; refcount_t refcnt; /* Protects twsk destruction */ - struct rcu_head rcu; }; #ifdef CONFIG_TCP_MD5SIG -- cgit v1.2.3 From 9b90afa6d613b66ec4e74ae75f9bfa5baf386ecd Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 10 Sep 2025 09:12:51 +0200 Subject: gpio: move gpio-mmio-specific fields out of struct gpio_chip With all users of bgpio_init() converted to using the modernized generic GPIO chip API, we can now move the gpio-mmio-specific fields out of struct gpio_chip and into the dedicated struct gpio_generic_chip. To that end: adjust the gpio-mmio driver to the new layout, update the docs, etc. The changes in gpio-mlxbf2.c and gpio-mpc8xxx.c are here and not in their respective conversion commits because the former passes the address of the generic chip's lock to the __releases() annotation and we cannot really hide it while gpio-mpc8xxx.c accesses the shadow registers in a driver-specific workaround and there's no reason to make them available in a public API. 
Also: drop the relevant task from TODO as it's now done. Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250910-gpio-mmio-gpio-conv-part4-v2-15-f3d1a4c57124@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 44 ----------------------------- include/linux/gpio/generic.h | 67 +++++++++++++++++++++++++++++--------------- 2 files changed, 45 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9fcd4a988081..9b14fd20f13e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -388,28 +388,6 @@ struct gpio_irq_chip { * implies that if the chip supports IRQs, these IRQs need to be threaded * as the chip access may sleep when e.g. reading out the IRQ status * registers. - * @read_reg: reader function for generic GPIO - * @write_reg: writer function for generic GPIO - * @be_bits: if the generic GPIO has big endian bit order (bit 31 is representing - * line 0, bit 30 is line 1 ... bit 0 is line 31) this is set to true by the - * generic GPIO core. It is for internal housekeeping only. - * @reg_dat: data (in) register for generic GPIO - * @reg_set: output set register (out=high) for generic GPIO - * @reg_clr: output clear register (out=low) for generic GPIO - * @reg_dir_out: direction out setting register for generic GPIO - * @reg_dir_in: direction in setting register for generic GPIO - * @bgpio_dir_unreadable: indicates that the direction register(s) cannot - * be read and we need to rely on out internal state tracking. - * @bgpio_pinctrl: the generic GPIO uses a pin control backend. - * @bgpio_bits: number of register bits used for a generic GPIO i.e. - * * 8 - * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep - * shadowed and real data registers writes together. - * @bgpio_data: shadowed data register for generic GPIO to clear/set bits - * safely. 
- * @bgpio_dir: shadowed direction register for generic GPIO to clear/set - * direction safely. A "1" in this word means the line is set as - * output. * * A gpio_chip can help platforms abstract various sources of GPIOs so * they can all be accessed through a common programming interface. @@ -475,23 +453,6 @@ struct gpio_chip { const char *const *names; bool can_sleep; -#if IS_ENABLED(CONFIG_GPIO_GENERIC) - unsigned long (*read_reg)(void __iomem *reg); - void (*write_reg)(void __iomem *reg, unsigned long data); - bool be_bits; - void __iomem *reg_dat; - void __iomem *reg_set; - void __iomem *reg_clr; - void __iomem *reg_dir_out; - void __iomem *reg_dir_in; - bool bgpio_dir_unreadable; - bool bgpio_pinctrl; - int bgpio_bits; - raw_spinlock_t bgpio_lock; - unsigned long bgpio_data; - unsigned long bgpio_dir; -#endif /* CONFIG_GPIO_GENERIC */ - #ifdef CONFIG_GPIOLIB_IRQCHIP /* * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib @@ -723,11 +684,6 @@ int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -int bgpio_init(struct gpio_chip *gc, struct device *dev, - unsigned long sz, void __iomem *dat, void __iomem *set, - void __iomem *clr, void __iomem *dirout, void __iomem *dirin, - unsigned long flags); - #define BGPIOF_BIG_ENDIAN BIT(0) #define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ #define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index 4c0626b53ec9..162430d96660 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -50,9 +50,44 @@ struct gpio_generic_chip_config { * struct gpio_generic_chip - Generic GPIO chip implementation. * @gc: The underlying struct gpio_chip object, implementing low-level GPIO * chip routines. 
+ * @read_reg: reader function for generic GPIO + * @write_reg: writer function for generic GPIO + * @be_bits: if the generic GPIO has big endian bit order (bit 31 is + * representing line 0, bit 30 is line 1 ... bit 0 is line 31) this + * is set to true by the generic GPIO core. It is for internal + * housekeeping only. + * @reg_dat: data (in) register for generic GPIO + * @reg_set: output set register (out=high) for generic GPIO + * @reg_clr: output clear register (out=low) for generic GPIO + * @reg_dir_out: direction out setting register for generic GPIO + * @reg_dir_in: direction in setting register for generic GPIO + * @dir_unreadable: indicates that the direction register(s) cannot be read and + * we need to rely on out internal state tracking. + * @pinctrl: the generic GPIO uses a pin control backend. + * @bits: number of register bits used for a generic GPIO + * i.e. * 8 + * @lock: used to lock chip->sdata. Also, this is needed to keep + * shadowed and real data registers writes together. + * @sdata: shadowed data register for generic GPIO to clear/set bits safely. + * @sdir: shadowed direction register for generic GPIO to clear/set direction + * safely. A "1" in this word means the line is set as output. */ struct gpio_generic_chip { struct gpio_chip gc; + unsigned long (*read_reg)(void __iomem *reg); + void (*write_reg)(void __iomem *reg, unsigned long data); + bool be_bits; + void __iomem *reg_dat; + void __iomem *reg_set; + void __iomem *reg_clr; + void __iomem *reg_dir_out; + void __iomem *reg_dir_in; + bool dir_unreadable; + bool pinctrl; + int bits; + raw_spinlock_t lock; + unsigned long sdata; + unsigned long sdir; }; static inline struct gpio_generic_chip * @@ -61,20 +96,8 @@ to_gpio_generic_chip(struct gpio_chip *gc) return container_of(gc, struct gpio_generic_chip, gc); } -/** - * gpio_generic_chip_init() - Initialize a generic GPIO chip. - * @chip: Generic GPIO chip to set up. - * @cfg: Generic GPIO chip configuration. 
- * - * Returns 0 on success, negative error number on failure. - */ -static inline int -gpio_generic_chip_init(struct gpio_generic_chip *chip, - const struct gpio_generic_chip_config *cfg) -{ - return bgpio_init(&chip->gc, cfg->dev, cfg->sz, cfg->dat, cfg->set, - cfg->clr, cfg->dirout, cfg->dirin, cfg->flags); -} +int gpio_generic_chip_init(struct gpio_generic_chip *chip, + const struct gpio_generic_chip_config *cfg); /** * gpio_generic_chip_set() - Set the GPIO line value of the generic GPIO chip. @@ -110,10 +133,10 @@ gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, static inline unsigned long gpio_generic_read_reg(struct gpio_generic_chip *chip, void __iomem *reg) { - if (WARN_ON(!chip->gc.read_reg)) + if (WARN_ON(!chip->read_reg)) return 0; - return chip->gc.read_reg(reg); + return chip->read_reg(reg); } /** @@ -125,23 +148,23 @@ gpio_generic_read_reg(struct gpio_generic_chip *chip, void __iomem *reg) static inline void gpio_generic_write_reg(struct gpio_generic_chip *chip, void __iomem *reg, unsigned long val) { - if (WARN_ON(!chip->gc.write_reg)) + if (WARN_ON(!chip->write_reg)) return; - chip->gc.write_reg(reg, val); + chip->write_reg(reg, val); } #define gpio_generic_chip_lock(gen_gc) \ - raw_spin_lock(&(gen_gc)->gc.bgpio_lock) + raw_spin_lock(&(gen_gc)->lock) #define gpio_generic_chip_unlock(gen_gc) \ - raw_spin_unlock(&(gen_gc)->gc.bgpio_lock) + raw_spin_unlock(&(gen_gc)->lock) #define gpio_generic_chip_lock_irqsave(gen_gc, flags) \ - raw_spin_lock_irqsave(&(gen_gc)->gc.bgpio_lock, flags) + raw_spin_lock_irqsave(&(gen_gc)->lock, flags) #define gpio_generic_chip_unlock_irqrestore(gen_gc, flags) \ - raw_spin_unlock_irqrestore(&(gen_gc)->gc.bgpio_lock, flags) + raw_spin_unlock_irqrestore(&(gen_gc)->lock, flags) DEFINE_LOCK_GUARD_1(gpio_generic_lock, struct gpio_generic_chip, -- cgit v1.2.3 From 121a0f839dbb397af5fabb701cea3e9983223e50 Mon Sep 17 00:00:00 2001 From: Israel Cepeda Date: Thu, 11 Sep 2025 20:13:41 +0200 Subject: usb: 
misc: Add Intel USBIO bridge driver Add a driver for the Intel USBIO USB IO-expander used by the MIPI cameras on various new (Meteor Lake and later) Intel laptops. This is a USB bridge driver which adds auxbus child devices for the GPIO, I2C and SPI functions of the USBIO chip and which exports IO-functions for the drivers for the auxbus child devices to communicate with the USBIO device's firmware. Co-developed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Israel Cepeda Link: https://lore.kernel.org/r/20250911181343.77398-2-hansg@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/usbio.h | 177 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 include/linux/usb/usbio.h (limited to 'include') diff --git a/include/linux/usb/usbio.h b/include/linux/usb/usbio.h new file mode 100644 index 000000000000..6c4e7c246d58 --- /dev/null +++ b/include/linux/usb/usbio.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025 Intel Corporation.
+ * + */ + +#ifndef _LINUX_USBIO_H_ +#define _LINUX_USBIO_H_ + +#include +#include +#include +#include + +/*********************** + * USBIO Clients Names * + ***********************/ +#define USBIO_GPIO_CLIENT "usbio-gpio" +#define USBIO_I2C_CLIENT "usbio-i2c" + +/**************** + * USBIO quirks * + ****************/ +#define USBIO_QUIRK_BULK_MAXP_63 BIT(0) /* Force bulk endpoint maxp to 63 */ +#define USBIO_QUIRK_I2C_NO_INIT_ACK BIT(8) /* Do not ask for ack on I2C init */ +#define USBIO_QUIRK_I2C_MAX_RW_LEN_52 BIT(9) /* Set i2c-adapter max r/w len to 52 */ +#define USBIO_QUIRK_I2C_USE_CHUNK_LEN BIT(10) /* Send chunk-len for split xfers */ +#define USBIO_QUIRK_I2C_ALLOW_400KHZ BIT(11) /* Override desc, allowing 400 KHz */ + +/************************** + * USBIO Type Definitions * + **************************/ + +/* USBIO Packet Type */ +#define USBIO_PKTTYPE_CTRL 1 +#define USBIO_PKTTYPE_DBG 2 +#define USBIO_PKTTYPE_GPIO 3 +#define USBIO_PKTTYPE_I2C 4 + +/* USBIO Packet Header */ +struct usbio_packet_header { + u8 type; + u8 cmd; + u8 flags; +} __packed; + +/* USBIO Control Transfer Packet */ +struct usbio_ctrl_packet { + struct usbio_packet_header header; + u8 len; + u8 data[] __counted_by(len); +} __packed; + +/* USBIO Bulk Transfer Packet */ +struct usbio_bulk_packet { + struct usbio_packet_header header; + __le16 len; + u8 data[] __counted_by(len); +} __packed; + +/* USBIO GPIO commands */ +enum usbio_gpio_cmd { + USBIO_GPIOCMD_DEINIT, + USBIO_GPIOCMD_INIT, + USBIO_GPIOCMD_READ, + USBIO_GPIOCMD_WRITE, + USBIO_GPIOCMD_END +}; + +/* USBIO GPIO config */ +enum usbio_gpio_pincfg { + USBIO_GPIO_PINCFG_DEFAULT, + USBIO_GPIO_PINCFG_PULLUP, + USBIO_GPIO_PINCFG_PULLDOWN, + USBIO_GPIO_PINCFG_PUSHPULL +}; + +#define USBIO_GPIO_PINCFG_SHIFT 2 +#define USBIO_GPIO_PINCFG_MASK (0x3 << USBIO_GPIO_PINCFG_SHIFT) +#define USBIO_GPIO_SET_PINCFG(pincfg) \ + (((pincfg) << USBIO_GPIO_PINCFG_SHIFT) & USBIO_GPIO_PINCFG_MASK) + +enum usbio_gpio_pinmode { + USBIO_GPIO_PINMOD_INVAL, + 
USBIO_GPIO_PINMOD_INPUT, + USBIO_GPIO_PINMOD_OUTPUT, + USBIO_GPIO_PINMOD_MAXVAL +}; + +#define USBIO_GPIO_PINMOD_MASK 0x3 +#define USBIO_GPIO_SET_PINMOD(pin) (pin & USBIO_GPIO_PINMOD_MASK) + +/************************* + * USBIO GPIO Controller * + *************************/ + +#define USBIO_MAX_GPIOBANKS 5 +#define USBIO_GPIOSPERBANK 32 + +struct usbio_gpio_bank_desc { + u8 id; + u8 pins; + __le32 bmap; +} __packed; + +struct usbio_gpio_init { + u8 bankid; + u8 config; + u8 pincount; + u8 pin; +} __packed; + +struct usbio_gpio_rw { + u8 bankid; + u8 pincount; + u8 pin; + __le32 value; +} __packed; + +/* USBIO I2C commands */ +enum usbio_i2c_cmd { + USBIO_I2CCMD_UNINIT, + USBIO_I2CCMD_INIT, + USBIO_I2CCMD_READ, + USBIO_I2CCMD_WRITE, + USBIO_I2CCMD_END +}; + +/************************ + * USBIO I2C Controller * + ************************/ + +#define USBIO_MAX_I2CBUSES 5 + +#define USBIO_I2C_BUS_ADDR_CAP_10B BIT(3) /* 10bit address support */ +#define USBIO_I2C_BUS_MODE_CAP_MASK 0x3 +#define USBIO_I2C_BUS_MODE_CAP_SM 0 /* Standard Mode */ +#define USBIO_I2C_BUS_MODE_CAP_FM 1 /* Fast Mode */ +#define USBIO_I2C_BUS_MODE_CAP_FMP 2 /* Fast Mode+ */ +#define USBIO_I2C_BUS_MODE_CAP_HSM 3 /* High-Speed Mode */ + +struct usbio_i2c_bus_desc { + u8 id; + u8 caps; +} __packed; + +struct usbio_i2c_uninit { + u8 busid; + __le16 config; +} __packed; + +struct usbio_i2c_init { + u8 busid; + __le16 config; + __le32 speed; +} __packed; + +struct usbio_i2c_rw { + u8 busid; + __le16 config; + __le16 size; + u8 data[] __counted_by(size); +} __packed; + +int usbio_control_msg(struct auxiliary_device *adev, u8 type, u8 cmd, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len); + +int usbio_bulk_msg(struct auxiliary_device *adev, u8 type, u8 cmd, bool last, + const void *obuf, u16 obuf_len, void *ibuf, u16 ibuf_len); + +int usbio_acquire(struct auxiliary_device *adev); +void usbio_release(struct auxiliary_device *adev); +void usbio_get_txrxbuf_len(struct auxiliary_device *adev, u16 
*txbuf_len, u16 *rxbuf_len); +unsigned long usbio_get_quirks(struct auxiliary_device *adev); +void usbio_acpi_bind(struct auxiliary_device *adev, const struct acpi_device_id *hids); + +#endif -- cgit v1.2.3 From 7f70b89b2be66c03ddc76d3ad8aebeeec4a9c505 Mon Sep 17 00:00:00 2001 From: Guan-Yu Lin Date: Thu, 11 Sep 2025 14:20:14 +0000 Subject: usb: offload: add apis for offload usage tracking Introduce offload_usage and corresponding apis to track offload usage on each USB device. Offload denotes that there is another co-processor accessing the USB device via the same USB host controller. To optimize power usage, it's essential to monitor whether the USB device is actively used by other co-processor. This information is vital when determining if a USB device can be safely suspended during system power state transitions. Signed-off-by: Guan-Yu Lin Link: https://lore.kernel.org/r/20250911142051.90822-3-guanyulin@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250911142051.90822-3-guanyulin@google.com --- include/linux/usb.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 70ef00c42d22..e85105939af8 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -636,6 +636,8 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) + * @offload_at_suspend: offload activities during suspend is enabled. + * @offload_usage: number of offload activities happening on this usb device. * @slot_id: Slot ID assigned by xHCI * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. 
@@ -724,6 +726,8 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; + unsigned offload_at_suspend:1; + int offload_usage; enum usb_link_tunnel_mode tunnel_mode; struct device_link *usb4_link; @@ -841,6 +845,20 @@ static inline void usb_mark_last_busy(struct usb_device *udev) { } #endif +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +int usb_offload_get(struct usb_device *udev); +int usb_offload_put(struct usb_device *udev); +bool usb_offload_check(struct usb_device *udev); +#else + +static inline int usb_offload_get(struct usb_device *udev) +{ return 0; } +static inline int usb_offload_put(struct usb_device *udev) +{ return 0; } +static inline bool usb_offload_check(struct usb_device *udev) +{ return false; } +#endif + extern int usb_disable_lpm(struct usb_device *udev); extern void usb_enable_lpm(struct usb_device *udev); /* Same as above, but these functions lock/unlock the bandwidth_mutex. */ -- cgit v1.2.3 From ef82a4803aabaf623bfcae07981406f1386eabf9 Mon Sep 17 00:00:00 2001 From: Guan-Yu Lin Date: Thu, 11 Sep 2025 14:20:15 +0000 Subject: xhci: sideband: add api to trace sideband usage The existing sideband driver only registers sidebands without tracking their active usage. To address this, sideband will now record its active usage when it creates/removes interrupters. In addition, a new api is introduced to provide a means for other drivers to fetch sideband activity information on a USB host controller.
Signed-off-by: Guan-Yu Lin Link: https://lore.kernel.org/r/20250911142051.90822-4-guanyulin@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250911142051.90822-4-guanyulin@google.com --- include/linux/usb/xhci-sideband.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h index 45288c392f6e..005257085dcb 100644 --- a/include/linux/usb/xhci-sideband.h +++ b/include/linux/usb/xhci-sideband.h @@ -11,6 +11,7 @@ #include #include +#include #define EP_CTX_PER_DEV 31 /* FIXME defined twice, from xhci.h */ @@ -83,6 +84,14 @@ xhci_sideband_get_endpoint_buffer(struct xhci_sideband *sb, struct usb_host_endpoint *host_ep); struct sg_table * xhci_sideband_get_event_buffer(struct xhci_sideband *sb); + +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +bool xhci_sideband_check(struct usb_hcd *hcd); +#else +static inline bool xhci_sideband_check(struct usb_hcd *hcd) +{ return false; } +#endif /* IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) */ + int xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, bool ip_autoclear, u32 imod_interval, int intr_num); -- cgit v1.2.3 From f338529ca9279e3bea392cb53cec8bd292909cb1 Mon Sep 17 00:00:00 2001 From: Sarthak Garg Date: Mon, 8 Sep 2025 16:11:21 +0530 Subject: mmc: core: Parse and use the new max-sd-hs-hz DT property Introduce a new device tree flag to cap the maximum High-Speed (HS) mode frequency for SD cards, accommodating board-specific electrical limitations which cannot support the default 50 MHz HS frequency and others.
Signed-off-by: Sarthak Garg Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e0d935a4ac1d..e0e2c265e5d1 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -576,6 +576,7 @@ struct mmc_host { int hsq_depth; u32 err_stats[MMC_ERR_MAX]; + u32 max_sd_hs_hz; unsigned long private[] ____cacheline_aligned; }; -- cgit v1.2.3 From d81c041ed5e444233daaa2d4b0f9ae4008c57f59 Mon Sep 17 00:00:00 2001 From: Ling Xu Date: Fri, 12 Sep 2025 14:13:00 +0100 Subject: misc: fastrpc: Remove kernel-side domain checks from capability ioctl Domain ID in the uAPI is misleading. Remove checks and log messages related to 'domain' field in capability structure. Update UAPI to mark the field as unused. Reviewed-by: Dmitry Baryshkov Reviewed-by: Ekansh Gupta Signed-off-by: Ling Xu Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250912131302.303199-3-srini@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index f33d914d8f46..c6e2925f47e6 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -134,7 +134,7 @@ struct fastrpc_mem_unmap { }; struct fastrpc_ioctl_capability { - __u32 domain; + __u32 unused; /* deprecated, ignored by the kernel */ __u32 attribute_id; __u32 capability; /* dsp capability */ __u32 reserved[4]; -- cgit v1.2.3 From e9671ddd82eee96146a7359431a4e1f04ac2b076 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 12 Sep 2025 01:47:04 +0800 Subject: dt-bindings: clock: sun55i-a523-ccu: Add missing NPU module clock The main clock controller on the A523/T527 has the NPU's module clock. 
It was missing from the original submission, likely because that was based on the A523 user manual; the A523 is marketed without the NPU. Reviewed-by: Andre Przywara Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250911174710.3149589-2-wens@kernel.org Signed-off-by: Chen-Yu Tsai --- include/dt-bindings/clock/sun55i-a523-ccu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/sun55i-a523-ccu.h b/include/dt-bindings/clock/sun55i-a523-ccu.h index c8259ac5ada7..54808fcfd556 100644 --- a/include/dt-bindings/clock/sun55i-a523-ccu.h +++ b/include/dt-bindings/clock/sun55i-a523-ccu.h @@ -185,5 +185,6 @@ #define CLK_FANOUT0 176 #define CLK_FANOUT1 177 #define CLK_FANOUT2 178 +#define CLK_NPU 179 #endif /* _DT_BINDINGS_CLK_SUN55I_A523_CCU_H_ */ -- cgit v1.2.3 From 0f610e650d4e979490ccfa4c22ca29ca547f41e7 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 12 Sep 2025 01:47:05 +0800 Subject: dt-bindings: clock: sun55i-a523-ccu: Add A523 MCU CCU clock controller There are four clock controllers in the A523 SoC. The existing binding already covers two of them that are critical for basic operation. The remaining ones are the MCU clock controller and CPU PLL clock controller. Add a description for the MCU CCU. This unit controls and provides clocks to the MCU (RISC-V) subsystem and peripherals meant to operate under low power conditions. 
Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250911174710.3149589-3-wens@kernel.org Signed-off-by: Chen-Yu Tsai --- include/dt-bindings/clock/sun55i-a523-mcu-ccu.h | 54 +++++++++++++++++++++++++ include/dt-bindings/reset/sun55i-a523-mcu-ccu.h | 30 ++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 include/dt-bindings/clock/sun55i-a523-mcu-ccu.h create mode 100644 include/dt-bindings/reset/sun55i-a523-mcu-ccu.h (limited to 'include') diff --git a/include/dt-bindings/clock/sun55i-a523-mcu-ccu.h b/include/dt-bindings/clock/sun55i-a523-mcu-ccu.h new file mode 100644 index 000000000000..6efc6bc7e11a --- /dev/null +++ b/include/dt-bindings/clock/sun55i-a523-mcu-ccu.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (C) 2025 Chen-Yu Tsai + */ + +#ifndef _DT_BINDINGS_CLK_SUN55I_A523_MCU_CCU_H_ +#define _DT_BINDINGS_CLK_SUN55I_A523_MCU_CCU_H_ + +#define CLK_MCU_PLL_AUDIO1 0 +#define CLK_MCU_PLL_AUDIO1_DIV2 1 +#define CLK_MCU_PLL_AUDIO1_DIV5 2 +#define CLK_MCU_AUDIO_OUT 3 +#define CLK_MCU_DSP 4 +#define CLK_MCU_I2S0 5 +#define CLK_MCU_I2S1 6 +#define CLK_MCU_I2S2 7 +#define CLK_MCU_I2S3 8 +#define CLK_MCU_I2S3_ASRC 9 +#define CLK_BUS_MCU_I2S0 10 +#define CLK_BUS_MCU_I2S1 11 +#define CLK_BUS_MCU_I2S2 12 +#define CLK_BUS_MCU_I2S3 13 +#define CLK_MCU_SPDIF_TX 14 +#define CLK_MCU_SPDIF_RX 15 +#define CLK_BUS_MCU_SPDIF 16 +#define CLK_MCU_DMIC 17 +#define CLK_BUS_MCU_DMIC 18 +#define CLK_MCU_AUDIO_CODEC_DAC 19 +#define CLK_MCU_AUDIO_CODEC_ADC 20 +#define CLK_BUS_MCU_AUDIO_CODEC 21 +#define CLK_BUS_MCU_DSP_MSGBOX 22 +#define CLK_BUS_MCU_DSP_CFG 23 +#define CLK_BUS_MCU_NPU_HCLK 24 +#define CLK_BUS_MCU_NPU_ACLK 25 +#define CLK_MCU_TIMER0 26 +#define CLK_MCU_TIMER1 27 +#define CLK_MCU_TIMER2 28 +#define CLK_MCU_TIMER3 29 +#define CLK_MCU_TIMER4 30 +#define CLK_MCU_TIMER5 31 +#define CLK_BUS_MCU_TIMER 32 +#define CLK_BUS_MCU_DMA 33 +#define CLK_MCU_TZMA0 34 +#define CLK_MCU_TZMA1 35 +#define 
CLK_BUS_MCU_PUBSRAM 36 +#define CLK_MCU_MBUS_DMA 37 +#define CLK_MCU_MBUS 38 +#define CLK_MCU_RISCV 39 +#define CLK_BUS_MCU_RISCV_CFG 40 +#define CLK_BUS_MCU_RISCV_MSGBOX 41 +#define CLK_MCU_PWM0 42 +#define CLK_BUS_MCU_PWM0 43 + +#endif /* _DT_BINDINGS_CLK_SUN55I_A523_MCU_CCU_H_ */ diff --git a/include/dt-bindings/reset/sun55i-a523-mcu-ccu.h b/include/dt-bindings/reset/sun55i-a523-mcu-ccu.h new file mode 100644 index 000000000000..a89a0b44f08b --- /dev/null +++ b/include/dt-bindings/reset/sun55i-a523-mcu-ccu.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (C) 2025 Chen-Yu Tsai + */ + +#ifndef _DT_BINDINGS_RST_SUN55I_A523_MCU_CCU_H_ +#define _DT_BINDINGS_RST_SUN55I_A523_MCU_CCU_H_ + +#define RST_BUS_MCU_I2S0 0 +#define RST_BUS_MCU_I2S1 1 +#define RST_BUS_MCU_I2S2 2 +#define RST_BUS_MCU_I2S3 3 +#define RST_BUS_MCU_SPDIF 4 +#define RST_BUS_MCU_DMIC 5 +#define RST_BUS_MCU_AUDIO_CODEC 6 +#define RST_BUS_MCU_DSP_MSGBOX 7 +#define RST_BUS_MCU_DSP_CFG 8 +#define RST_BUS_MCU_NPU 9 +#define RST_BUS_MCU_TIMER 10 +#define RST_BUS_MCU_DSP_DEBUG 11 +#define RST_BUS_MCU_DSP 12 +#define RST_BUS_MCU_DMA 13 +#define RST_BUS_MCU_PUBSRAM 14 +#define RST_BUS_MCU_RISCV_CFG 15 +#define RST_BUS_MCU_RISCV_DEBUG 16 +#define RST_BUS_MCU_RISCV_CORE 17 +#define RST_BUS_MCU_RISCV_MSGBOX 18 +#define RST_BUS_MCU_PWM0 19 + +#endif /* _DT_BINDINGS_RST_SUN55I_A523_MCU_CCU_H_ */ -- cgit v1.2.3 From f15bc37d8c336e79491209b268e73868c44733c4 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 8 Sep 2025 07:35:21 +0000 Subject: iio: add IIO_ALTCURRENT channel type Add support for IIO_ALTCURRENT channel type to distinguish AC current measurements from DC current measurements. This follows the same pattern as IIO_VOLTAGE and IIO_ALTVOLTAGE. 
Reviewed-by: David Lechner Signed-off-by: Antoniu Miclaus Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 3eb0821af7a4..3c3cc1497a1e 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -52,6 +52,7 @@ enum iio_chan_type { IIO_COLORTEMP, IIO_CHROMATICITY, IIO_ATTENTION, + IIO_ALTCURRENT, }; enum iio_modifier { -- cgit v1.2.3 From 70da02061499ca89ab92f7c4310f815d5fe674ec Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Mon, 8 Sep 2025 07:35:22 +0000 Subject: iio: add power and energy measurement modifiers Add new IIO modifiers to support power and energy measurement devices: Power modifiers: - IIO_MOD_ACTIVE: Real power consumed by the load - IIO_MOD_REACTIVE: Power that oscillates between source and load - IIO_MOD_APPARENT: Magnitude of complex power Signal quality modifiers: - IIO_MOD_RMS: Root Mean Square value Additionally adds: - IIO_CHAN_INFO_POWERFACTOR: Power factor channel info type for representing the ratio of active power to apparent power These modifiers enable proper representation of power measurement devices like energy meters and power analyzers. 
Signed-off-by: Antoniu Miclaus Signed-off-by: Jonathan Cameron --- include/linux/iio/types.h | 1 + include/uapi/linux/iio/types.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index ad2761efcc83..34eebad12d2c 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -70,6 +70,7 @@ enum iio_chan_info_enum { IIO_CHAN_INFO_ZEROPOINT, IIO_CHAN_INFO_TROUGH, IIO_CHAN_INFO_CONVDELAY, + IIO_CHAN_INFO_POWERFACTOR, }; #endif /* _IIO_TYPES_H_ */ diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 3c3cc1497a1e..6d269b844271 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -109,6 +109,10 @@ enum iio_modifier { IIO_MOD_ROLL, IIO_MOD_LIGHT_UVA, IIO_MOD_LIGHT_UVB, + IIO_MOD_RMS, + IIO_MOD_ACTIVE, + IIO_MOD_REACTIVE, + IIO_MOD_APPARENT, }; enum iio_event_type { -- cgit v1.2.3 From 3422b4bc606eee2ba7758ea9347c83332eeec3e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Thu, 11 Sep 2025 14:43:45 +0200 Subject: iio: adc: Add driver for Marvell 88PM886 PMIC ADC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marvell's 88PM886 PMIC has a so-called General Purpose ADC used for monitoring various system voltages and temperatures. Add the relevant register definitions to the MFD header and a driver for the ADC. 
Acked-by: Karel Balej # for the PMIC Signed-off-by: Duje Mihanović Signed-off-by: Jonathan Cameron --- include/linux/mfd/88pm886.h | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h index 85eca44f39ab..38892ba7b8a4 100644 --- a/include/linux/mfd/88pm886.h +++ b/include/linux/mfd/88pm886.h @@ -10,6 +10,7 @@ #define PM886_IRQ_ONKEY 0 #define PM886_PAGE_OFFSET_REGULATORS 1 +#define PM886_PAGE_OFFSET_GPADC 2 #define PM886_REG_ID 0x00 @@ -70,6 +71,63 @@ #define PM886_LDO_VSEL_MASK 0x0f #define PM886_BUCK_VSEL_MASK 0x7f +/* GPADC enable/disable registers */ +#define PM886_REG_GPADC_CONFIG(n) (n) + +#define PM886_GPADC_VSC_EN BIT(0) +#define PM886_GPADC_VBAT_EN BIT(1) +#define PM886_GPADC_GNDDET1_EN BIT(3) +#define PM886_GPADC_VBUS_EN BIT(4) +#define PM886_GPADC_VCHG_PWR_EN BIT(5) +#define PM886_GPADC_VCF_OUT_EN BIT(6) +#define PM886_GPADC_CONFIG1_EN_ALL \ + (PM886_GPADC_VSC_EN | \ + PM886_GPADC_VBAT_EN | \ + PM886_GPADC_GNDDET1_EN | \ + PM886_GPADC_VBUS_EN | \ + PM886_GPADC_VCHG_PWR_EN | \ + PM886_GPADC_VCF_OUT_EN) + +#define PM886_GPADC_TINT_EN BIT(0) +#define PM886_GPADC_PMODE_EN BIT(1) +#define PM886_GPADC_GPADC0_EN BIT(2) +#define PM886_GPADC_GPADC1_EN BIT(3) +#define PM886_GPADC_GPADC2_EN BIT(4) +#define PM886_GPADC_GPADC3_EN BIT(5) +#define PM886_GPADC_MIC_DET_EN BIT(6) +#define PM886_GPADC_CONFIG2_EN_ALL \ + (PM886_GPADC_TINT_EN | \ + PM886_GPADC_GPADC0_EN | \ + PM886_GPADC_GPADC1_EN | \ + PM886_GPADC_GPADC2_EN | \ + PM886_GPADC_GPADC3_EN | \ + PM886_GPADC_MIC_DET_EN) + +/* No CONFIG3_EN_ALL because this is the only bit there. 
*/ +#define PM886_GPADC_GND_DET2_EN BIT(0) + +/* GPADC channel registers */ +#define PM886_REG_GPADC_VSC 0x40 +#define PM886_REG_GPADC_VCHG_PWR 0x4c +#define PM886_REG_GPADC_VCF_OUT 0x4e +#define PM886_REG_GPADC_TINT 0x50 +#define PM886_REG_GPADC_GPADC0 0x54 +#define PM886_REG_GPADC_GPADC1 0x56 +#define PM886_REG_GPADC_GPADC2 0x58 +#define PM886_REG_GPADC_VBAT 0xa0 +#define PM886_REG_GPADC_GND_DET1 0xa4 +#define PM886_REG_GPADC_GND_DET2 0xa6 +#define PM886_REG_GPADC_VBUS 0xa8 +#define PM886_REG_GPADC_GPADC3 0xaa +#define PM886_REG_GPADC_MIC_DET 0xac +#define PM886_REG_GPADC_VBAT_SLP 0xb0 + +/* VBAT_SLP is the last register and is 2 bytes wide like other channels. */ +#define PM886_GPADC_MAX_REGISTER (PM886_REG_GPADC_VBAT_SLP + 1) + +#define PM886_GPADC_BIAS_LEVELS 16 +#define PM886_GPADC_INDEX_TO_BIAS_uA(i) (1 + (i) * 5) + struct pm886_chip { struct i2c_client *client; unsigned int chip_id; -- cgit v1.2.3 From 1ab40529ad52f339975886a6a9e815dfdcb8d011 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 15 Jul 2025 21:52:54 +0300 Subject: media: uvcvideo: Move MSXU_CONTROL_METADATA definition to header Move the MSXU_CONTROL_METADATA control definition to the include/linux/usb/uvc.h header, alongside the corresponding XU GUID. Add a UVC_ prefix to avoid namespace clashes. While at it, add the definition for the other controls for that extension unit, as defined in https://learn.microsoft.com/en-us/windows-hardware/drivers/stream/uvc-extensions-1-5#222-extension-unit-controls. 
Signed-off-by: Laurent Pinchart Reviewed-by: Ricardo Ribalda Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Hans Verkuil --- include/linux/usb/uvc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index ee19e9f915b8..12a57e1d3467 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -33,6 +33,23 @@ {0xdc, 0x95, 0x3f, 0x0f, 0x32, 0x26, 0x4e, 0x4c, \ 0x92, 0xc9, 0xa0, 0x47, 0x82, 0xf4, 0x3b, 0xc8} +/* https://learn.microsoft.com/en-us/windows-hardware/drivers/stream/uvc-extensions-1-5#222-extension-unit-controls */ +#define UVC_MSXU_CONTROL_FOCUS 0x01 +#define UVC_MSXU_CONTROL_EXPOSURE 0x02 +#define UVC_MSXU_CONTROL_EVCOMPENSATION 0x03 +#define UVC_MSXU_CONTROL_WHITEBALANCE 0x04 +#define UVC_MSXU_CONTROL_FACE_AUTHENTICATION 0x06 +#define UVC_MSXU_CONTROL_CAMERA_EXTRINSICS 0x07 +#define UVC_MSXU_CONTROL_CAMERA_INTRINSICS 0x08 +#define UVC_MSXU_CONTROL_METADATA 0x09 +#define UVC_MSXU_CONTROL_IR_TORCH 0x0a +#define UVC_MSXU_CONTROL_DIGITALWINDOW 0x0b +#define UVC_MSXU_CONTROL_DIGITALWINDOW_CONFIG 0x0c +#define UVC_MSXU_CONTROL_VIDEO_HDR 0x0d +#define UVC_MSXU_CONTROL_FRAMERATE_THROTTLE 0x0e +#define UVC_MSXU_CONTROL_FIELDOFVIEW2_CONFIG 0x0f +#define UVC_MSXU_CONTROL_FIELDOFVIEW2 0x10 + #define UVC_GUID_FORMAT_MJPEG \ { 'M', 'J', 'P', 'G', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -- cgit v1.2.3 From 0f99b8bed426b8f5434b10c3f6f6b92d7ce3c467 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 18 Aug 2025 20:15:39 +0000 Subject: media: uvcvideo: Support UVC_CROSXU_CONTROL_IQ_PROFILE The ChromeOS XU provides a control to change the IQ profile for a camera. It can be switched from VIVID (a.k.a. standard) to NONE (a.k.a. natural). Wire it up to the standard v4l2 control. 
Signed-off-by: Ricardo Ribalda Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Reviewed-by: Laurent Pinchart Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/linux/usb/uvc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 12a57e1d3467..22e0dab0809e 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -29,6 +29,9 @@ #define UVC_GUID_EXT_GPIO_CONTROLLER \ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03} +#define UVC_GUID_CHROMEOS_XU \ + {0x24, 0xe9, 0xd7, 0x74, 0xc9, 0x49, 0x45, 0x4a, \ + 0x98, 0xa3, 0xc8, 0x07, 0x7e, 0x05, 0x1c, 0xa3} #define UVC_GUID_MSXU_1_5 \ {0xdc, 0x95, 0x3f, 0x0f, 0x32, 0x26, 0x4e, 0x4c, \ 0x92, 0xc9, 0xa0, 0x47, 0x82, 0xf4, 0x3b, 0xc8} @@ -50,6 +53,8 @@ #define UVC_MSXU_CONTROL_FIELDOFVIEW2_CONFIG 0x0f #define UVC_MSXU_CONTROL_FIELDOFVIEW2 0x10 +#define UVC_CROSXU_CONTROL_IQ_PROFILE 0x04 + #define UVC_GUID_FORMAT_MJPEG \ { 'M', 'J', 'P', 'G', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -- cgit v1.2.3 From f1880f9cc1476171bec3dae8fd56fff5665e22a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Grosjean?= Date: Fri, 12 Sep 2025 10:17:19 +0200 Subject: can: peak: Modification of references to email accounts being deleted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the upcoming deletion of @peak-system.com accounts and following the acquisition of PEAK-System and its brand by HMS-Networks, this fix aims to migrate all address references to @hms-networks.com, as well as to map my personal committer addresses to author addresses, while taking the opportunity to correct the accent on the first ‘e’ of my first name. 
Signed-off-by: Stéphane Grosjean Link: https://patch.msgid.link/20250912081820.86314-1-stephane.grosjean@free.fr Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev/peak_canfd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h index f38772fd0c07..d3788a3d0942 100644 --- a/include/linux/can/dev/peak_canfd.h +++ b/include/linux/can/dev/peak_canfd.h @@ -2,8 +2,8 @@ /* * CAN driver for PEAK System micro-CAN based adapters * - * Copyright (C) 2003-2011 PEAK System-Technik GmbH - * Copyright (C) 2011-2013 Stephane Grosjean + * Copyright (C) 2003-2025 PEAK System-Technik GmbH + * Author: Stéphane Grosjean */ #ifndef PUCAN_H #define PUCAN_H -- cgit v1.2.3 From 6eb350a2233100a283f882c023e5ad426d0ed63b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Aug 2025 17:02:30 +0200 Subject: rseq: Protect event mask against membarrier IPI rseq_need_restart() reads and clears task::rseq_event_mask with preemption disabled to guard against the scheduler. But membarrier() uses an IPI and sets the PREEMPT bit in the event mask from the IPI, which leaves that RMW operation unprotected. Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that. Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ") Signed-off-by: Thomas Gleixner Reviewed-by: Boqun Feng Reviewed-by: Mathieu Desnoyers Cc: stable@vger.kernel.org --- include/linux/rseq.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rseq.h b/include/linux/rseq.h index bc8af3eb5598..1fbeb61babeb 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -7,6 +7,12 @@ #include #include +#ifdef CONFIG_MEMBARRIER +# define RSEQ_EVENT_GUARD irq +#else +# define RSEQ_EVENT_GUARD preempt +#endif + /* * Map the event mask on the user-space ABI enum rseq_cs_flags * for direct mask checks. 
@@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig, static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { - preempt_disable(); - __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask); - preempt_enable(); + scoped_guard(RSEQ_EVENT_GUARD) + __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask); rseq_handle_notify_resume(ksig, regs); } -- cgit v1.2.3 From 2da6de30e60dd9bb14600eff1cc99df2fa2ddae3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 8 Sep 2025 15:23:15 -0700 Subject: mm: folio_may_be_lru_cached() unless folio_test_large() mm/swap.c and mm/mlock.c agree to drain any per-CPU batch as soon as a large folio is added: so collect_longterm_unpinnable_folios() just wastes effort when calling lru_add_drain[_all]() on a large folio. But although there is good reason not to batch up PMD-sized folios, we might well benefit from batching a small number of low-order mTHPs (though unclear how that "small number" limitation will be implemented). So ask if folio_may_be_lru_cached() rather than !folio_test_large(), to insulate those particular checks from future change. Name preferred to "folio_is_batchable" because large folios can well be put on a batch: it's just the per-CPU LRU caches, drained much later, which need care. Marked for stable, to counter the increase in lru_add_drain_all()s from "mm/gup: check ref_count instead of lru before migration". 
Link: https://lkml.kernel.org/r/57d2eaf8-3607-f318-e0c5-be02dce61ad0@google.com Fixes: 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region") Signed-off-by: Hugh Dickins Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: "Aneesh Kumar K.V" Cc: Axel Rasmussen Cc: Chris Li Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Johannes Weiner Cc: John Hubbard Cc: Keir Fraser Cc: Konstantin Khlebnikov Cc: Li Zhe Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Rik van Riel Cc: Shivank Garg Cc: Vlastimil Babka Cc: Wei Xu Cc: Will Deacon Cc: yangge Cc: Yuanchu Xie Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton --- include/linux/swap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fe6ed2cc3fd..7012a0f758d8 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -385,6 +385,16 @@ void folio_add_lru_vma(struct folio *, struct vm_area_struct *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); +static inline bool folio_may_be_lru_cached(struct folio *folio) +{ + /* + * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting. + * Holding small numbers of low-order mTHP folios in per-CPU LRU cache + * will be sensible, but nobody has implemented and tested that yet. + */ + return !folio_test_large(folio); +} + extern atomic_t lru_disable_count; static inline bool lru_cache_disabled(void) -- cgit v1.2.3 From e6a0deb6fa5b0fc134ee2aa127d1cfc9456d8445 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 8 Sep 2025 13:15:12 -0700 Subject: mm/damon/core: introduce damon_call_control->dealloc_on_cancel Patch series "mm/damon/sysfs: fix refresh_ms control overwriting on multi-kdamonds usages". 
Automatic essential DAMON/DAMOS status update feature of DAMON sysfs interface (refresh_ms) is broken [1] for multiple DAMON contexts (kdamonds) use case, since it uses a global single damon_call_control object for all created DAMON contexts. The fields of the object, particularly the list field is over-written for the contexts and it makes unexpected results including user-space hangup and kernel crashes [2]. Fix it by extending damon_call_control for the use case and updating the usage on DAMON sysfs interface to use per-context dynamically allocated damon_call_control object. This patch (of 2): When damon_call_control->repeat is set, damon_call() is executed asynchronously, and is eventually canceled when kdamond finishes. If the damon_call_control object is dynamically allocated, finding the place to deallocate the object is difficult. Introduce a new damon_call_control field, namely dealloc_on_cancel, to ask the kdamond to deallocate those dynamically allocated objects when those are canceled. Link: https://lkml.kernel.org/r/20250908201513.60802-3-sj@kernel.org Link: https://lkml.kernel.org/r/20250908201513.60802-2-sj@kernel.org Fixes: d809a7c64ba8 ("mm/damon/sysfs: implement refresh_ms file internal work") Signed-off-by: SeongJae Park Cc: Yunjeong Mun Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index f13664c62ddd..9e62b2a85538 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -636,6 +636,7 @@ struct damon_operations { * @data: Data that will be passed to @fn. * @repeat: Repeat invocations. * @return_code: Return code from @fn invocation. + * @dealloc_on_cancel: De-allocate when canceled. * * Control damon_call(), which requests specific kdamond to invoke a given * function. Refer to damon_call() for more details. 
@@ -645,6 +646,7 @@ struct damon_call_control { void *data; bool repeat; int return_code; + bool dealloc_on_cancel; /* private: internal use only */ /* informs if the kdamond finished handling of the request */ struct completion completion; -- cgit v1.2.3 From 56b060d0a1d3b1fd0429daeac366f00c030fca59 Mon Sep 17 00:00:00 2001 From: Joshua Hahn Date: Tue, 5 Aug 2025 13:50:47 -0700 Subject: mempolicy: clarify what zone reclaim means The zone_reclaim_mode API controls the reclaim behavior when a node runs out of memory. Contrary to its user-facing name, it is internally referred to as "node_reclaim_mode". This can be confusing. But because we cannot change the name of the API since it has been in place since at least 2.6, let's try to be more explicit about what the behavior of this API is. Change the description to clarify what zone reclaim entails, and be explicit about the RECLAIM_ZONE bit, whose purpose has led to some confusion in the past already [1] [2]. While at it, also soften the warning about changing these bits. 
[joshua.hahnjy@gmail.com: remove the reference to the vm.zone_reclaim_mode sysctl as an ABI] Link: https://lkml.kernel.org/r/20250806134404.2000234-1-joshua.hahnjy@gmail.com Link: https://lkml.kernel.org/r/20250805205048.1518453-1-joshua.hahnjy@gmail.com Link: https://lore.kernel.org/linux-mm/1579005573-58923-1-git-send-email-alex.shi@linux.alibaba.com/ [1] Link: https://lore.kernel.org/linux-mm/20200626003459.D8E015CA@viggo.jf.intel.com/ [2] Signed-off-by: Joshua Hahn Acked-by: SeongJae Park Acked-by: David Hildenbrand Reviewed-by: Huang Ying Acked-by: Zi Yan Acked-by: Byungchul Park Cc: Alistair Popple Cc: Byungchul Park Cc: Gregory Price Cc: Mathew Brost Cc: Rakie Kim Signed-off-by: Andrew Morton --- include/uapi/linux/mempolicy.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 1f9bb10d1a47..8fbbe613611a 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -66,10 +66,16 @@ enum { #define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */ /* - * These bit locations are exposed in the vm.zone_reclaim_mode sysctl - * ABI. New bits are OK, but existing bits can never change. + * Enabling zone reclaim means the page allocator will attempt to fulfill + * the allocation request on the current node by triggering reclaim and + * trying to shrink the current node. + * Fallback allocations on the next candidates in the zonelist are considered + * when reclaim fails to free up enough memory in the current node/zone. + * + * These bit locations are exposed in the vm.zone_reclaim_mode sysctl. + * New bits are OK, but existing bits should not be changed. 
*/ -#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */ +#define RECLAIM_ZONE (1<<0) /* Enable zone reclaim */ #define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ #define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */ -- cgit v1.2.3 From 337135e6124b6d37d7ef1cd5a6c0b9681938c5ee Mon Sep 17 00:00:00 2001 From: Ruan Shiyang Date: Tue, 29 Jul 2025 11:51:01 +0800 Subject: mm: memory-tiering: fix PGPROMOTE_CANDIDATE counting Goto-san reported confusing pgpromote statistics where the pgpromote_success count significantly exceeded pgpromote_candidate. On a system with three nodes (nodes 0-1: DRAM 4GB, node 2: NVDIMM 4GB): # Enable demotion only echo 1 > /sys/kernel/mm/numa/demotion_enabled numactl -m 0-1 memhog -r200 3500M >/dev/null & pid=$! sleep 2 numactl memhog -r100 2500M >/dev/null & sleep 10 kill -9 $pid # terminate the 1st memhog # Enable promotion echo 2 > /proc/sys/kernel/numa_balancing After a few seconds, we observed `pgpromote_candidate < pgpromote_success` $ grep -e pgpromote /proc/vmstat pgpromote_success 2579 pgpromote_candidate 0 In this scenario, after terminating the first memhog, the conditions for pgdat_free_space_enough() are quickly met, and triggers promotion. However, these migrated pages are only counted for in PGPROMOTE_SUCCESS, not in PGPROMOTE_CANDIDATE. To solve these confusing statistics, introduce PGPROMOTE_CANDIDATE_NRL to count the missed promotion pages. And also, not counting these pages into PGPROMOTE_CANDIDATE is to avoid changing the existing algorithm or performance of the promotion rate limit. 
Link: https://lkml.kernel.org/r/20250901090122.124262-1-ruansy.fnst@fujitsu.com Link: https://lkml.kernel.org/r/20250729035101.1601407-1-ruansy.fnst@fujitsu.com Fixes: c6833e10008f ("memory tiering: rate limit NUMA migration throughput") Co-developed-by: Li Zhijian Signed-off-by: Li Zhijian Signed-off-by: Ruan Shiyang Reported-by: Yasunori Gotou (Fujitsu) Suggested-by: Huang Ying Acked-by: Vlastimil Babka Reviewed-by: Huang Ying Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Valentin Schneider Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0c5da9141983..9d3ea9085556 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -234,7 +234,21 @@ enum node_stat_item { #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ - PGPROMOTE_CANDIDATE, /* candidate pages to promote */ + /** + * Candidate pages for promotion based on hint fault latency. This + * counter is used to control the promotion rate and adjust the hot + * threshold. + */ + PGPROMOTE_CANDIDATE, + /** + * Not rate-limited (NRL) candidate pages for those can be promoted + * without considering hot threshold because of enough free pages in + * fast-tier node. These promotions bypass the regular hotness checks + * and do NOT influence the promotion rate-limiter or + * threshold-adjustment logic. + * This is for statistics/monitoring purposes. 
+ */ + PGPROMOTE_CANDIDATE_NRL, #endif /* PGDEMOTE_*: pages demoted */ PGDEMOTE_KSWAPD, -- cgit v1.2.3 From 79e1c24285c40cdfa9eb00fe8131d1ba14b84ef1 Mon Sep 17 00:00:00 2001 From: Ye Liu Date: Fri, 18 Jul 2025 10:41:32 +0800 Subject: mm: replace (20 - PAGE_SHIFT) with common macros for pages<->MB conversion Replace repeated (20 - PAGE_SHIFT) calculations with standard macros: - MB_TO_PAGES(mb) converts MB to page count - PAGES_TO_MB(pages) converts pages to MB No functional change. [akpm@linux-foundation.org: remove arc's private PAGES_TO_MB, remove its unused PAGES_TO_KB] [akpm@linux-foundation.org: don't include mm.h due to include file ordering mess] Link: https://lkml.kernel.org/r/20250718024134.1304745-1-ye.liu@linux.dev Signed-off-by: Ye Liu Acked-by: Zi Yan Reviewed-by: Lorenzo Stoakes Reviewed-by: Dev Jain Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Ben Segall Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Dietmar Eggemann Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Josh Triplett Cc: Juri Lelli Cc: Kairui Song Cc: Kemeng Shi Cc: Lai jiangshan Cc: Liam Howlett Cc: Mariano Pache Cc: Mathieu Desnoyers Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Neeraj Upadhyay Cc: Nhat Pham Cc: "Paul E . 
McKenney" Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: "Uladzislau Rezki (Sony)" Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ae97a0b8ec7..b626d1bacef5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -69,6 +69,15 @@ static inline void totalram_pages_add(long count) extern void * high_memory; +/* + * Convert between pages and MB + * 20 is the shift for 1MB (2^20 = 1MB) + * PAGE_SHIFT is the shift for page size (e.g., 12 for 4KB pages) + * So (20 - PAGE_SHIFT) converts between pages and MB + */ +#define PAGES_TO_MB(pages) ((pages) >> (20 - PAGE_SHIFT)) +#define MB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) + #ifdef CONFIG_SYSCTL extern int sysctl_legacy_va_layout; #else -- cgit v1.2.3 From cc483b328881bbccb55265a86731384d5176fe85 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 4 Aug 2025 16:33:48 -0700 Subject: mm: limit the scope of vma_start_read() Limit the scope of vma_start_read() as it is used only as a helper for higher-level locking functions implemented inside mmap_lock.c and we are about to introduce more complex RCU rules for this function. The change is pure code refactoring and has no functional changes. 
Link: https://lkml.kernel.org/r/20250804233349.1278678-1-surenb@google.com Suggested-by: Vlastimil Babka Signed-off-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/mmap_lock.h | 85 ----------------------------------------------- 1 file changed, 85 deletions(-) (limited to 'include') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 11a078de9150..2c9fffa58714 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -147,91 +147,6 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) } } -/* - * Try to read-lock a vma. The function is allowed to occasionally yield false - * locked result to avoid performance overhead, in which case we fall back to - * using mmap_lock. The function should never yield false unlocked result. - * False locked result is possible if mm_lock_seq overflows or if vma gets - * reused and attached to a different mm before we lock it. - * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got - * detached. - * - * WARNING! The vma passed to this function cannot be used if the function - * fails to lock it because in certain cases RCU lock is dropped and then - * reacquired. Once RCU lock is dropped the vma can be concurently freed. - */ -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, - struct vm_area_struct *vma) -{ - int oldcnt; - - /* - * Check before locking. A race might cause false locked result. - * We can use READ_ONCE() for the mm_lock_seq here, and don't need - * ACQUIRE semantics, because this is just a lockless check whose result - * we don't rely on for anything - the mm_lock_seq read against which we - * need ordering is below. 
- */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) - return NULL; - - /* - * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() - * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. - * Acquire fence is required here to avoid reordering against later - * vm_lock_seq check and checks inside lock_vma_under_rcu(). - */ - if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, - VMA_REF_LIMIT))) { - /* return EAGAIN if vma got detached from under us */ - return oldcnt ? NULL : ERR_PTR(-EAGAIN); - } - - rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); - - /* - * If vma got attached to another mm from under us, that mm is not - * stable and can be freed in the narrow window after vma->vm_refcnt - * is dropped and before rcuwait_wake_up(mm) is called. Grab it before - * releasing vma->vm_refcnt. - */ - if (unlikely(vma->vm_mm != mm)) { - /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ - struct mm_struct *other_mm = vma->vm_mm; - - /* - * __mmdrop() is a heavy operation and we don't need RCU - * protection here. Release RCU lock during these operations. - * We reinstate the RCU read lock as the caller expects it to - * be held when this function returns even on error. - */ - rcu_read_unlock(); - mmgrab(other_mm); - vma_refcount_put(vma); - mmdrop(other_mm); - rcu_read_lock(); - return NULL; - } - - /* - * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. - * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq - * modification invalidates all existing locks. - * - * We must use ACQUIRE semantics for the mm_lock_seq so that if we are - * racing with vma_end_write_all(), we only start reading from the VMA - * after it has been unlocked. - * This pairs with RELEASE semantics in vma_end_write_all(). 
- */ - if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { - vma_refcount_put(vma); - return NULL; - } - - return vma; -} - /* * Use only while holding mmap read lock which guarantees that locking will not * fail (nobody can concurrently write-lock the vma). vma_start_read() should -- cgit v1.2.3 From 913fff314547c1922002e655bb25199ee38e8825 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 7 Aug 2025 00:17:47 +0800 Subject: mm, swap: remove fragment clusters counter It was used for calculating the iteration number when the swap allocator wants to scan the whole fragment list. Now the allocator only scans one fragment cluster at a time, so no one uses this counter anymore. Remove it as a cleanup; the performance change is marginal: Build linux kernel using 10G ZRAM, make -j96, defconfig with 2G cgroup memory limit, on top of tmpfs, 64kB mTHP enabled: Before: sys time: 6278.45s After: sys time: 6176.34s Change to 8G ZRAM: Before: sys time: 5572.85s After: sys time: 5531.49s Link: https://lkml.kernel.org/r/20250806161748.76651-3-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Nhat Pham Acked-by: Chris Li Cc: Baoquan He Cc: Barry Song Cc: "Huang, Ying" Cc: Kemeng Shi Signed-off-by: Andrew Morton --- include/linux/swap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fe6ed2cc3fd..a060d102e0d1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -310,7 +310,6 @@ struct swap_info_struct { /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; /* list of cluster that are fragmented or contented */ - atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int pages; /* total of usable pages of swap */ atomic_long_t inuse_pages; /* number of those currently in use */ struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */ -- cgit v1.2.3 From 
61dc4358d37ae0be3220a0fa32cf7f0ccd4f7636 Mon Sep 17 00:00:00 2001 From: "Adrian Huang (Lenovo)" Date: Wed, 6 Aug 2025 22:59:06 +0800 Subject: mm: correct misleading comment on mmap_lock field in mm_struct The comment previously described the offset of mmap_lock as 0x120 (hex), which is misleading. The correct offset is 56 bytes (decimal) from the last cache line boundary. Using '0x120' could confuse readers trying to understand why the count and owner fields reside in separate cachelines. This change also removes an unnecessary space for improved formatting. Link: https://lkml.kernel.org/r/20250806145906.24647-1-adrianhuang0701@gmail.com Signed-off-by: Adrian Huang (Lenovo) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 08bc2442db93..3ed763e7ec6f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1026,10 +1026,10 @@ struct mm_struct { * counters */ /* - * With some kernel config, the current mmap_lock's offset - * inside 'mm_struct' is at 0x120, which is very optimal, as + * Typically the current mmap_lock's offset is 56 bytes from + * the last cacheline boundary, which is very optimal, as * its two hot fields 'count' and 'owner' sit in 2 different - * cachelines, and when mmap_lock is highly contended, both + * cachelines, and when mmap_lock is highly contended, both * of the 2 fields will be accessed frequently, current layout * will help to reduce cache bouncing. * -- cgit v1.2.3 From 4c5d3365882dbbc0784688784904f440d7a4c0f1 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Wed, 6 Aug 2025 14:41:08 +0200 Subject: mm/vmalloc: allow to set node and align in vrealloc Patch series "support large align and nid in Rust allocators", v15. 
The series provides the ability for Rust allocators to set NUMA node and large alignment. This patch (of 4): Reimplement vrealloc() to be able to set node and alignment should a user need to do so. Rename the function to vrealloc_node_align() to better match what it actually does now and introduce macros for vrealloc() and friends for backward compatibility. With that change we also provide the ability for the Rust part of the kernel to set node and alignment in its allocations. Link: https://lkml.kernel.org/r/20250806124034.1724515-1-vitaly.wool@konsulko.se Link: https://lkml.kernel.org/r/20250806124108.1724561-1-vitaly.wool@konsulko.se Signed-off-by: Vitaly Wool Reviewed-by: Uladzislau Rezki (Sony) Reviewed-by: Vlastimil Babka Cc: Alice Ryhl Cc: Danilo Krummrich Cc: Herbert Xu Cc: Jann Horn Cc: Kent Overstreet Cc: Liam Howlett Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 2759dac6be44..eb54b7b3202f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -197,9 +197,15 @@ extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1 extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) -void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags) - __realloc_size(2); -#define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__)) +void *__must_check vrealloc_node_align_noprof(const void *p, size_t size, + unsigned long align, gfp_t flags, int nid) __realloc_size(2); +#define vrealloc_node_noprof(_p, _s, _f, _nid) \ + vrealloc_node_align_noprof(_p, _s, 1, _f, _nid) +#define vrealloc_noprof(_p, _s, _f) \ + vrealloc_node_align_noprof(_p, _s, 1, _f, NUMA_NO_NODE) +#define vrealloc_node_align(...) 
alloc_hooks(vrealloc_node_align_noprof(__VA_ARGS__)) +#define vrealloc_node(...) alloc_hooks(vrealloc_node_noprof(__VA_ARGS__)) +#define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__)) extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3 From 2cd8231796b5e7133b1c3d66ad7d2a3c42c97258 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Wed, 6 Aug 2025 14:41:47 +0200 Subject: mm/slub: allow to set node and align in k[v]realloc Reimplement k[v]realloc_node() to be able to set node and alignment should a user need to do so. In order to do that while retaining the maximal backward compatibility, add k[v]realloc_node_align() functions and redefine the rest of API using these new ones. While doing that, we also keep the number of _noprof variants to a minimum, which implies some changes to the existing users of older _noprof functions, that basically being bcachefs. With that change we also provide the ability for the Rust part of the kernel to set node and alignment in its K[v]xxx [re]allocations. 
Link: https://lkml.kernel.org/r/20250806124147.1724658-1-vitaly.wool@konsulko.se Signed-off-by: Vitaly Wool Reviewed-by: Vlastimil Babka Cc: Alice Ryhl Cc: Danilo Krummrich Cc: Herbert Xu Cc: Jann Horn Cc: Kent Overstreet Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton --- include/linux/bpfptr.h | 2 +- include/linux/slab.h | 39 ++++++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 1af241525a17..f6e0795db484 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -67,7 +67,7 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset, static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) { - void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN); + void *p = kvmalloc_node_align_noprof(len, 1, GFP_USER | __GFP_NOWARN, NUMA_NO_NODE); if (!p) return ERR_PTR(-ENOMEM); diff --git a/include/linux/slab.h b/include/linux/slab.h index d5a8ab98035c..6dc300bac2a1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -465,9 +465,13 @@ int kmem_cache_shrink(struct kmem_cache *s); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc_noprof(const void *objp, size_t new_size, - gfp_t flags) __realloc_size(2); -#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) +void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size, + unsigned long align, + gfp_t flags, int nid) __realloc_size(2); +#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE) +#define krealloc_node_align(...) alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__)) +#define krealloc_node(_o, _s, _f, _n) krealloc_node_align(_o, _s, 1, _f, _n) +#define krealloc(...) 
krealloc_node(__VA_ARGS__, NUMA_NO_NODE) void kfree(const void *objp); void kfree_sensitive(const void *objp); @@ -1041,18 +1045,20 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) #define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) #define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) -void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1); -#define kvmalloc_node_noprof(size, flags, node) \ - __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node) -#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) - -#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) -#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) +void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align, + gfp_t flags, int node) __alloc_size(1); +#define kvmalloc_node_align_noprof(_size, _align, _flags, _node) \ + __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node) +#define kvmalloc_node_align(...) \ + alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__)) +#define kvmalloc_node(_s, _f, _n) kvmalloc_node_align(_s, 1, _f, _n) +#define kvmalloc(...) 
kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE) #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) + #define kmem_buckets_valloc(_b, _size, _flags) \ - alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) + alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE)) static inline __alloc_size(1, 2) void * kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) @@ -1062,7 +1068,7 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - return kvmalloc_node_noprof(bytes, flags, node); + return kvmalloc_node_align_noprof(bytes, 1, flags, node); } #define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE) @@ -1073,9 +1079,12 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) #define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) -void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) - __realloc_size(2); -#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) +void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, + gfp_t flags, int nid) __realloc_size(2); +#define kvrealloc_node_align(...) \ + alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__)) +#define kvrealloc_node(_p, _s, _f, _n) kvrealloc_node_align(_p, _s, 1, _f, _n) +#define kvrealloc(...) 
kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE) extern void kvfree(const void *addr); DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) -- cgit v1.2.3 From e6d7d3502e00ed6f86e03dcdb282cb7785e55448 Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Tue, 5 Aug 2025 21:39:40 +0900 Subject: mm/damon: update expired description of damos_action Nowadays, damos operation actions support a greater operation set. But comments (also, generated documentation) weren't updated. So fix the comments with current support status. Link: https://lkml.kernel.org/r/20250805123940.13691-1-ekffu200098@gmail.com Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Honggyu Kim Signed-off-by: Andrew Morton --- include/linux/damon.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index f13664c62ddd..d01bfee80bd6 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -110,7 +110,7 @@ struct damon_target { * * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. - * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. + * @DAMOS_PAGEOUT: Reclaim the region. * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. @@ -121,10 +121,10 @@ struct damon_target { * @NR_DAMOS_ACTIONS: Total number of DAMOS actions * * The support of each action is up to running &struct damon_operations. - * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR supports all actions except - * &enum DAMOS_LRU_PRIO and &enum DAMOS_LRU_DEPRIO. &enum DAMON_OPS_PADDR - * supports only &enum DAMOS_PAGEOUT, &enum DAMOS_LRU_PRIO, &enum - * DAMOS_LRU_DEPRIO, and &DAMOS_STAT. 
+ * Refer to 'Operation Action' section of Documentation/mm/damon/design.rst for + * status of the supports. + * + * Note that DAMOS_PAGEOUT doesn't trigger demotions. */ enum damos_action { DAMOS_WILLNEED, -- cgit v1.2.3 From 95c2908f1a4fd608b1cdbb5acef3572e5d769e1c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 16:39:47 +0200 Subject: mm/migrate: remove MIGRATEPAGE_UNMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit migrate_folio_unmap() is the only user of MIGRATEPAGE_UNMAP. We want to remove MIGRATEPAGE_* completely. It's rather weird to have a generic MIGRATEPAGE_UNMAP, documented to be returned from address-space callbacks, when it's only used for an internal helper. Let's start by having only a single "success" return value for migrate_folio_unmap() -- 0 -- by moving the "folio was already freed" check into the single caller. There is a remaining comment for PG_isolated, which we renamed to PG_movable_ops_isolated recently and forgot to update. While we might still run into that case with zsmalloc, it's something we want to get rid of soon. So let's just focus that optimization on real folios only for now by excluding movable_ops pages. Note that concurrent freeing can happen at any time and this "already freed" check is not relevant for correctness. 
[david@redhat.com: no need to pass "reason" to migrate_folio_unmap(), per Lance] Link: https://lkml.kernel.org/r/3bb725f8-28d7-4aa2-b75f-af40d5cab280@redhat.com Link: https://lkml.kernel.org/r/20250811143949.1117439-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Lance Yang Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Benjamin LaHaise Cc: Byungchul Park Cc: Chris Mason Cc: Christian Brauner Cc: Christophe Leroy Cc: Dave Kleikamp Cc: David Sterba Cc: Eugenio Pérez Cc: Greg Kroah-Hartman Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Wang Cc: Jerrin Shaji George Cc: Josef Bacik Cc: Joshua Hahn Cc: Madhavan Srinivasan Cc: Matthew Brost Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Muchun Song Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Rakie Kim Cc: Sergey Senozhatsky Cc: Xuan Zhuo Cc: Dave Kleikamp Signed-off-by: Andrew Morton --- include/linux/migrate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 9009e27b5f44..302f3e95faea 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -18,7 +18,6 @@ struct migration_target_control; * - zero on page migration success; */ #define MIGRATEPAGE_SUCCESS 0 -#define MIGRATEPAGE_UNMAP 1 /** * struct movable_operations - Driver page migration -- cgit v1.2.3 From fb49a4425cfa163faccd91f913773d3401d3a7d4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 16:39:48 +0200 Subject: treewide: remove MIGRATEPAGE_SUCCESS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At this point MIGRATEPAGE_SUCCESS is misnamed for all folio users, and now that we remove MIGRATEPAGE_UNMAP, it's really the only "success" return value that the code uses and expects. Let's just get rid of MIGRATEPAGE_SUCCESS completely and just use "0" for success.
Link: https://lkml.kernel.org/r/20250811143949.1117439-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan [mm] Acked-by: Dave Kleikamp [jfs] Acked-by: David Sterba [btrfs] Acked-by: Greg Kroah-Hartman Reviewed-by: Byungchul Park Cc: Alistair Popple Cc: Al Viro Cc: Arnd Bergmann Cc: Benjamin LaHaise Cc: Chris Mason Cc: Christian Brauner Cc: Christophe Leroy Cc: Dave Kleikamp Cc: Eugenio Pérez Cc: Gregory Price Cc: "Huang, Ying" Cc: Jan Kara Cc: Jason Wang Cc: Jerrin Shaji George Cc: Josef Bacik Cc: Joshua Hahn Cc: Madhavan Srinivasan Cc: Matthew Brost Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Minchan Kim Cc: Muchun Song Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Rakie Kim Cc: Sergey Senozhatsky Cc: Xuan Zhuo Cc: Lance Yang Signed-off-by: Andrew Morton --- include/linux/migrate.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 302f3e95faea..1f0ac122c3bf 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -12,13 +12,6 @@ typedef void free_folio_t(struct folio *folio, unsigned long private); struct migration_target_control; -/* - * Return values from addresss_space_operations.migratepage(): - * - negative errno on page migration failure; - * - zero on page migration success; - */ -#define MIGRATEPAGE_SUCCESS 0 - /** * struct movable_operations - Driver page migration * @isolate_page: @@ -34,8 +27,7 @@ struct migration_target_control; * @src page. The driver should copy the contents of the * @src page to the @dst page and set up the fields of @dst page. * Both pages are locked. - * If page migration is successful, the driver should - * return MIGRATEPAGE_SUCCESS. + * If page migration is successful, the driver should return 0. * If the driver cannot migrate the page at the moment, it can return * -EAGAIN. The VM interprets this as a temporary migration failure and * will retry it later.
Any other error value is a permanent migration -- cgit v1.2.3 From b22cc9a9c7ff0ad8998d58fdd7122de6038c46a7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:27 +0200 Subject: mm/rmap: convert "enum rmap_level" to "enum pgtable_level" Let's factor it out, and convert all checks for unsupported levels to BUILD_BUG(). The code is written in a way such that force-inlining will optimize out the levels. [nathan@kernel.org: always inline __folio_rmap_sanity_checks()] Link: https://lkml.kernel.org/r/20250814-rmap-fix-build_bug-conversion-v1-1-fb7b10a0b362@kernel.org Link: https://lkml.kernel.org/r/20250811112631.759341-8-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Nathan Chancellor Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: David Vrabel Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Oscar Salvador Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Wei Yang Cc: Zi Yan Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 8 +++++++ include/linux/rmap.h | 62 ++++++++++++++++++++++--------------------------- 2 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2b80fd456c8b..4f88e460eb9c 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1975,6 +1975,14 @@ static inline bool arch_has_pfn_modify_check(void) /* Page-Table Modification Mask */ typedef unsigned int pgtbl_mod_mask; +enum pgtable_level { + PGTABLE_LEVEL_PTE = 0, + PGTABLE_LEVEL_PMD, + PGTABLE_LEVEL_PUD, + PGTABLE_LEVEL_P4D, + PGTABLE_LEVEL_PGD, +}; + #endif 
/* !__ASSEMBLY__ */ #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 6cd020eea37a..e8aff6d2deda 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -394,18 +394,8 @@ typedef int __bitwise rmap_t; /* The anonymous (sub)page is exclusive to a single process. */ #define RMAP_EXCLUSIVE ((__force rmap_t)BIT(0)) -/* - * Internally, we're using an enum to specify the granularity. We make the - * compiler emit specialized code for each granularity. - */ -enum rmap_level { - RMAP_LEVEL_PTE = 0, - RMAP_LEVEL_PMD, - RMAP_LEVEL_PUD, -}; - -static inline void __folio_rmap_sanity_checks(const struct folio *folio, - const struct page *page, int nr_pages, enum rmap_level level) +static __always_inline void __folio_rmap_sanity_checks(const struct folio *folio, + const struct page *page, int nr_pages, enum pgtable_level level) { /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); @@ -427,18 +417,18 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: break; - case RMAP_LEVEL_PMD: + case PGTABLE_LEVEL_PMD: /* * We don't support folios larger than a single PMD yet. So - * when RMAP_LEVEL_PMD is set, we assume that we are creating + * when PGTABLE_LEVEL_PMD is set, we assume that we are creating * a single "entire" mapping of the folio. */ VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PUD: /* * Assume that we are creating a single "entire" mapping of the * folio. 
@@ -447,7 +437,7 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio); break; default: - VM_WARN_ON_ONCE(true); + BUILD_BUG(); } /* @@ -567,14 +557,14 @@ static inline void hugetlb_remove_rmap(struct folio *folio) static __always_inline void __folio_dup_file_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, - enum rmap_level level) + enum pgtable_level level) { const int orig_nr_pages = nr_pages; __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (!folio_test_large(folio)) { atomic_inc(&folio->_mapcount); break; @@ -587,11 +577,13 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, } folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } } @@ -609,13 +601,13 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, static inline void folio_dup_file_rmap_ptes(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, PGTABLE_LEVEL_PTE); } static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, 1, dst_vma, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, 1, dst_vma, PGTABLE_LEVEL_PTE); } /** @@ -632,7 +624,7 @@ static inline void folio_dup_file_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, RMAP_LEVEL_PTE); + 
__folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, PGTABLE_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif @@ -640,7 +632,7 @@ static inline void folio_dup_file_rmap_pmd(struct folio *folio, static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, - struct vm_area_struct *src_vma, enum rmap_level level) + struct vm_area_struct *src_vma, enum pgtable_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; @@ -665,7 +657,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * copying if the folio maybe pinned. */ switch (level) { - case RMAP_LEVEL_PTE: + case PGTABLE_LEVEL_PTE: if (unlikely(maybe_pinned)) { for (i = 0; i < nr_pages; i++) if (PageAnonExclusive(page + i)) @@ -687,8 +679,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, } while (page++, --nr_pages > 0); folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; - case RMAP_LEVEL_PMD: - case RMAP_LEVEL_PUD: + case PGTABLE_LEVEL_PMD: + case PGTABLE_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; @@ -697,6 +689,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; + default: + BUILD_BUG(); } return 0; } @@ -730,7 +724,7 @@ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma, - src_vma, RMAP_LEVEL_PTE); + src_vma, PGTABLE_LEVEL_PTE); } static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, @@ -738,7 +732,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, struct vm_area_struct *src_vma) { return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma, - RMAP_LEVEL_PTE); + PGTABLE_LEVEL_PTE); } /** @@ -770,7 +764,7 @@ static inline int 
folio_try_dup_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma, - src_vma, RMAP_LEVEL_PMD); + src_vma, PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -778,7 +772,7 @@ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, } static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, enum pgtable_level level) { VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio); @@ -873,7 +867,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, static inline int folio_try_share_anon_rmap_pte(struct folio *folio, struct page *page) { - return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE); + return __folio_try_share_anon_rmap(folio, page, 1, PGTABLE_LEVEL_PTE); } /** @@ -904,7 +898,7 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio, { #ifdef CONFIG_TRANSPARENT_HUGEPAGE return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR, - RMAP_LEVEL_PMD); + PGTABLE_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; -- cgit v1.2.3 From ec63a44011dccebca24e7ef7e8a9521306de1bc9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:28 +0200 Subject: mm/memory: convert print_bad_pte() to print_bad_page_map() print_bad_pte() looks like something that should actually be a WARN or similar, but historically it apparently has proven to be useful to detect corruption of page tables even on production systems -- report the issue and keep the system running to make it easier to actually detect what is going wrong (e.g., multiple such messages might shed a light). As we want to unify vm_normal_page_*() handling for PTE/PMD/PUD, we'll have to take care of print_bad_pte() as well. 
Let's prepare for using print_bad_pte() also for non-PTEs by adjusting the implementation and renaming the function to print_bad_page_map(). Provide print_bad_pte() as a simple wrapper. Document the implicit locking requirements for the page table re-walk. To make the function a bit more readable, factor out the ratelimit check into is_bad_page_map_ratelimited() and place the printing of page table content into __print_bad_page_map_pgtable(). We'll now dump information from each level in a single line, and just stop the table walk once we hit something that is not a present page table. The report will now look something like (dumping pgd to pmd values): [ 77.943408] BUG: Bad page map in process XXX pte:80000001233f5867 [ 77.944077] addr:00007fd84bb1c000 vm_flags:08100071 anon_vma: ... [ 77.945186] pgd:10a89f067 p4d:10a89f067 pud:10e5a2067 pmd:105327067 Not using pgdp_get(), because that does not work properly on some arm configs where pgd_t is an array. Note that we are dumping all levels even when levels are folded for simplicity. 
[david@redhat.com: drop warning] Link: https://lkml.kernel.org/r/923b279c-de33-44dd-a923-2959afad8626@redhat.com Link: https://lkml.kernel.org/r/20250811112631.759341-9-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: David Vrabel Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Oscar Salvador Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Wei Yang Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 4f88e460eb9c..94249e671a7e 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1983,6 +1983,24 @@ enum pgtable_level { PGTABLE_LEVEL_PGD, }; +static inline const char *pgtable_level_to_str(enum pgtable_level level) +{ + switch (level) { + case PGTABLE_LEVEL_PTE: + return "pte"; + case PGTABLE_LEVEL_PMD: + return "pmd"; + case PGTABLE_LEVEL_PUD: + return "pud"; + case PGTABLE_LEVEL_P4D: + return "p4d"; + case PGTABLE_LEVEL_PGD: + return "pgd"; + default: + return "unknown"; + } +} + #endif /* !__ASSEMBLY__ */ #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) -- cgit v1.2.3 From 2db308160b5a191b494746fd167dbbaaead3fb26 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:30 +0200 Subject: mm: introduce and use vm_normal_page_pud() Let's introduce vm_normal_page_pud(), which ends up being fairly simple because of our new common helpers and there not being a PUD-sized zero folio. 
Use vm_normal_page_pud() in folio_walk_start() to resolve a TODO, structuring the code like the other (pmd/pte) cases. Defer introducing vm_normal_folio_pud() until really used. Note that we can so far get PUDs with hugetlb, daxfs and PFNMAP entries. Link: https://lkml.kernel.org/r/20250811112631.759341-11-david@redhat.com Reviewed-by: Wei Yang Reviewed-by: Oscar Salvador Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: David Vrabel Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b626d1bacef5..8ca7d2fa7134 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2360,6 +2360,8 @@ struct folio *vm_normal_folio_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); +struct page *vm_normal_page_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t pud); void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); -- cgit v1.2.3 From 4c89792ea0a224340ff198abc7caffa211baccd6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 Aug 2025 13:26:31 +0200 Subject: mm: rename vm_ops->find_special_page() to vm_ops->find_normal_page() ... and hide it behind a kconfig option. There is really no need for any !xen code to perform this check. 
The naming is a bit off: we want to find the "normal" page when a PTE was marked "special". So it's really not "finding a special" page. Improve the documentation, and add a comment in the code where XEN ends up performing the pte_mkspecial() through a hypercall. More details can be found in commit 923b2919e2c3 ("xen/gntdev: mark userspace PTEs as special on x86 PV guests"). Link: https://lkml.kernel.org/r/20250811112631.759341-12-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Reviewed-by: Lorenzo Stoakes Reviewed-by: Wei Yang Cc: David Vrabel Cc: Alistair Popple Cc: Al Viro Cc: Baolin Wang Cc: Barry Song Cc: Christian Brauner Cc: Christophe Leroy Cc: Dan Williams Cc: Dev Jain Cc: Hugh Dickins Cc: Jan Kara Cc: Jann Horn Cc: Juegren Gross Cc: Lance Yang Cc: Liam Howlett Cc: Madhavan Srinivasan Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Michal Hocko Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oleksandr Tyshchenko Cc: Ryan Roberts Cc: Stefano Stabellini Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8ca7d2fa7134..3868ca1a25f9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -657,13 +657,21 @@ struct vm_operations_struct { struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); #endif +#ifdef CONFIG_FIND_NORMAL_PAGE /* - * Called by vm_normal_page() for special PTEs to find the - * page for @addr. This is useful if the default behavior - * (using pte_page()) would not find the correct page. + * Called by vm_normal_page() for special PTEs in @vma at @addr. This + * allows for returning a "normal" page from vm_normal_page() even + * though the PTE indicates that the "struct page" either does not exist + * or should not be touched: "special". 
+ * + * Do not add new users: this really only works when a "normal" page + * was mapped, but then the PTE got changed to something weird (+ + * marked special) that would not make pte_pfn() identify the originally + * inserted page. */ - struct page *(*find_special_page)(struct vm_area_struct *vma, - unsigned long addr); + struct page *(*find_normal_page)(struct vm_area_struct *vma, + unsigned long addr); +#endif /* CONFIG_FIND_NORMAL_PAGE */ }; #ifdef CONFIG_NUMA_BALANCING -- cgit v1.2.3 From 2843408ca971d1472a6a8b32ee4647f55ecab598 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Mon, 11 Aug 2025 10:41:10 +0200 Subject: mm: rename MMF_HUGE_ZERO_PAGE to MMF_HUGE_ZERO_FOLIO As all the helper functions have been renamed from *_page to *_folio, rename the MM flag from MMF_HUGE_ZERO_PAGE to MMF_HUGE_ZERO_FOLIO. No functional changes. Link: https://lkml.kernel.org/r/20250811084113.647267-3-kernel@pankajraghav.com Signed-off-by: Pankaj Raghav Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Hannes Reinecke Cc: Baolin Wang Cc: Christoph Hellwig Cc: "Darrick J.
Wong" Cc: Dev Jain Cc: Jens Axboe Cc: Liam Howlett Cc: Luis Chamberlain Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: "Ritesh Harjani (IBM)" Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Kiryl Shutsemau Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3ed763e7ec6f..cf94df4955c7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1758,7 +1758,7 @@ enum { #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ +#define MMF_HUGE_ZERO_FOLIO 23 /* mm has ever used the global huge zero folio */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ -- cgit v1.2.3 From 2d8bd8049e89efe42a5397de4effd899e8dd2249 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Mon, 11 Aug 2025 10:41:11 +0200 Subject: mm: add persistent huge zero folio Many places in the kernel need to zero out larger chunks, but the maximum segment that can be zeroed out at a time by ZERO_PAGE is limited by PAGE_SIZE. This is especially annoying in block devices and filesystems where multiple ZERO_PAGEs are attached to the bio in different bvecs. With multipage bvec support in block layer, it is much more efficient to send out larger zero pages as a part of single bvec. This concern was raised during the review of adding Large Block Size support to XFS[1][2]. Usually huge_zero_folio is allocated on demand, and it will be deallocated by the shrinker if there are no users of it left.
At the moment, the huge_zero_folio infrastructure refcount is tied to the lifetime of the process that created it. This might not work for the bio layer as the completions can be async and the process that created the huge_zero_folio might no longer be alive. And, one of the main points that came up during discussion is to have something bigger than zero page as a drop-in replacement. Add a config option PERSISTENT_HUGE_ZERO_FOLIO that will result in allocating the huge zero folio during early init and never free the memory by disabling the shrinker. This makes it possible to use the huge_zero_folio without having to pass any mm struct and does not tie the lifetime of the zero folio to anything, making it a drop-in replacement for ZERO_PAGE. If PERSISTENT_HUGE_ZERO_FOLIO config option is enabled, then mm_get_huge_zero_folio() will simply return the allocated page instead of dynamically allocating a new PMD page. Use this option carefully in resource constrained systems as it uses one full PMD sized page for zeroing purposes. [1] https://lore.kernel.org/linux-xfs/20231027051847.GA7885@lst.de/ [2] https://lore.kernel.org/linux-xfs/ZitIK5OnR7ZNY0IG@infradead.org/ Link: https://lkml.kernel.org/r/20250811084113.647267-4-kernel@pankajraghav.com Signed-off-by: David Hildenbrand Signed-off-by: Pankaj Raghav Reviewed-by: Lorenzo Stoakes Co-developed-by: David Hildenbrand Reviewed-by: Hannes Reinecke Cc: Baolin Wang Cc: Christoph Hellwig Cc: "Darrick J.
Wong" Cc: Dev Jain Cc: Jens Axboe Cc: Liam Howlett Cc: Luis Chamberlain Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: "Ritesh Harjani (IBM)" Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Zi Yan Cc: Kiryl Shutsemau Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7748489fde1b..bd547857c6c1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -495,6 +495,17 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_folio(struct mm_struct *mm); +static inline struct folio *get_persistent_huge_zero_folio(void) +{ + if (!IS_ENABLED(CONFIG_PERSISTENT_HUGE_ZERO_FOLIO)) + return NULL; + + if (unlikely(!huge_zero_folio)) + return NULL; + + return huge_zero_folio; +} + static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); @@ -685,6 +696,11 @@ static inline int change_huge_pud(struct mmu_gather *tlb, { return 0; } + +static inline struct folio *get_persistent_huge_zero_folio(void) +{ + return NULL; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, -- cgit v1.2.3 From 415a0fd62f1899fe2bb81d661e427194b1c97201 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Mon, 11 Aug 2025 10:41:12 +0200 Subject: mm: add largest_zero_folio() routine The callers of mm_get_huge_zero_folio() have access to a mm struct and the lifetime of the huge_zero_folio is tied to the lifetime of the mm struct. largest_zero_folio() will give access to huge_zero_folio when PERSISTENT_HUGE_ZERO_FOLIO config option is enabled for callers that do not want to tie the lifetime to a mm struct.
This is very useful for filesystem and block layers where the request completions can be async and there is no guarantee on the mm struct lifetime. This function will return a ZERO_PAGE folio if PERSISTENT_HUGE_ZERO_FOLIO is disabled or if we failed to allocate a huge_zero_folio during early init. Link: https://lkml.kernel.org/r/20250811084113.647267-5-kernel@pankajraghav.com Signed-off-by: David Hildenbrand Signed-off-by: Pankaj Raghav Reviewed-by: Lorenzo Stoakes Co-developed-by: David Hildenbrand Reviewed-by: Hannes Reinecke Cc: Baolin Wang Cc: Christoph Hellwig Cc: "Darrick J. Wong" Cc: Dev Jain Cc: Jens Axboe Cc: Liam Howlett Cc: Luis Chamberalin Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: "Ritesh Harjani (IBM)" Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Thomas Gleinxer Cc: Vlastimil Babka Cc: Zi Yan Cc: Kiryl Shutsemau Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index bd547857c6c1..14d424830fa8 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -714,4 +714,26 @@ static inline int split_folio_to_order(struct folio *folio, int new_order) return split_folio_to_list_to_order(folio, NULL, new_order); } +/** + * largest_zero_folio - Get the largest zero size folio available + * + * This function shall be used when mm_get_huge_zero_folio() cannot be + * used as there is no appropriate mm lifetime to tie the huge zero folio + * from the caller. + * + * Deduce the size of the folio with folio_size instead of assuming the + * folio size. 
+ * + * Return: pointer to PMD sized zero folio if CONFIG_PERSISTENT_HUGE_ZERO_FOLIO + * is enabled or a single page sized zero folio + */ +static inline struct folio *largest_zero_folio(void) +{ + struct folio *folio = get_persistent_huge_zero_folio(); + + if (folio) + return folio; + + return page_folio(ZERO_PAGE(0)); +} #endif /* _LINUX_HUGE_MM_H */ -- cgit v1.2.3 From bb6525f2f8c41e89ba3fc506bc1705c68cf845ae Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:10 +0100 Subject: mm: add bitmap mm->flags field Patch series "mm: make mm->flags a bitmap and 64-bit on all arches". We are currently in the bizarre situation where we are constrained on the number of flags we can set in an mm_struct based on whether this is a 32-bit or 64-bit kernel. This is because mm->flags is an unsigned long field, which is 32-bits on a 32-bit system and 64-bits on a 64-bit system. In order to keep things functional across both architectures, we do not permit mm flag bits to be set above flag 31 (i.e. the 32nd bit). This is a silly situation, especially given how profligate we are in storing metadata in mm_struct, so let's convert mm->flags into a bitmap and allow ourselves as many bits as we like. In order to execute this change, we introduce a new opaque type - mm_flags_t - which wraps a bitmap. We go further and mark the bitmap field __private, which forces users to have to use accessors, which allows us to enforce atomicity rules around mm->flags (except on those occasions they are not required - fork, etc.) and makes it far easier to keep track of how mm flags are being utilised. In order to implement this change sensibly and in an iterative way, we start by introducing the type with the same bitsize as the current mm flags (system word size) and place it in union with mm->flags. We are then able to gradually update users as we go without being forced to do everything in a single patch.
In the course of working on this series I noticed the MMF_* flag masks encounter a sign extension bug that, due to the 32-bit limit on mm->flags thus far, has not caused any issues in practice, but required fixing for this series. We must make special dispensation for two cases - coredump and initialisation on fork, both of which use masks extensively. Since coredump flags are set in stone, we can safely assume they will remain in the first 32-bits of the flags. We therefore provide special non-atomic accessors for this case that access the first system word of flags, keeping everything there essentially the same. For mm->flags initialisation on fork, we adjust the logic to ensure all bits are cleared correctly, and then adjust the existing initialisation logic, dubbing the implementation utilising flags as legacy. This means we get the same fast operations as we do now, but in future we can also choose to update the forking logic to additionally propagate flags beyond 32-bits across fork. With this change in place we can, in future, decide to have as many bits as we please. Since the size of the bitmap will scale in system word multiples, there should be no issues with changes in alignment in mm_struct. Additionally, the really sensitive field (mmap_lock) is located prior to the flags field so this should have no impact on that either. This patch (of 10): We are currently in the bizarre situation where we are constrained on the number of flags we can set in an mm_struct based on whether this is a 32-bit or 64-bit kernel. This is because mm->flags is an unsigned long field, which is 32-bits on a 32-bit system and 64-bits on a 64-bit system. In order to keep things functional across both architectures, we do not permit mm flag bits to be set above flag 31 (i.e. the 32nd bit). This is a silly situation, especially given how profligate we are in storing metadata in mm_struct, so let's convert mm->flags into a bitmap and allow ourselves as many bits as we like.
To keep things manageable, firstly we introduce the bitmap at system word size as a new field mm->_flags, in union. This means the new bitmap mm->_flags is bitwise exactly identical to the existing mm->flags field. We have an opportunity to also introduce some type safety here, so let's wrap the mm flags field as a struct and declare it as an mm_flags_t typedef to keep it consistent with vm_flags_t for VMAs. We make the internal field privately accessible, in order to force the use of helper functions so we can enforce that accesses are bitwise as required. We therefore introduce accessors prefixed with mm_flags_*() for callers to use. We place the bit parameter first so as to match the parameter ordering of the *_bit() functions. Having this temporary union arrangement allows us to incrementally swap over users of mm->flags patch-by-patch rather than having to do everything in one fell swoop. [lorenzo.stoakes@oracle.com: place __private in correct place, const-ify __mm_flags_get_word] Link: https://lkml.kernel.org/r/d4ba117d-6234-4069-b871-254d152d7d21@lucifer.local Link: https://lkml.kernel.org/r/cover.1755012943.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/9de8dfd9de8c95cd31622d6e52051ba0d1848f5a.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H.
Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 32 ++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3868ca1a25f9..4ed4a0b9dad6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -34,6 +34,8 @@ #include #include #include +#include +#include struct mempolicy; struct anon_vma; @@ -720,6 +722,36 @@ static inline void assert_fault_locked(struct vm_fault *vmf) } #endif /* CONFIG_PER_VMA_LOCK */ +static inline bool mm_flags_test(int flag, const struct mm_struct *mm) +{ + return test_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline bool mm_flags_test_and_set(int flag, struct mm_struct *mm) +{ + return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline bool mm_flags_test_and_clear(int flag, struct mm_struct *mm) +{ + return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline void mm_flags_set(int flag, struct mm_struct *mm) +{ + set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline void mm_flags_clear(int flag, struct mm_struct *mm) +{ + clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); +} + +static inline void mm_flags_clear_all(struct mm_struct *mm) +{ + 
bitmap_zero(ACCESS_PRIVATE(&mm->_flags, __mm_flags), NUM_MM_FLAG_BITS); +} + extern const struct vm_operations_struct vma_dummy_vm_ops; static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index cf94df4955c7..0e001dbad455 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -927,6 +928,15 @@ struct mm_cid { }; #endif +/* + * Opaque type representing current mm_struct flag state. Must be accessed via + * mm_flags_xxx() helper functions. + */ +#define NUM_MM_FLAG_BITS BITS_PER_LONG +typedef struct { + DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); +} __private mm_flags_t; + struct kioctx_table; struct iommu_mm_data; struct mm_struct { @@ -1109,7 +1119,11 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - unsigned long flags; /* Must use atomic bitops to access */ + /* Temporary union while we convert users to mm_flags_t. */ + union { + unsigned long flags; /* Must use atomic bitops to access */ + mm_flags_t _flags; /* Must use mm_flags_* helpers to access */ + }; #ifdef CONFIG_AIO spinlock_t ioctx_lock; @@ -1219,6 +1233,28 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; +/* Set the first system word of mm flags, non-atomically. */ +static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value) +{ + unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags); + + bitmap_copy(bitmap, &value, BITS_PER_LONG); +} + +/* Obtain a read-only view of the bitmap. */ +static inline const unsigned long *__mm_flags_get_bitmap(const struct mm_struct *mm) +{ + return (const unsigned long *)ACCESS_PRIVATE(&mm->_flags, __mm_flags); +} + +/* Read the first system word of mm flags, non-atomically. 
*/ +static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm) +{ + const unsigned long *bitmap = __mm_flags_get_bitmap(mm); + + return bitmap_read(bitmap, 0, BITS_PER_LONG); +} + #define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; -- cgit v1.2.3 From 12e423ba4eaed7b1561b677d32e6599f932d03db Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:11 +0100 Subject: mm: convert core mm to mm_flags_*() accessors As part of the effort to move to mm->flags becoming a bitmap field, convert existing users to making use of the mm_flags_*() accessors which will, when the conversion is complete, be the only means of accessing mm_struct flags. This will result in the debug output being that of a bitmap output, which will result in a minor change here, but since this is for debug only, this should have no bearing. Otherwise, no functional changes intended. [akpm@linux-foundation.org: fix typo in comment]Link: https://lkml.kernel.org/r/1eb2266f4408798a55bda00cb04545a3203aa572.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Baolin Wang Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H. 
Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 +- include/linux/khugepaged.h | 6 ++++-- include/linux/ksm.h | 6 +++--- include/linux/mm.h | 2 +- include/linux/mman.h | 2 +- include/linux/oom.h | 2 +- 6 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 14d424830fa8..84b7eebe0d68 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -327,7 +327,7 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma, * example, s390 kvm. 
*/ return (vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags); + mm_flags_test(MMF_DISABLE_THP, vma->vm_mm); } static inline bool thp_disabled_by_hw(void) diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index ff6120463745..eb1946a70cff 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -2,6 +2,8 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H +#include + extern unsigned int khugepaged_max_ptes_none __read_mostly; #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; @@ -20,13 +22,13 @@ extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { - if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags)) + if (mm_flags_test(MMF_VM_HUGEPAGE, oldmm)) __khugepaged_enter(mm); } static inline void khugepaged_exit(struct mm_struct *mm) { - if (test_bit(MMF_VM_HUGEPAGE, &mm->flags)) + if (mm_flags_test(MMF_VM_HUGEPAGE, mm)) __khugepaged_exit(mm); } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/include/linux/ksm.h b/include/linux/ksm.h index c17b955e7b0b..22e67ca7cba3 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -56,13 +56,13 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm) static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { /* Adding mm to ksm is best effort on fork. 
*/ - if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) + if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) __ksm_enter(mm); } static inline int ksm_execve(struct mm_struct *mm) { - if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) + if (mm_flags_test(MMF_VM_MERGE_ANY, mm)) return __ksm_enter(mm); return 0; @@ -70,7 +70,7 @@ static inline int ksm_execve(struct mm_struct *mm) static inline void ksm_exit(struct mm_struct *mm) { - if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) + if (mm_flags_test(MMF_VM_MERGEABLE, mm)) __ksm_exit(mm); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 4ed4a0b9dad6..34311ebe62cc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1949,7 +1949,7 @@ static inline bool folio_needs_cow_for_dma(struct vm_area_struct *vma, { VM_BUG_ON(!(raw_read_seqcount(&vma->vm_mm->write_protect_seq) & 1)); - if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)) + if (!mm_flags_test(MMF_HAS_PINNED, vma->vm_mm)) return false; return folio_maybe_dma_pinned(folio); diff --git a/include/linux/mman.h b/include/linux/mman.h index de9e8e6229a4..0ba8a7e8b90a 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -201,7 +201,7 @@ static inline bool arch_memory_deny_write_exec_supported(void) static inline bool map_deny_write_exec(unsigned long old, unsigned long new) { /* If MDWE is disabled, we have nothing to deny. */ - if (!test_bit(MMF_HAS_MDWE, &current->mm->flags)) + if (!mm_flags_test(MMF_HAS_MDWE, current->mm)) return false; /* If the new VMA is not executable, we have nothing to deny.
*/ diff --git a/include/linux/oom.h b/include/linux/oom.h index 1e0fc6931ce9..7b02bc1d0a7e 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -91,7 +91,7 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) */ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) { - if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags))) + if (unlikely(mm_flags_test(MMF_UNSTABLE, mm))) return VM_FAULT_SIGBUS; return 0; } -- cgit v1.2.3 From 39f8049cd49f7e88f89a33f97f996c7306e8be0b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:15 +0100 Subject: mm: update coredump logic to correctly use bitmap mm flags The coredump logic is slightly different from other users in that it both stores mm flags and additionally sets and gets using masks. Since the MMF_DUMPABLE_* flags must remain as they are for uABI reasons, and of course these are within the first 32-bits of the flags, it is reasonable to provide access to these in the same fashion so this logic can all still keep working as it has been. Therefore, introduce coredump-specific helpers __mm_flags_get_dumpable() and __mm_flags_set_mask_dumpable() for this purpose, and update all core dump users of mm flags to use these. [lorenzo.stoakes@oracle.com: abstract set_mask_bits() invocation to mm_types.h to satisfy ARC] Link: https://lkml.kernel.org/r/0e7ad263-1ff7-446d-81fe-97cff9c0e7ed@lucifer.local Link: https://lkml.kernel.org/r/2a5075f7e3c5b367d988178c79a3063d12ee53a9.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Christian Brauner Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: David Rientjes Cc: David S. 
Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 12 ++++++++++++ include/linux/sched/coredump.h | 18 +++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e001dbad455..9d224075d895 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1255,6 +1255,18 @@ static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm) return bitmap_read(bitmap, 0, BITS_PER_LONG); } +/* + * Update the first system word of mm flags ONLY, applying the specified mask to + * it, then setting all flags specified by bits. 
+ */ +static inline void __mm_flags_set_mask_bits_word(struct mm_struct *mm, + unsigned long mask, unsigned long bits) +{ + unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags); + + set_mask_bits(bitmap, mask, bits); +} + #define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 6eb65ceed213..b7fafe999073 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -8,6 +8,20 @@ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ +static inline unsigned long __mm_flags_get_dumpable(struct mm_struct *mm) +{ + /* + * By convention, dumpable bits are contained in first 32 bits of the + * bitmap, so we can simply access this first unsigned long directly. + */ + return __mm_flags_get_word(mm); +} + +static inline void __mm_flags_set_mask_dumpable(struct mm_struct *mm, int value) +{ + __mm_flags_set_mask_bits_word(mm, MMF_DUMPABLE_MASK, value); +} + extern void set_dumpable(struct mm_struct *mm, int value); /* * This returns the actual value of the suid_dumpable flag. For things @@ -22,7 +36,9 @@ static inline int __get_dumpable(unsigned long mm_flags) static inline int get_dumpable(struct mm_struct *mm) { - return __get_dumpable(mm->flags); + unsigned long flags = __mm_flags_get_dumpable(mm); + + return __get_dumpable(flags); } #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 01f86753a05a3b971d69147d6d074c1a8d29b57d Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:16 +0100 Subject: mm: correct sign-extension issue in MMF_* flag masks There is an issue with the mask declarations in linux/mm_types.h, which naively do (1 << bit) operations. Unfortunately this results in the 1 being defaulted as a signed (32-bit) integer. 
When the compiler expands the MMF_INIT_MASK bitmask it comes up with: (((1 << 2) - 1) | (((1 << 9) - 1) << 2) | (1 << 24) | (1 << 28) | (1 << 30) | (1 << 31)) Which overflows the signed integer to -788,527,105. Implicitly casting this to an unsigned integer results in sign-expansion, and thus this value becomes 0xffffffffd10007ff, rather than the intended 0xd10007ff. While we're limited to a maximum of 32 bits in mm->flags, this isn't an issue as the remaining bits being masked will always be zero. However, now we are moving towards having more bits in this flag, this becomes an issue. Simply resolve this by using the _BITUL() helper to cast the shifted value to an unsigned long. [lorenzo.stoakes@oracle.com: prefer BIT() to _BITUL()] Link: https://lkml.kernel.org/r/a0290c77-cd88-46d6-8d9a-073be7600d88@lucifer.local Link: https://lkml.kernel.org/r/f92194bee8c92a04fd4c9b2c14c7e65229639300.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Hildenbrand Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H. 
Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9d224075d895..de09ae2a0de6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1767,7 +1767,7 @@ enum { * the modes are SUID_DUMP_* defined in linux/sched/coredump.h */ #define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) +#define MMF_DUMPABLE_MASK (BIT(MMF_DUMPABLE_BITS) - 1) /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 @@ -1782,13 +1782,13 @@ enum { #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS #define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) + ((BIT(MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + (BIT(MMF_DUMP_ANON_PRIVATE) | BIT(MMF_DUMP_ANON_SHARED) | \ + BIT(MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +# define MMF_DUMP_MASK_DEFAULT_ELF BIT(MMF_DUMP_ELF_HEADERS) #else # define 
MMF_DUMP_MASK_DEFAULT_ELF 0 #endif @@ -1808,7 +1808,7 @@ enum { #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_FOLIO 23 /* mm has ever used the global huge zero folio */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_DISABLE_THP_MASK BIT(MMF_DISABLE_THP) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* @@ -1821,16 +1821,15 @@ enum { #define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ #define MMF_HAS_MDWE 28 -#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) - +#define MMF_HAS_MDWE_MASK BIT(MMF_HAS_MDWE) #define MMF_HAS_MDWE_NO_INHERIT 29 #define MMF_VM_MERGE_ANY 30 -#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) +#define MMF_VM_MERGE_ANY_MASK BIT(MMF_VM_MERGE_ANY) #define MMF_TOPDOWN 31 /* mm searches top down by default */ -#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) +#define MMF_TOPDOWN_MASK BIT(MMF_TOPDOWN) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ -- cgit v1.2.3 From 19148a19da86f1b7d1a1b067c9f656b0f3a60fb1 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:17 +0100 Subject: mm: update fork mm->flags initialisation to use bitmap We now need to account for flag initialisation on fork. We retain the existing logic as much as we can, but dub the existing flag mask legacy. These flags are therefore required to fit in the first 32-bits of the flags field. However, further flag propagation upon fork can be implemented in mm_init() on a per-flag basis. We ensure we clear the entire bitmap prior to setting it, and use __mm_flags_get_word() and __mm_flags_set_word() to manipulate these legacy fields efficiently. 
Link: https://lkml.kernel.org/r/9fb8954a7a0f0184f012a8e66f8565bcbab014ba.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index de09ae2a0de6..69ce407b4343 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1831,16 +1831,23 @@ enum { #define MMF_TOPDOWN 31 /* mm searches top down by default */ #define MMF_TOPDOWN_MASK BIT(MMF_TOPDOWN) -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ +#define MMF_INIT_LEGACY_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) -static inline unsigned long mmf_init_flags(unsigned long flags) +/* Legacy flags must 
fit within 32 bits. */ +static_assert((u64)MMF_INIT_LEGACY_MASK <= (u64)UINT_MAX); + +/* + * Initialise legacy flags according to masks, propagating selected flags on + * fork. Further flag manipulation can be performed by the caller. + */ +static inline unsigned long mmf_init_legacy_flags(unsigned long flags) { if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) flags &= ~((1UL << MMF_HAS_MDWE) | (1UL << MMF_HAS_MDWE_NO_INHERIT)); - return flags & MMF_INIT_MASK; + return flags & MMF_INIT_LEGACY_MASK; } #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From 8166353fb8841390bf3ffe1b923a5ddeb348e4f7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 12 Aug 2025 16:44:19 +0100 Subject: mm: replace mm->flags with bitmap entirely and set to 64 bits Now we have updated all users of mm->flags to use the bitmap accessors, replace it with the bitmap version entirely. We are then able to move to having 64 bits of mm->flags on both 32-bit and 64-bit architectures. We also update the VMA userland tests to ensure that everything remains functional there. No functional changes intended, other than there now being 64 bits of available mm_struct flags. Link: https://lkml.kernel.org/r/e1f6654e016d36c43959764b01355736c5cbcdf8.1755012943.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Mike Rapoport (Microsoft) Acked-by: David Hildenbrand Cc: Adrian Hunter Cc: Alexander Gordeev Cc: Alexander Shishkin Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Baolin Wang Cc: Barry Song Cc: Ben Segall Cc: Borislav Betkov Cc: Chengming Zhou Cc: Christian Borntraeger Cc: Christian Brauner Cc: David Rientjes Cc: David S. Miller Cc: Dev Jain Cc: Dietmar Eggemann Cc: Gerald Schaefer Cc: Heiko Carstens Cc: "H.
Peter Anvin" Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kara Cc: Jann Horn Cc: Jason Gunthorpe Cc: Jiri Olsa Cc: John Hubbard Cc: Juri Lelli Cc: Kan Liang Cc: Kees Cook Cc: Marc Rutland Cc: Mariano Pache Cc: "Masami Hiramatsu (Google)" Cc: Mateusz Guzik Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Namhyung kim Cc: Oleg Nesterov Cc: Peter Xu Cc: Peter Zijlstra Cc: Ryan Roberts Cc: Shakeel Butt Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Valentin Schneider Cc: Vasily Gorbik Cc: Vincent Guittot Cc: Vlastimil Babka Cc: xu xin Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mm.h | 12 ++++++------ include/linux/mm_types.h | 14 +++++--------- 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 34311ebe62cc..b61e2d4858cf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -724,32 +724,32 @@ static inline void assert_fault_locked(struct vm_fault *vmf) static inline bool mm_flags_test(int flag, const struct mm_struct *mm) { - return test_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline bool mm_flags_test_and_set(int flag, struct mm_struct *mm) { - return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + return test_and_set_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline bool mm_flags_test_and_clear(int flag, struct mm_struct *mm) { - return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + return test_and_clear_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline void mm_flags_set(int flag, struct mm_struct *mm) { - set_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + set_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline void mm_flags_clear(int flag, struct mm_struct *mm) { - clear_bit(flag, ACCESS_PRIVATE(&mm->_flags, __mm_flags)); + clear_bit(flag, 
ACCESS_PRIVATE(&mm->flags, __mm_flags)); } static inline void mm_flags_clear_all(struct mm_struct *mm) { - bitmap_zero(ACCESS_PRIVATE(&mm->_flags, __mm_flags), NUM_MM_FLAG_BITS); + bitmap_zero(ACCESS_PRIVATE(&mm->flags, __mm_flags), NUM_MM_FLAG_BITS); } extern const struct vm_operations_struct vma_dummy_vm_ops; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 69ce407b4343..05475b5fd516 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -932,7 +932,7 @@ struct mm_cid { * Opaque type representing current mm_struct flag state. Must be accessed via * mm_flags_xxx() helper functions. */ -#define NUM_MM_FLAG_BITS BITS_PER_LONG +#define NUM_MM_FLAG_BITS (64) typedef struct { DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); } __private mm_flags_t; @@ -1119,11 +1119,7 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - /* Temporary union while we convert users to mm_flags_t. */ - union { - unsigned long flags; /* Must use atomic bitops to access */ - mm_flags_t _flags; /* Must use mm_flags_* helpers to access */ - }; + mm_flags_t flags; /* Must use mm_flags_* helpers to access */ #ifdef CONFIG_AIO spinlock_t ioctx_lock; @@ -1236,7 +1232,7 @@ struct mm_struct { /* Set the first system word of mm flags, non-atomically. */ static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value) { - unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags); + unsigned long *bitmap = ACCESS_PRIVATE(&mm->flags, __mm_flags); bitmap_copy(bitmap, &value, BITS_PER_LONG); } @@ -1244,7 +1240,7 @@ static inline void __mm_flags_set_word(struct mm_struct *mm, unsigned long value /* Obtain a read-only view of the bitmap.
*/ static inline const unsigned long *__mm_flags_get_bitmap(const struct mm_struct *mm) { - return (const unsigned long *)ACCESS_PRIVATE(&mm->_flags, __mm_flags); + return (const unsigned long *)ACCESS_PRIVATE(&mm->flags, __mm_flags); } /* Read the first system word of mm flags, non-atomically. */ @@ -1262,7 +1258,7 @@ static inline unsigned long __mm_flags_get_word(const struct mm_struct *mm) static inline void __mm_flags_set_mask_bits_word(struct mm_struct *mm, unsigned long mask, unsigned long bits) { - unsigned long *bitmap = ACCESS_PRIVATE(&mm->_flags, __mm_flags); + unsigned long *bitmap = ACCESS_PRIVATE(&mm->flags, __mm_flags); set_mask_bits(bitmap, mask, bits); } -- cgit v1.2.3 From 0f9ab62a6e44ef51ea3e7f1c552b447cf4eb20ae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Aug 2025 10:30:08 +0200 Subject: mempool: rename struct mempool_s to struct mempool Drop the pointless _s suffix and align to the usual struct naming to prepare for actually using the struct instead of the typedef so that random headers don't need to include mempool.h for just having a pointer to the mempool.
Link: https://lkml.kernel.org/r/20250812083105.371295-1-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Harry Yoo Reviewed-by: Vlastimil Babka Cc: Christoph Lameter (Ampere) Cc: David Rientjes Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/blkdev.h | 2 +- include/linux/mempool.h | 2 +- include/linux/netfs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fe1797bbec42..28ceaeffc0c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -199,7 +199,7 @@ struct gendisk { unsigned int zone_wplugs_hash_bits; atomic_t nr_zone_wplugs; spinlock_t zone_wplugs_lock; - struct mempool_s *zone_wplugs_pool; + struct mempool *zone_wplugs_pool; struct hlist_head *zone_wplugs_hash; struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 7b151441341b..34941a4b9026 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -15,7 +15,7 @@ struct kmem_cache; typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); -typedef struct mempool_s { +typedef struct mempool { spinlock_t lock; int min_nr; /* nr of elements at *elements */ int curr_nr; /* Current nr of elements at *elements */ diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 98c96d649bf9..72ee7d210a74 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -21,7 +21,7 @@ #include enum netfs_sreq_ref_trace; -typedef struct mempool_s mempool_t; +typedef struct mempool mempool_t; struct folio_queue; /** -- cgit v1.2.3 From 85b8cec15034e07500a6e5b8a5aea8185a3d775a Mon Sep 17 00:00:00 2001 From: Chris Li Date: Tue, 12 Aug 2025 00:10:59 -0700 Subject: mm: swap.h: Remove deleted field from comments The comment for struct swap_info_struct.lock incorrectly mentions fields that have already been deleted from 
the structure. Update the comments to accurately reflect the current struct swap_info_struct. There is no functional change. Link: https://lkml.kernel.org/r/20250812-swap-scan-list-v3-2-6d73504d267b@kernel.org Signed-off-by: Chris Li Reviewed-by: Kairui Song Acked-by: Nhat Pham Reviewed-by: Barry Song Cc: Baoquan He Cc: "Huang, Ying" Cc: Kemeng Shi Signed-off-by: Andrew Morton --- include/linux/swap.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index a060d102e0d1..c2da85cb7fe7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -320,11 +320,8 @@ struct swap_info_struct { struct completion comp; /* seldom referenced */ spinlock_t lock; /* * protect map scan related fields like - * swap_map, lowest_bit, highest_bit, - * inuse_pages, cluster_next, - * cluster_nr, lowest_alloc, - * highest_alloc, free/discard cluster - * list. other fields are only changed + * swap_map, inuse_pages and all cluster + * lists. other fields are only changed * at swapon/swapoff, so are protected * by swap_lock. changing flags need * hold this lock and swap_lock. If -- cgit v1.2.3 From ec45783fce52f358c9e8680d2837bc0d477f16ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Aug 2025 16:57:55 +0200 Subject: memcg: optimize exit to user space memcg uses TIF_NOTIFY_RESUME to handle reclaiming on exit to user space. TIF_NOTIFY_RESUME is a multiplexing TIF bit, which is utilized by other entities as well. This results in an unconditional mem_cgroup_handle_over_high() call for every invocation of resume_user_mode_work(), which is a pointless exercise as most of the time there is no reclaim work to do. Especially since RSEQ is used by glibc, TIF_NOTIFY_RESUME is raised quite frequently and the empty calls show up in exit path profiling. Optimize this by doing a quick check of the reclaim condition before invoking it.
[akpm@linux-foundation.org: remove now-unneeded test of memcg_nr_pages_over_high==0, per Shakeel] Link: https://lkml.kernel.org/r/87tt2b6zgs.ffs@tglx Signed-off-by: Thomas Gleixner Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Shakeel Butt Cc: Michal Hocko Cc: Muchun Song Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 785173aa0739..9fa3afc90dd5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -900,7 +900,13 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(gfp_t gfp_mask); +void __mem_cgroup_handle_over_high(gfp_t gfp_mask); + +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) +{ + if (unlikely(current->memcg_nr_pages_over_high)) + __mem_cgroup_handle_over_high(gfp_mask); +} unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); -- cgit v1.2.3 From 9dc21bbd62edeae6f63e6f25e1edb7167452457b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 15 Aug 2025 14:54:53 +0100 Subject: prctl: extend PR_SET_THP_DISABLE to optionally exclude VM_HUGEPAGE Patch series "prctl: extend PR_SET_THP_DISABLE to only provide THPs when advised", v5. This will allow individual processes to opt-out of THP = "always" into THP = "madvise", without affecting other workloads on the system. This has been extensively discussed on the mailing list and has been summarized very well by David in the first patch which also includes the links to alternatives, please refer to the first patch commit message for the motivation for this series. Patch 1 adds the PR_THP_DISABLE_EXCEPT_ADVISED flag to implement this, along with the MMF changes. 
Patch 2 is a cleanup patch for tva_flags that will allow the forced collapse case to be transmitted to vma_thp_disabled (which is done in patch 3). Patch 4 adds documentation for PR_SET_THP_DISABLE/PR_GET_THP_DISABLE. Patches 6-7 implement the selftests for PR_SET_THP_DISABLE for completely disabling THPs (old behaviour) and only enabling it at advise (PR_THP_DISABLE_EXCEPT_ADVISED). This patch (of 7): People want to make use of more THPs, for example, moving from the "never" system policy to "madvise", or from "madvise" to "always". While this is great news for every THP desperately waiting to get allocated out there, apparently there are some workloads that require a bit of care during that transition: individual processes may need to opt-out from this behavior for various reasons, and this should be permitted without needing to make all other workloads on the system similarly opt-out. The following scenarios are imaginable: (1) Switch from "never" system policy to "madvise"/"always", but keep THPs disabled for selected workloads. (2) Stay at "never" system policy, but enable THPs for selected workloads, making only these workloads use the "madvise" or "always" policy. (3) Switch from "madvise" system policy to "always", but keep the "madvise" policy for selected workloads: allocate THPs only when advised. (4) Stay at "madvise" system policy, but enable THPs even when not advised for selected workloads -- "always" policy. One can emulate (2) through (1), by setting the system policy to "madvise"/"always" while disabling THPs for all processes that don't want THPs. It requires configuring all workloads, but that is a user-space problem to sort out. (4) can be emulated through (3) in a similar way. Back when (1) was relevant in the past, as people started enabling THPs, we added PR_SET_THP_DISABLE, so relevant workloads that were not ready yet (i.e., used by Redis) were able to just disable THPs completely.
Redis still implements the option to use this interface to disable THPs completely. With PR_SET_THP_DISABLE, we added a way to force-disable THPs for a workload -- a process, including fork+exec'ed process hierarchy. That essentially made us support (1): simply disable THPs for all workloads that are not ready for THPs yet, while still enabling THPs system-wide. The quest for handling (3) and (4) started, but current approaches (completely new prctl, options to set other policies per process, alternatives to prctl -- mctrl, cgroup handling) don't look particularly promising. Likely, the future will use bpf or something similar to implement better policies, in particular to also make better decisions about THP sizes to use, but this will certainly take a while as that work just started. Long story short: a simple enable/disable is not really suitable for the future, so we're not willing to add completely new toggles. While we could emulate (3)+(4) through (1)+(2) by simply disabling THPs completely for these processes, this is a step backwards, because these processes can no longer allocate THPs in regions where THPs were explicitly advised: regions flagged as VM_HUGEPAGE. Apparently, that poses a problem for relevant workloads, because "no THPs" is certainly worse than "THPs only when advised". Could we simply relax PR_SET_THP_DISABLE, to "disable THPs unless explicitly advised by the app through MADV_HUGEPAGE"? *maybe*, but this would change the documented semantics quite a bit, and the versatility to use it for debugging purposes, so I am not 100% sure that is what we want -- although it would certainly be much easier. So instead, as an easy way forward for (3) and (4), add an option to make PR_SET_THP_DISABLE disable *fewer* THPs for a process. In essence, this patch: (A) Adds PR_THP_DISABLE_EXCEPT_ADVISED, to be used as a flag in arg3 of prctl(PR_SET_THP_DISABLE) when disabling THPs (arg2 != 0). prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED).
(B) Makes prctl(PR_GET_THP_DISABLE) return 3 if PR_THP_DISABLE_EXCEPT_ADVISED was set while disabling. Previously, it would return 1 if THPs were disabled completely. Now it returns the set flags as well: 3 if PR_THP_DISABLE_EXCEPT_ADVISED was set. (C) Renames MMF_DISABLE_THP to MMF_DISABLE_THP_COMPLETELY, to express the semantics clearly. Fortunately, there are only two instances outside of prctl() code. (D) Adds MMF_DISABLE_THP_EXCEPT_ADVISED to express "no THP except for VMAs with VM_HUGEPAGE" -- essentially "thp=madvise" behavior. Fortunately, we only have to extend vma_thp_disabled(). (E) Indicates "THP_enabled: 0" in /proc/pid/status only if THPs are disabled completely. Only indicating that THPs are disabled when they are really disabled completely, not only partially. For now, we don't add another interface to obtain whether THPs are disabled partially (PR_THP_DISABLE_EXCEPT_ADVISED was set). If ever required, we could add a new entry. The documented semantics in the man page for PR_SET_THP_DISABLE "is inherited by a child created via fork(2) and is preserved across execve(2)" is maintained. This behavior, for example, allows for disabling THPs for a workload through the launching process (e.g., systemd where we fork() a helper process to then exec()). For now, MADV_COLLAPSE will *fail* in regions without VM_HUGEPAGE and VM_NOHUGEPAGE. As MADV_COLLAPSE is a clear advice that user space thinks a THP is a good idea, we'll enable that separately next (requiring a bit of cleanup first). There is currently no way to prevent a process from issuing PR_SET_THP_DISABLE itself to re-enable THP. There are not really known users for re-enabling it, and it's against the purpose of the original interface. So if ever required, we could investigate just forbidding to re-enable them, or make this somehow configurable.
Link: https://lkml.kernel.org/r/20250815135549.130506-1-usamaarif642@gmail.com Link: https://lkml.kernel.org/r/20250815135549.130506-2-usamaarif642@gmail.com Acked-by: Zi Yan Acked-by: Usama Arif Tested-by: Usama Arif Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Signed-off-by: Usama Arif Cc: Arnd Bergmann Cc: Baolin Wang Cc: Barry Song Cc: Dev Jain Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Liam Howlett Cc: Mariano Pache Cc: Michal Hocko Cc: Mike Rapoport Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yafang Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 20 +++++++++++++++----- include/linux/mm_types.h | 13 +++++-------- include/uapi/linux/prctl.h | 10 ++++++++++ 3 files changed, 30 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 84b7eebe0d68..22b8b067b295 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -318,16 +318,26 @@ struct thpsize { (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) +/* + * Check whether THPs are explicitly disabled for this VMA, for example, + * through madvise or prctl. + */ static inline bool vma_thp_disabled(struct vm_area_struct *vma, vm_flags_t vm_flags) { + /* Are THPs disabled for this VMA? */ + if (vm_flags & VM_NOHUGEPAGE) + return true; + /* Are THPs disabled for all VMAs in the whole process? */ + if (mm_flags_test(MMF_DISABLE_THP_COMPLETELY, vma->vm_mm)) + return true; /* - * Explicitly disabled through madvise or prctl, or some - * architectures may disable THP for some mappings, for - * example, s390 kvm. + * Are THPs disabled only for VMAs where we didn't get an explicit + * advise to use them?
*/ - return (vm_flags & VM_NOHUGEPAGE) || - mm_flags_test(MMF_DISABLE_THP, vma->vm_mm); + if (vm_flags & VM_HUGEPAGE) + return false; + return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm); } static inline bool thp_disabled_by_hw(void) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 05475b5fd516..d247da2fdb52 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1792,19 +1792,16 @@ enum { #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when mm is available for khugepaged */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HUGE_ZERO_FOLIO 18 /* mm has ever used the global huge zero folio */ #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_FOLIO 23 /* mm has ever used the global huge zero folio */ -#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_DISABLE_THP_MASK BIT(MMF_DISABLE_THP) +#define MMF_DISABLE_THP_EXCEPT_ADVISED 23 /* no THP except when advised (e.g., VM_HUGEPAGE) */ +#define MMF_DISABLE_THP_COMPLETELY 24 /* no THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (BIT(MMF_DISABLE_THP_COMPLETELY) | \ + BIT(MMF_DISABLE_THP_EXCEPT_ADVISED)) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index ed3aed264aeb..150b6deebfb1 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -177,7 +177,17 @@ struct prctl_mm_map { #define PR_GET_TID_ADDRESS 40 +/* + * Flags for PR_SET_THP_DISABLE are only applicable when disabling. 
Bit 0 + * is reserved, so PR_GET_THP_DISABLE can return "1 | flags", to effectively + * return "1" when no flags were specified for PR_SET_THP_DISABLE. + */ #define PR_SET_THP_DISABLE 41 +/* + * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE / + * VM_HUGEPAGE). + */ +# define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) #define PR_GET_THP_DISABLE 42 /* -- cgit v1.2.3 From 1f1c061089dcd274befa0c76fb9f6e253a8368c0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 15 Aug 2025 14:54:54 +0100 Subject: mm/huge_memory: convert "tva_flags" to "enum tva_type" When determining which THP orders are eligible for a VMA mapping, we have previously specified tva_flags, however it turns out it is really not necessary to treat these as flags. Rather, we distinguish between distinct modes. The only case where we previously combined flags was with TVA_ENFORCE_SYSFS, but we can avoid this by observing that this is the default, except for MADV_COLLAPSE or edge cases in collapse_pte_mapped_thp() and hugepage_vma_revalidate(), and adding a mode specifically for this case - TVA_FORCED_COLLAPSE. We have: * smaps handling for showing "THPeligible" * Pagefault handling * khugepaged handling * Forced collapse handling: primarily MADV_COLLAPSE, but also for an edge case in collapse_pte_mapped_thp() Disregarding the edge cases, we want to ignore sysfs settings only when we are forcing a collapse through MADV_COLLAPSE, otherwise we want to enforce it, hence this patch does the following flag to enum conversions: * TVA_SMAPS | TVA_ENFORCE_SYSFS -> TVA_SMAPS * TVA_IN_PF | TVA_ENFORCE_SYSFS -> TVA_PAGEFAULT * TVA_ENFORCE_SYSFS -> TVA_KHUGEPAGED * 0 -> TVA_FORCED_COLLAPSE With this change, we immediately know if we are in the forced collapse case, which will be valuable next.
Link: https://lkml.kernel.org/r/20250815135549.130506-3-usamaarif642@gmail.com Signed-off-by: David Hildenbrand Signed-off-by: Usama Arif Acked-by: Usama Arif Reviewed-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Arnd Bergmann Cc: Barry Song Cc: Dev Jain Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Liam Howlett Cc: Mariano Pache Cc: Michal Hocko Cc: Mike Rapoport Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yafang Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 22b8b067b295..92ea0b9771fa 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -94,12 +94,15 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define THP_ORDERS_ALL \ (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT) -#define TVA_SMAPS (1 << 0) /* Will be used for procfs */ -#define TVA_IN_PF (1 << 1) /* Page fault handler */ -#define TVA_ENFORCE_SYSFS (1 << 2) /* Obey sysfs configuration */ +enum tva_type { + TVA_SMAPS, /* Exposing "THPeligible:" in smaps. */ + TVA_PAGEFAULT, /* Serving a page fault. */ + TVA_KHUGEPAGED, /* Khugepaged collapse. */ + TVA_FORCED_COLLAPSE, /* Forced collapse (e.g. MADV_COLLAPSE). 
*/ +}; -#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \ - (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order))) +#define thp_vma_allowable_order(vma, vm_flags, type, order) \ + (!!thp_vma_allowable_orders(vma, vm_flags, type, BIT(order))) #define split_folio(f) split_folio_to_list(f, NULL) @@ -264,14 +267,14 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders); /** * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma * @vma: the vm area to check * @vm_flags: use these vm_flags instead of vma->vm_flags - * @tva_flags: Which TVA flags to honour + * @type: TVA type * @orders: bitfield of all orders to consider * * Calculates the intersection of the requested hugepage orders and the allowed @@ -285,11 +288,14 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders) { - /* Optimization to check if required orders are enabled early. */ - if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) { + /* + * Optimization to check if required orders are enabled early. Only + * forced collapse ignores sysfs configs. 
+ */ + if (type != TVA_FORCED_COLLAPSE && vma_is_anonymous(vma)) { unsigned long mask = READ_ONCE(huge_anon_orders_always); if (vm_flags & VM_HUGEPAGE) @@ -303,7 +309,7 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, return 0; } - return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders); + return __thp_vma_allowable_orders(vma, vm_flags, type, orders); } struct thpsize { @@ -547,7 +553,7 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, vm_flags_t vm_flags, - unsigned long tva_flags, + enum tva_type type, unsigned long orders) { return 0; -- cgit v1.2.3 From 8cdc4d27019356b0304308eb799484c899b62a87 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 15 Aug 2025 14:54:55 +0100 Subject: mm/huge_memory: respect MADV_COLLAPSE with PR_THP_DISABLE_EXCEPT_ADVISED Let's allow for making MADV_COLLAPSE succeed on areas that neither have VM_HUGEPAGE nor VM_NOHUGEPAGE when we have THP disabled unless explicitly advised (PR_THP_DISABLE_EXCEPT_ADVISED). MADV_COLLAPSE is a clear advice that we want to collapse. Note that we still respect the VM_NOHUGEPAGE flag, just like MADV_COLLAPSE always does. So consequently, MADV_COLLAPSE is now only refused on VM_NOHUGEPAGE with PR_THP_DISABLE_EXCEPT_ADVISED, including for shmem. 
Link: https://lkml.kernel.org/r/20250815135549.130506-4-usamaarif642@gmail.com Co-developed-by: Usama Arif Signed-off-by: Usama Arif Signed-off-by: David Hildenbrand Reviewed-by: Baolin Wang Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Arnd Bergmann Cc: Barry Song Cc: Dev Jain Cc: Jann Horn Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Liam Howlett Cc: Mariano Pache Cc: Michal Hocko Cc: Mike Rapoport Cc: Rik van Riel Cc: Ryan Roberts Cc: SeongJae Park Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yafang Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 8 +++++++- include/uapi/linux/prctl.h | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 92ea0b9771fa..1ac0d06fb3c1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -329,7 +329,7 @@ struct thpsize { * through madvise or prctl. */ static inline bool vma_thp_disabled(struct vm_area_struct *vma, - vm_flags_t vm_flags) + vm_flags_t vm_flags, bool forced_collapse) { /* Are THPs disabled for this VMA? */ if (vm_flags & VM_NOHUGEPAGE) @@ -343,6 +343,12 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma, */ if (vm_flags & VM_HUGEPAGE) return false; + /* + * Forcing a collapse (e.g., madv_collapse), is a clear advice to + * use THPs. + */ + if (forced_collapse) + return false; return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm); } diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 150b6deebfb1..51c4e8c82b1e 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -185,7 +185,7 @@ struct prctl_mm_map { #define PR_SET_THP_DISABLE 41 /* * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE / - * VM_HUGEPAGE). + * VM_HUGEPAGE, MADV_COLLAPSE). 
*/ # define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) #define PR_GET_THP_DISABLE 42 -- cgit v1.2.3 From 53fbef56e07df822ea3029109ffca25328c2e5ac Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:51 +0100 Subject: mm: introduce memdesc_flags_t Patch series "Add and use memdesc_flags_t". At some point struct page will be separated from struct slab and struct folio. This is a step towards that by introducing a type for the 'flags' word of all three structures. This gives us a certain amount of type safety by establishing that some of these unsigned longs are different from other unsigned longs in that they contain things like node ID, section number and zone number in the upper bits. That lets us have functions that can be easily called by anyone who has a slab, folio or page (but not easily by anyone else) to get the node or zone. There's going to be some unusual merge problems with this as some odd bits of the kernel decide they want to print out the flags value or something similar by writing page->flags and now they'll need to write page->flags.f instead. That's most of the churn here. Maybe we should be removing these things from the debug output? This patch (of 11): Wrap the unsigned long flags in a typedef. In upcoming patches, this will provide a strong hint that you can't just pass a random unsigned long to functions which take this as an argument. 
[willy@infradead.org: s/flags/flags.f/ in several architectures] Link: https://lkml.kernel.org/r/aKMgPRLD-WnkPxYm@casper.infradead.org [nicola.vetrini@gmail.com: mips: fix compilation error] Link: https://lore.kernel.org/lkml/CA+G9fYvkpmqGr6wjBNHY=dRp71PLCoi2341JxOudi60yqaeUdg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20250825214245.1838158-1-nicola.vetrini@gmail.com Link: https://lkml.kernel.org/r/20250805172307.1302730-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250805172307.1302730-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mm.h | 32 ++++++++++++++++---------------- include/linux/mm_inline.h | 12 ++++++------ include/linux/mm_types.h | 8 ++++++-- include/linux/mmzone.h | 2 +- include/linux/page-flags.h | 40 ++++++++++++++++++++-------------------- include/linux/pgalloc_tag.h | 7 ++++--- include/trace/events/page_ref.h | 4 ++-- 7 files changed, 55 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b61e2d4858cf..da562f23f50c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1024,7 +1024,7 @@ static inline unsigned int compound_order(struct page *page) { struct folio *folio = (struct folio *)page; - if (!test_bit(PG_head, &folio->flags)) + if (!test_bit(PG_head, &folio->flags.f)) return 0; return folio_large_order(folio); } @@ -1554,7 +1554,7 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags) */ static inline int page_zone_id(struct page *page) { - return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; + return (page->flags.f >> ZONEID_PGSHIFT) & ZONEID_MASK; } #ifdef NODE_NOT_IN_PAGE_FLAGS @@ -1562,7 +1562,7 @@ int page_to_nid(const struct page *page); #else static inline int page_to_nid(const struct page *page) { - return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK; + return (PF_POISONED_CHECK(page)->flags.f 
>> NODES_PGSHIFT) & NODES_MASK; } #endif @@ -1637,14 +1637,14 @@ static inline void page_cpupid_reset_last(struct page *page) #else static inline int folio_last_cpupid(struct folio *folio) { - return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; + return (folio->flags.f >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK; } int folio_xchg_last_cpupid(struct folio *folio, int cpupid); static inline void page_cpupid_reset_last(struct page *page) { - page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; + page->flags.f |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT; } #endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */ @@ -1740,7 +1740,7 @@ static inline u8 page_kasan_tag(const struct page *page) u8 tag = KASAN_TAG_KERNEL; if (kasan_enabled()) { - tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + tag = (page->flags.f >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; tag ^= 0xff; } @@ -1755,12 +1755,12 @@ static inline void page_kasan_tag_set(struct page *page, u8 tag) return; tag ^= 0xff; - old_flags = READ_ONCE(page->flags); + old_flags = READ_ONCE(page->flags.f); do { flags = old_flags; flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; - } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } while (unlikely(!try_cmpxchg(&page->flags.f, &old_flags, flags))); } static inline void page_kasan_tag_reset(struct page *page) @@ -1804,13 +1804,13 @@ static inline pg_data_t *folio_pgdat(const struct folio *folio) #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { - page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); - page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; + page->flags.f &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); + page->flags.f |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } static inline unsigned long page_to_section(const struct page *page) { - return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; + return (page->flags.f >> 
SECTIONS_PGSHIFT) & SECTIONS_MASK; } #endif @@ -2015,14 +2015,14 @@ static inline bool folio_is_longterm_pinnable(struct folio *folio) static inline void set_page_zone(struct page *page, enum zone_type zone) { - page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); - page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; + page->flags.f &= ~(ZONES_MASK << ZONES_PGSHIFT); + page->flags.f |= (zone & ZONES_MASK) << ZONES_PGSHIFT; } static inline void set_page_node(struct page *page, unsigned long node) { - page->flags &= ~(NODES_MASK << NODES_PGSHIFT); - page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; + page->flags.f &= ~(NODES_MASK << NODES_PGSHIFT); + page->flags.f |= (node & NODES_MASK) << NODES_PGSHIFT; } static inline void set_page_links(struct page *page, enum zone_type zone, @@ -2064,7 +2064,7 @@ static inline long compound_nr(struct page *page) { struct folio *folio = (struct folio *)page; - if (!test_bit(PG_head, &folio->flags)) + if (!test_bit(PG_head, &folio->flags.f)) return 1; return folio_large_nr_pages(folio); } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 89b518ff097e..150302b4a905 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -143,7 +143,7 @@ static inline int lru_tier_from_refs(int refs, bool workingset) static inline int folio_lru_refs(struct folio *folio) { - unsigned long flags = READ_ONCE(folio->flags); + unsigned long flags = READ_ONCE(folio->flags.f); if (!(flags & BIT(PG_referenced))) return 0; @@ -156,7 +156,7 @@ static inline int folio_lru_refs(struct folio *folio) static inline int folio_lru_gen(struct folio *folio) { - unsigned long flags = READ_ONCE(folio->flags); + unsigned long flags = READ_ONCE(folio->flags.f); return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; } @@ -268,7 +268,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ - 
set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); + set_mask_bits(&folio->flags.f, LRU_GEN_MASK | BIT(PG_active), flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ @@ -293,7 +293,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, /* for folio_migrate_flags() */ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0; - flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags); + flags = set_mask_bits(&folio->flags.f, LRU_GEN_MASK, flags); gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; lru_gen_update_size(lruvec, folio, gen, -1); @@ -304,9 +304,9 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, static inline void folio_migrate_refs(struct folio *new, struct folio *old) { - unsigned long refs = READ_ONCE(old->flags) & LRU_REFS_MASK; + unsigned long refs = READ_ONCE(old->flags.f) & LRU_REFS_MASK; - set_mask_bits(&new->flags, LRU_REFS_MASK, refs); + set_mask_bits(&new->flags.f, LRU_REFS_MASK, refs); } #else /* !CONFIG_LRU_GEN */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d247da2fdb52..d934a3a5b443 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -34,6 +34,10 @@ struct address_space; struct futex_private_hash; struct mem_cgroup; +typedef struct { + unsigned long f; +} memdesc_flags_t; + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -72,7 +76,7 @@ struct mem_cgroup; #endif struct page { - unsigned long flags; /* Atomic flags, some possibly + memdesc_flags_t flags; /* Atomic flags, some possibly * updated asynchronously */ /* * Five words (20/40 bytes) are available in this union. 
@@ -383,7 +387,7 @@ struct folio { union { struct { /* public: */ - unsigned long flags; + memdesc_flags_t flags; union { struct list_head lru; /* private: avoid cluttering the output */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9d3ea9085556..990560cd99ee 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1186,7 +1186,7 @@ static inline bool zone_is_empty(struct zone *zone) static inline enum zone_type page_zonenum(const struct page *page) { ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); - return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; + return (page->flags.f >> ZONES_PGSHIFT) & ZONES_MASK; } static inline enum zone_type folio_zonenum(const struct folio *folio) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 8d3fa3a91ce4..d53a86e68c89 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -217,7 +217,7 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page * cold cacheline in some cases. 
*/ if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && - test_bit(PG_head, &page->flags)) { + test_bit(PG_head, &page->flags.f)) { /* * We can safely access the field of the @page[1] with PG_head * because the @page is a compound page composed with at least @@ -325,14 +325,14 @@ static __always_inline int PageTail(const struct page *page) static __always_inline int PageCompound(const struct page *page) { - return test_bit(PG_head, &page->flags) || + return test_bit(PG_head, &page->flags.f) || READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { - return READ_ONCE(page->flags) == PAGE_POISON_PATTERN; + return READ_ONCE(page->flags.f) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM @@ -349,8 +349,8 @@ static const unsigned long *const_folio_flags(const struct folio *folio, const struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); - VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); - return &page[n].flags; + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); + return &page[n].flags.f; } static unsigned long *folio_flags(struct folio *folio, unsigned n) @@ -358,8 +358,8 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) struct page *page = &folio->page; VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); - VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); - return &page[n].flags; + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags.f), page); + return &page[n].flags.f; } /* @@ -449,37 +449,37 @@ FOLIO_CLEAR_FLAG(name, page) #define TESTPAGEFLAG(uname, lname, policy) \ FOLIO_TEST_FLAG(lname, FOLIO_##policy) \ static __always_inline int Page##uname(const struct page *page) \ -{ return test_bit(PG_##lname, &policy(page, 0)->flags); } +{ return test_bit(PG_##lname, &policy(page, 0)->flags.f); } #define SETPAGEFLAG(uname, lname, policy) \ FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static 
__always_inline void SetPage##uname(struct page *page) \ -{ set_bit(PG_##lname, &policy(page, 1)->flags); } +{ set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define CLEARPAGEFLAG(uname, lname, policy) \ FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void ClearPage##uname(struct page *page) \ -{ clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define __SETPAGEFLAG(uname, lname, policy) \ __FOLIO_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline void __SetPage##uname(struct page *page) \ -{ __set_bit(PG_##lname, &policy(page, 1)->flags); } +{ __set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define __CLEARPAGEFLAG(uname, lname, policy) \ __FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline void __ClearPage##uname(struct page *page) \ -{ __clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ __clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define TESTSETFLAG(uname, lname, policy) \ FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestSetPage##uname(struct page *page) \ -{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags.f); } #define TESTCLEARFLAG(uname, lname, policy) \ FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \ static __always_inline int TestClearPage##uname(struct page *page) \ -{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags.f); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ @@ -846,7 +846,7 @@ static __always_inline bool folio_test_head(const struct folio *folio) static __always_inline int PageHead(const struct page *page) { PF_POISONED_CHECK(page); - return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); + return test_bit(PG_head, &page->flags.f) && !page_is_fake_head(page); } __SETPAGEFLAG(Head, head, PF_ANY) @@ -1170,28 
+1170,28 @@ static __always_inline int PageAnonExclusive(const struct page *page) */ if (PageHuge(page)) page = compound_head(page); - return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void SetPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } static __always_inline void __ClearPageAnonExclusive(struct page *page) { VM_BUG_ON_PGFLAGS(!PageAnon(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); - __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); + __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags.f); } #ifdef CONFIG_MMU @@ -1241,7 +1241,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ static inline int folio_has_private(const struct folio *folio) { - return !!(folio->flags & PAGE_FLAGS_PRIVATE); + return !!(folio->flags.f & PAGE_FLAGS_PRIVATE); } #undef PF_ANY diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 8a7f4f802c57..38a82d65e58e 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -107,7 +107,8 @@ static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref, if (static_key_enabled(&mem_profiling_compressed)) { pgalloc_tag_idx idx; - idx = (page->flags >> alloc_tag_ref_offs) & alloc_tag_ref_mask; + idx = (page->flags.f >> alloc_tag_ref_offs) & + alloc_tag_ref_mask; idx_to_ref(idx, ref); handle->page = page; } else { @@ -149,11 
+150,11 @@ static inline void update_page_tag_ref(union pgtag_ref_handle handle, union code idx = (unsigned long)ref_to_idx(ref); idx = (idx & alloc_tag_ref_mask) << alloc_tag_ref_offs; do { - old_flags = READ_ONCE(page->flags); + old_flags = READ_ONCE(page->flags.f); flags = old_flags; flags &= ~(alloc_tag_ref_mask << alloc_tag_ref_offs); flags |= idx; - } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } while (unlikely(!try_cmpxchg(&page->flags.f, &old_flags, flags))); } else { if (WARN_ON(!handle.ref || !ref)) return; diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h index fe33a255b7d0..ea6b5c4baf3d 100644 --- a/include/trace/events/page_ref.h +++ b/include/trace/events/page_ref.h @@ -28,7 +28,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_template, TP_fast_assign( __entry->pfn = page_to_pfn(page); - __entry->flags = page->flags; + __entry->flags = page->flags.f; __entry->count = page_ref_count(page); __entry->mapcount = atomic_read(&page->_mapcount); __entry->mapping = page->mapping; @@ -77,7 +77,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_and_test_template, TP_fast_assign( __entry->pfn = page_to_pfn(page); - __entry->flags = page->flags; + __entry->flags = page->flags.f; __entry->count = page_ref_count(page); __entry->mapcount = atomic_read(&page->_mapcount); __entry->mapping = page->mapping; -- cgit v1.2.3 From 56d578c1300f7efe9605b75714173dd3fda16fe2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:52 +0100 Subject: mm: convert page_to_section() to memdesc_section() Pass in the memdesc_flags_t instead of a pointer to the page. This will allow us to remove a few conversions to struct page in upcoming patches. 
Link: https://lkml.kernel.org/r/20250805172307.1302730-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/asm-generic/memory_model.h | 2 +- include/linux/mm.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 74d0077cc5fa..efa6610acbc7 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -53,7 +53,7 @@ static inline int pfn_valid(unsigned long pfn) */ #define __page_to_pfn(pg) \ ({ const struct page *__pg = (pg); \ - int __sec = page_to_section(__pg); \ + int __sec = memdesc_section(__pg->flags); \ (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \ }) diff --git a/include/linux/mm.h b/include/linux/mm.h index da562f23f50c..82617c4cfa24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1808,9 +1808,9 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags.f |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline unsigned long page_to_section(const struct page *page) +static inline unsigned long memdesc_section(memdesc_flags_t mdf) { - return (page->flags.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; + return (mdf.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } #endif -- cgit v1.2.3 From eb00fdd84ddabd6948d26595bb5e8c1302220d37 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:53 +0100 Subject: mm: introduce memdesc_nid() Remove a conversion from folio to page by passing the folio->flags (which are a copy of the page->flags) to the new memdesc_nid() function. 
Link: https://lkml.kernel.org/r/20250805172307.1302730-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mm.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 82617c4cfa24..00c8a54127d3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1558,17 +1558,22 @@ static inline int page_zone_id(struct page *page) } #ifdef NODE_NOT_IN_PAGE_FLAGS -int page_to_nid(const struct page *page); +int memdesc_nid(memdesc_flags_t mdf); #else -static inline int page_to_nid(const struct page *page) +static inline int memdesc_nid(memdesc_flags_t mdf) { - return (PF_POISONED_CHECK(page)->flags.f >> NODES_PGSHIFT) & NODES_MASK; + return (mdf.f >> NODES_PGSHIFT) & NODES_MASK; } #endif +static inline int page_to_nid(const struct page *page) +{ + return memdesc_nid(PF_POISONED_CHECK(page)->flags); +} + static inline int folio_nid(const struct folio *folio) { - return page_to_nid(&folio->page); + return memdesc_nid(folio->flags); } #ifdef CONFIG_NUMA_BALANCING @@ -1791,14 +1796,14 @@ static inline pg_data_t *page_pgdat(const struct page *page) return NODE_DATA(page_to_nid(page)); } -static inline struct zone *folio_zone(const struct folio *folio) +static inline pg_data_t *folio_pgdat(const struct folio *folio) { - return page_zone(&folio->page); + return NODE_DATA(folio_nid(folio)); } -static inline pg_data_t *folio_pgdat(const struct folio *folio) +static inline struct zone *folio_zone(const struct folio *folio) { - return page_pgdat(&folio->page); + return &folio_pgdat(folio)->node_zones[folio_zonenum(folio)]; } #ifdef SECTION_IN_PAGE_FLAGS -- cgit v1.2.3 From 4aff03fbe508780394039053bebfc4f4800b286e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:54 +0100 Subject: mm: introduce memdesc_zonenum() Remove a conversion from folio to page by 
passing the folio->flags (which are a copy of the page->flags) to the new memdesc_zonenum() function. Link: https://lkml.kernel.org/r/20250805172307.1302730-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 990560cd99ee..80a3b6642603 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1183,15 +1183,20 @@ static inline bool zone_is_empty(struct zone *zone) #define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) +static inline enum zone_type memdesc_zonenum(memdesc_flags_t flags) +{ + ASSERT_EXCLUSIVE_BITS(flags.f, ZONES_MASK << ZONES_PGSHIFT); + return (flags.f >> ZONES_PGSHIFT) & ZONES_MASK; +} + static inline enum zone_type page_zonenum(const struct page *page) { - ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT); - return (page->flags.f >> ZONES_PGSHIFT) & ZONES_MASK; + return memdesc_zonenum(page->flags); } static inline enum zone_type folio_zonenum(const struct folio *folio) { - return page_zonenum(&folio->page); + return memdesc_zonenum(folio->flags); } #ifdef CONFIG_ZONE_DEVICE -- cgit v1.2.3 From 89ef6ad6fa849b780b5a5caae9068261603e1738 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:57 +0100 Subject: mm: introduce memdesc_is_zone_device() Remove the conversion from folio to page in folio_is_zone_device() by introducing memdesc_is_zone_device() which takes a memdesc_flags_t from either a page or a folio. 
Link: https://lkml.kernel.org/r/20250805172307.1302730-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 80a3b6642603..fe13ad175fed 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1200,14 +1200,14 @@ static inline enum zone_type folio_zonenum(const struct folio *folio) } #ifdef CONFIG_ZONE_DEVICE -static inline bool is_zone_device_page(const struct page *page) +static inline bool memdesc_is_zone_device(memdesc_flags_t mdf) { - return page_zonenum(page) == ZONE_DEVICE; + return memdesc_zonenum(mdf) == ZONE_DEVICE; } static inline struct dev_pagemap *page_pgmap(const struct page *page) { - VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page); + VM_WARN_ON_ONCE_PAGE(!memdesc_is_zone_device(page->flags), page); return page_folio(page)->pgmap; } @@ -1222,9 +1222,9 @@ static inline struct dev_pagemap *page_pgmap(const struct page *page) static inline bool zone_device_pages_have_same_pgmap(const struct page *a, const struct page *b) { - if (is_zone_device_page(a) != is_zone_device_page(b)) + if (memdesc_is_zone_device(a->flags) != memdesc_is_zone_device(b->flags)) return false; - if (!is_zone_device_page(a)) + if (!memdesc_is_zone_device(a->flags)) return true; return page_pgmap(a) == page_pgmap(b); } @@ -1232,7 +1232,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else -static inline bool is_zone_device_page(const struct page *page) +static inline bool memdesc_is_zone_device(memdesc_flags_t mdf) { return false; } @@ -1247,9 +1247,14 @@ static inline struct dev_pagemap *page_pgmap(const struct page *page) } #endif +static inline bool 
is_zone_device_page(const struct page *page) +{ + return memdesc_is_zone_device(page->flags); +} + static inline bool folio_is_zone_device(const struct folio *folio) { - return is_zone_device_page(&folio->page); + return memdesc_is_zone_device(folio->flags); } static inline bool is_zone_movable_page(const struct page *page) -- cgit v1.2.3 From 7cfe9cafb6adebc13a246bebafcd69cd37add4e6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:58 +0100 Subject: mm: reimplement folio_is_device_private() For callers of folio_is_device_private(), we save a folio->page->folio conversion. Callers of is_device_private_page() simply move the conversion of page->folio from the implementation of page_pgmap() to is_device_private_page(). Link: https://lkml.kernel.org/r/20250805172307.1302730-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/memremap.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 4aa151914eab..5d18cb7a70e5 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -157,16 +157,17 @@ static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) return 1 << pgmap->vmemmap_shift; } -static inline bool is_device_private_page(const struct page *page) +static inline bool folio_is_device_private(const struct folio *folio) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && - is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE; + folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_PRIVATE; } -static inline bool folio_is_device_private(const struct folio *folio) +static inline bool is_device_private_page(const struct page *page) { - return is_device_private_page(&folio->page); + return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && + folio_is_device_private(page_folio(page)); } static 
inline bool is_pci_p2pdma_page(const struct page *page) -- cgit v1.2.3 From bd0dbbb3fd902c7eea7eb166d91bda4530a8de96 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:22:59 +0100 Subject: mm: reimplement folio_is_device_coherent() For callers of folio_is_device_coherent(), we save a folio->page->folio conversion. Callers of is_device_coherent_page() simply move the conversion of page->folio from the implementation of page_pgmap() to is_device_coherent_page(). Link: https://lkml.kernel.org/r/20250805172307.1302730-10-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/memremap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 5d18cb7a70e5..06d29794abe6 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -177,15 +177,15 @@ static inline bool is_pci_p2pdma_page(const struct page *page) page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; } -static inline bool is_device_coherent_page(const struct page *page) +static inline bool folio_is_device_coherent(const struct folio *folio) { - return is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_COHERENT; + return folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_COHERENT; } -static inline bool folio_is_device_coherent(const struct folio *folio) +static inline bool is_device_coherent_page(const struct page *page) { - return is_device_coherent_page(&folio->page); + return folio_is_device_coherent(page_folio(page)); } static inline bool is_fsdax_page(const struct page *page) -- cgit v1.2.3 From c995ac3aa3747ec2a0373e5f319a22e0cb31d613 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:23:00 +0100 Subject: mm: reimplement folio_is_fsdax() For callers of folio_is_fsdax(), we save a folio->page->folio conversion. 
Callers of is_fsdax_page() simply move the conversion of page->folio from the implementation of page_pgmap() to is_fsdax_page(). Link: https://lkml.kernel.org/r/20250805172307.1302730-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/memremap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 06d29794abe6..450d4bb6835c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -188,15 +188,15 @@ static inline bool is_device_coherent_page(const struct page *page) return folio_is_device_coherent(page_folio(page)); } -static inline bool is_fsdax_page(const struct page *page) +static inline bool folio_is_fsdax(const struct folio *folio) { - return is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX; + return folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_FS_DAX; } -static inline bool folio_is_fsdax(const struct folio *folio) +static inline bool is_fsdax_page(const struct page *page) { - return is_fsdax_page(&folio->page); + return folio_is_fsdax(page_folio(page)); } #ifdef CONFIG_ZONE_DEVICE -- cgit v1.2.3 From 88df6ab2f34b60837ebdab64b2514f356d5ebb65 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 5 Aug 2025 18:23:01 +0100 Subject: mm: add folio_is_pci_p2pdma() Reimplement is_pci_p2pdma_page() in terms of folio_is_pci_p2pdma(). Moves the page_folio() call from inside page_pgmap() to is_pci_p2pdma_page(). This removes a page_folio() call from try_grab_folio() which already has a folio and can pass it in. 
Link: https://lkml.kernel.org/r/20250805172307.1302730-12-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Shakeel Butt Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/memremap.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 450d4bb6835c..aa1b6aa877a0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -170,11 +170,17 @@ static inline bool is_device_private_page(const struct page *page) folio_is_device_private(page_folio(page)); } +static inline bool folio_is_pci_p2pdma(const struct folio *folio) +{ + return IS_ENABLED(CONFIG_PCI_P2PDMA) && + folio_is_zone_device(folio) && + folio->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; +} + static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && - is_zone_device_page(page) && - page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; + folio_is_pci_p2pdma(page_folio(page)); } static inline bool folio_is_device_coherent(const struct folio *folio) -- cgit v1.2.3 From 7bebb41b96b5a898134b757fda520b7b990a91fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Aug 2025 08:10:10 +0200 Subject: mm: remove write_cache_pages No users left. 
Link: https://lkml.kernel.org/r/20250818061017.1526853-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: David Hildenbrand Cc: Kent Overstreet Cc: Konstantin Komarov Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/writeback.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a2848d731a46..2a7e134d03ee 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -360,12 +360,6 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb); struct folio *writeback_iter(struct address_space *mapping, struct writeback_control *wbc, struct folio *folio, int *error); -typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, - void *data); - -int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, -- cgit v1.2.3 From 0cd01c4a5cc140efb9fc203dd05ffccf3c2197d0 Mon Sep 17 00:00:00 2001 From: gaoxiang17 Date: Thu, 21 Aug 2025 06:38:55 +0800 Subject: mm/cma: add 'available count' and 'total count' to trace_cma_alloc_start This makes cma info more intuitive during debugging. 
Show up in the trace as: 279.814717: cma_alloc_start: name=reserved request_count=4 available_count=8096 total_count=8192 align=0 309.790580: cma_alloc_start: name=reserved request_count=4 available_count=8092 total_count=8192 align=0 317.046609: cma_alloc_start: name=reserved request_count=4 available_count=8088 total_count=8192 align=0 Link: https://lkml.kernel.org/r/8a79284879c529f467478552825154b018076e95.1755729178.git.gaoxiang17@xiaomi.com Signed-off-by: gaoxiang17 Cc: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/trace/events/cma.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h index 383c09f583ac..37195edf2498 100644 --- a/include/trace/events/cma.h +++ b/include/trace/events/cma.h @@ -38,25 +38,32 @@ TRACE_EVENT(cma_release, TRACE_EVENT(cma_alloc_start, - TP_PROTO(const char *name, unsigned long count, unsigned int align), + TP_PROTO(const char *name, unsigned long request_count, unsigned long available_count, + unsigned long total_count, unsigned int align), - TP_ARGS(name, count, align), + TP_ARGS(name, request_count, available_count, total_count, align), TP_STRUCT__entry( __string(name, name) - __field(unsigned long, count) + __field(unsigned long, request_count) + __field(unsigned long, available_count) + __field(unsigned long, total_count) __field(unsigned int, align) ), TP_fast_assign( __assign_str(name); - __entry->count = count; + __entry->request_count = request_count; + __entry->available_count = available_count; + __entry->total_count = total_count; __entry->align = align; ), - TP_printk("name=%s count=%lu align=%u", + TP_printk("name=%s request_count=%lu available_count=%lu total_count=%lu align=%u", __get_str(name), - __entry->count, + __entry->request_count, + __entry->available_count, + 
__entry->total_count, __entry->align) ); -- cgit v1.2.3 From dfd04add595b97758c8ad1ee970554b7af5c57dd Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Mon, 25 Aug 2025 09:59:26 -0300 Subject: kmem/tracing: add kmem name to kmem_cache_alloc tracepoint The kmem_cache_free tracepoint includes a "name" field, which allows for easy identification and filtering of specific kmem's. However, the kmem_cache_alloc tracepoint lacks this field, making it difficult to pair corresponding alloc and free events for analysis. Add the "name" field to kmem_cache_alloc to enable consistent tracking and correlation of kmem alloc and free events. Link: https://lkml.kernel.org/r/20250825125927.59816-1-wander@redhat.com Signed-off-by: Wander Lairson Costa Cc: David Hildenbrand Cc: David Rientjes Cc: Martin Liu Cc: "Masami Hiramatsu (Google)" Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Zi Yan Signed-off-by: Andrew Morton --- include/trace/events/kmem.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 474358773abe..7f93e754da5c 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -22,6 +22,7 @@ TRACE_EVENT(kmem_cache_alloc, TP_STRUCT__entry( __field( unsigned long, call_site ) __field( const void *, ptr ) + __string( name, s->name ) __field( size_t, bytes_req ) __field( size_t, bytes_alloc ) __field( unsigned long, gfp_flags ) @@ -32,6 +33,7 @@ TRACE_EVENT(kmem_cache_alloc, TP_fast_assign( __entry->call_site = call_site; __entry->ptr = ptr; + __assign_str(name); __entry->bytes_req = s->object_size; __entry->bytes_alloc = s->size; __entry->gfp_flags = (__force unsigned long)gfp_flags; @@ -41,9 +43,10 @@ TRACE_EVENT(kmem_cache_alloc, (s->flags & SLAB_ACCOUNT)) : false; ), - TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d accounted=%s", + TP_printk("call_site=%pS ptr=%p name=%s bytes_req=%zu bytes_alloc=%zu 
gfp_flags=%s node=%d accounted=%s", (void *)__entry->call_site, __entry->ptr, + __get_str(name), __entry->bytes_req, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags), -- cgit v1.2.3 From ef49b7b39d50b9e4f9d63e64f5d8acafe3c71158 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 26 Aug 2025 15:13:44 +0000 Subject: maple_tree: fix MAPLE_PARENT_RANGE32 and parent pointer docs MAPLE_PARENT_RANGE32 should be 0x02 as a 32 bit node is indicated by the bit pattern 0b010 which is the hex value 0x02. There are no users currently, so there is no associated bug with this wrong value. Fix typo Note -> Node and replace x with b to indicate binary values. Link: https://lkml.kernel.org/r/20250826151344.403286-1-sidhartha.kumar@oracle.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Sidhartha Kumar Reviewed-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index bafe143b1f78..41e633264e51 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -57,17 +57,17 @@ * MT_FLAGS_ALLOC_RANGE flag. 
* * Node types: - * 0x??1 = Root - * 0x?00 = 16 bit nodes - * 0x010 = 32 bit nodes - * 0x110 = 64 bit nodes + * 0b??1 = Root + * 0b?00 = 16 bit nodes + * 0b010 = 32 bit nodes + * 0b110 = 64 bit nodes * * Slot size and location in the parent pointer: * type : slot location - * 0x??1 : Root - * 0x?00 : 16 bit values, type in 0-1, slot in 2-6 - * 0x010 : 32 bit values, type in 0-2, slot in 3-6 - * 0x110 : 64 bit values, type in 0-2, slot in 3-6 + * 0b??1 : Root + * 0b?00 : 16 bit values, type in 0-1, slot in 2-6 + * 0b010 : 32 bit values, type in 0-2, slot in 3-6 + * 0b110 : 64 bit values, type in 0-2, slot in 3-6 */ /* -- cgit v1.2.3 From cf1dec76ba8a00b20e51d205f3c9f5c45bc96df2 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 21 Aug 2025 14:55:35 -0700 Subject: mm/filemap: add AS_KERNEL_FILE Patch series "introduce kernel file mapped folios", v4. Btrfs currently tracks its metadata pages in the page cache, using a fake inode (fs_info->btree_inode) with offsets corresponding to where the metadata is stored in the filesystem's full logical address space. A consequence of this is that when btrfs uses filemap_add_folio(), this usage is charged to the cgroup of whichever task happens to be running at the time. These folios don't belong to any particular user cgroup, so I don't think it makes much sense for them to be charged in that way. Some negative consequences as a result: - A task can be holding some important btrfs locks, then need to look up some metadata and go into reclaim, extending the duration it holds that lock for, and unfairly pushing its own reclaim pain onto other cgroups. - If that cgroup goes into reclaim, it might reclaim folios that a different non-reclaiming cgroup might need soon. This is naturally offset by LRU reclaim, but still. We have two options for how to manage such file pages: 1. charge them to the root cgroup. 2. don't charge them to any cgroup at all. Option 2 breaks the invariant that every mapped page has a cgroup.
This is workable, but unnecessarily risky. Therefore, go with 1. A very similar proposal to use the root cgroup was previously made by Qu, where he eventually proposed the idea of setting it per address_space. This makes good sense for the btrfs use case, as the behavior should apply to all use of the address_space, not select allocations. I.e., if someone adds another filemap_add_folio() call using btrfs's btree_inode, we would almost certainly want to account that to the root cgroup as well. This patch (of 3): Add the flag AS_KERNEL_FILE to the address_space to indicate that this mapping's memory is exempt from the usual memcg accounting. [boris@bur.io: fix CONFIG_MEMCG build for AS_KERNEL_FILE] Link: https://lkml.kernel.org/r/6de59ddeec81b5c294d337c001ba0061631d4ec6.1755816635.git.boris@bur.io Link: https://lore.kernel.org/linux-mm/b5fef5372ae454a7b6da4f2f75c427aeab6a07d6.1727498749.git.wqu@suse.com/ Link: https://lkml.kernel.org/r/f09c4e2c90351d4cb30a1969f7a863b9238bd291.1755812945.git.boris@bur.io Signed-off-by: Boris Burkov Suggested-by: Qu Wenruo Suggested-by: Shakeel Butt Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 2 ++ include/linux/pagemap.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9fa3afc90dd5..e693978b2022 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1059,6 +1059,8 @@ extern int mem_cgroup_init(void); #define MEM_CGROUP_ID_SHIFT 0 +#define root_mem_cgroup (NULL) + static inline struct mem_cgroup *folio_memcg(struct folio *folio) { return NULL; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 12a12dae727d..f0dfdfb13cd9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -211,6 +211,8 @@ enum mapping_flags { folio contents */ AS_INACCESSIBLE = 8, /* Do 
not attempt direct R/W access to the mapping */ AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, + AS_KERNEL_FILE = 10, /* mapping for a fake kernel file that shouldn't + account usage to user cgroups */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, -- cgit v1.2.3 From e3a9ac4e866ea746475b4026819a8c08ec1142e6 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Thu, 21 Aug 2025 14:55:36 -0700 Subject: mm: add vmstat for kernel_file pages Kernel file pages are tricky to track because they are indistinguishable from files whose usage is accounted to the root cgroup. To maintain good accounting, introduce a vmstat counter tracking kernel file pages. Confirmed that these work as expected at a high level by mounting a btrfs using AS_KERNEL_FILE for metadata pages, and seeing the counter rise with fs usage then go back to a minimal level after drop_caches and finally down to 0 after unmounting the fs. Link: https://lkml.kernel.org/r/08ff633e3a005ed5f7691bfd9f58a5df8e474339.1755812945.git.boris@bur.io Signed-off-by: Boris Burkov Suggested-by: Shakeel Butt Acked-by: Shakeel Butt Tested-by: syzbot@syzkaller.appspotmail.com Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Qu Wenruo Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fe13ad175fed..f3272ef5131b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -259,6 +259,7 @@ enum node_stat_item { NR_HUGETLB, #endif NR_BALLOON_PAGES, + NR_KERNEL_FILE_PAGES, NR_VM_NODE_STAT_ITEMS }; -- cgit v1.2.3 From 98c94f1035fc0c82ab008854a165df2c20c0cb6a Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 27 Aug 2025 07:01:05 +0000 Subject: mm/pageblock-flags: remove PB_migratetype_bits/PB_migrate_end enum pageblock_bits defines the meaning of pageblock bits. 
Currently PB_migratetype_bits says the lowest 3 bits represent the migratetype, and the definitions of PB_migrate_end/MIGRATETYPE_MASK rely on it with magical computation. Remove the definition of PB_migratetype_bits/PB_migrate_end. Use PB_migrate_[0|1|2] to represent the lowest bits for migratetype. Then we can simplify the related definitions. Also, MIGRATETYPE_AND_ISO_MASK is MIGRATETYPE_MASK plus the isolation bit. Using MIGRATETYPE_MASK in the definition of MIGRATETYPE_AND_ISO_MASK looks cleaner. No functional change intended. Link: https://lkml.kernel.org/r/20250827070105.16864-3-richard.weiyang@gmail.com Signed-off-by: Wei Yang Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Zi Yan Cc: Vlastimil Babka Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/pageblock-flags.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 6a44be0f39f4..e046278a01fa 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -13,12 +13,11 @@ #include -#define PB_migratetype_bits 3 /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_migrate, - PB_migrate_end = PB_migrate + PB_migratetype_bits - 1, - /* 3 bits required for migrate types */ + PB_migrate_0, + PB_migrate_1, + PB_migrate_2, PB_compact_skip,/* If set the block is skipped by compaction */ #ifdef CONFIG_MEMORY_ISOLATION @@ -37,11 +36,10 @@ enum pageblock_bits { #define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS)) -#define MIGRATETYPE_MASK ((1UL << (PB_migrate_end + 1)) - 1) +#define MIGRATETYPE_MASK (BIT(PB_migrate_0)|BIT(PB_migrate_1)|BIT(PB_migrate_2)) #ifdef CONFIG_MEMORY_ISOLATION -#define MIGRATETYPE_AND_ISO_MASK \ - (((1UL << (PB_migrate_end + 1)) - 1) | BIT(PB_migrate_isolate)) +#define MIGRATETYPE_AND_ISO_MASK (MIGRATETYPE_MASK | BIT(PB_migrate_isolate)) #else #define MIGRATETYPE_AND_ISO_MASK MIGRATETYPE_MASK 
#endif -- cgit v1.2.3 From 09a616cbb371e6b843e536f00e38d6b43d796ac4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 28 Aug 2025 10:12:32 -0700 Subject: mm/damon/core: add damon_ctx->addr_unit Patch series "mm/damon: support ARM32 with LPAE", v3. Previously, DAMON's physical address space monitoring only supported memory ranges below 4GB on LPAE-enabled systems. This was due to the use of 'unsigned long' in 'struct damon_addr_range', which is 32-bit on ARM32 even with LPAE enabled[1]. To add DAMON support for ARM32 with LPAE enabled, a new core layer parameter called 'addr_unit' was introduced[2]. Operations set layer can translate a core layer address to the real address by multiplying the parameter value to the core layer address. Support of the parameter is up to each operations layer implementation, though. For example, operations set implementations for virtual address space can simply ignore the parameter. Add the support on paddr, which is the DAMON operations set implementation for the physical address space, as we have a clear use case for that. This patch (of 11): In some cases, some of the real addresses that are handled by the underlying operations set cannot be handled by DAMON since it uses only 'unsigned long' as the address type. Using DAMON for physical address space monitoring of 32 bit ARM devices with large physical address extension (LPAE) is one example[1]. Add a parameter named 'addr_unit' to the core layer to help such cases. DAMON core API callers can set it as the scale factor that will be used by the operations set for translating the core layer's addresses to the real address by multiplying the parameter value to the core layer address. Support of the parameter is up to each operations set layer. The support from the physical address space operations set (paddr) will be added by the following commits. 
Link: https://lkml.kernel.org/r/20250828171242.59810-1-sj@kernel.org Link: https://lkml.kernel.org/r/20250828171242.59810-2-sj@kernel.org Link: https://lore.kernel.org/20250408075553.959388-1-zuoze1@huawei.com [1] Link: https://lore.kernel.org/all/20250416042551.158131-1-sj@kernel.org/ [2] Signed-off-by: SeongJae Park Signed-off-by: Quanmin Yan Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Kefeng Wang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: ze zuo Signed-off-by: Andrew Morton --- include/linux/damon.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index d01bfee80bd6..6fa52f7495d9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -746,7 +746,7 @@ struct damon_attrs { * Accesses to other fields must be protected by themselves. * * @ops: Set of monitoring operations for given use cases. - * + * @addr_unit: Scale factor for core to ops address conversion. * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. */ @@ -788,6 +788,7 @@ struct damon_ctx { struct mutex kdamond_lock; struct damon_operations ops; + unsigned long addr_unit; struct list_head adaptive_targets; struct list_head schemes; -- cgit v1.2.3 From d8f867fa0825fb3e358457566d7326d8aab2406a Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Thu, 28 Aug 2025 10:12:42 -0700 Subject: mm/damon: add damon_ctx->min_sz_region Adopting addr_unit would make DAMON_MIN_REGION 'addr_unit * 4096' bytes and cause data alignment issues[1]. Add damon_ctx->min_sz_region to change DAMON_MIN_REGION from a global macro value to a per-context variable. 
Link: https://lkml.kernel.org/r/20250828171242.59810-12-sj@kernel.org Link: https://lore.kernel.org/all/527714dd-0e33-43ab-bbbd-d89670ba79e7@huawei.com [1] Signed-off-by: Quanmin Yan Signed-off-by: SeongJae Park Reviewed-by: SeongJae Park Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Kefeng Wang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: ze zuo Signed-off-by: Andrew Morton --- include/linux/damon.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 6fa52f7495d9..ec8716292c09 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -747,6 +747,7 @@ struct damon_attrs { * * @ops: Set of monitoring operations for given use cases. * @addr_unit: Scale factor for core to ops address conversion. + * @min_sz_region: Minimum region size. * @adaptive_targets: Head of monitoring targets (&damon_target) list. * @schemes: Head of schemes (&damos) list. */ @@ -789,6 +790,7 @@ struct damon_ctx { struct damon_operations ops; unsigned long addr_unit; + unsigned long min_sz_region; struct list_head adaptive_targets; struct list_head schemes; @@ -877,7 +879,7 @@ static inline void damon_insert_region(struct damon_region *r, void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, - unsigned int nr_ranges); + unsigned int nr_ranges, unsigned long min_sz_region); void damon_update_region_access_rate(struct damon_region *r, bool accessed, struct damon_attrs *attrs); -- cgit v1.2.3 From 1e332f303ae93ba4d38b480b1bb5a08f833306f6 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 28 Aug 2025 15:03:11 +0200 Subject: pagevec.h: add `const` to pointer parameters of getter functions For improved const-correctness. 
Link: https://lkml.kernel.org/r/20250828130311.772993-1-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: SeongJae Park Reviewed-by: Vishal Moola (Oracle) Cc: David Hildenbrand Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pagevec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 5d3a0cccc6bf..63be5a451627 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -51,12 +51,12 @@ static inline void folio_batch_reinit(struct folio_batch *fbatch) fbatch->i = 0; } -static inline unsigned int folio_batch_count(struct folio_batch *fbatch) +static inline unsigned int folio_batch_count(const struct folio_batch *fbatch) { return fbatch->nr; } -static inline unsigned int folio_batch_space(struct folio_batch *fbatch) +static inline unsigned int folio_batch_space(const struct folio_batch *fbatch) { return PAGEVEC_SIZE - fbatch->nr; } -- cgit v1.2.3 From 39b44c8c73312ac535ffdf7c8ecd37ea07d4ef86 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 28 Aug 2025 10:48:20 +0200 Subject: huge_mm.h: disallow is_huge_zero_folio(NULL) Calling is_huge_zero_folio(NULL) should not be legal - it makes no sense, and a different (theoretical) implementation may dereference the pointer. But currently, lacking any explicit documentation, this call is possible. But if somebody really passes NULL, the function should not return true - this isn't the huge zero folio after all! However, if the `huge_zero_folio` hasn't been allocated yet, it's NULL, and is_huge_zero_folio(NULL) just happens to return true, which is a lie. This weird side effect prevented me from reproducing a kernel crash that occurred when the elements of a folio_batch were NULL - since folios_put_refs() skips huge zero folios, this sometimes causes a crash, but sometimes does not. 
For debugging, it is better to reveal such bugs reliably and not hide them behind random preconditions like "has the huge zero folio already been created?" To improve detection of such bugs, David Hildenbrand suggested adding a VM_WARN_ON_ONCE(). Link: https://lkml.kernel.org/r/20250828084820.570118-1-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Lorenzo Stoakes Reviewed-by: Zi Yan Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: Chris Li Cc: David Hildenbrand Cc: Dev Jain Cc: Kairui Song Cc: Kemeng Shi Cc: Liam Howlett Cc: Mariano Pache Cc: Nhat Pham Cc: Ryan Roberts Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1ac0d06fb3c1..29ef70022da1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -501,6 +501,8 @@ extern unsigned long huge_zero_pfn; static inline bool is_huge_zero_folio(const struct folio *folio) { + VM_WARN_ON_ONCE(!folio); + return READ_ONCE(huge_zero_folio) == folio; } -- cgit v1.2.3 From f367474b5884edbc42661e7fecf784cb131dd25d Mon Sep 17 00:00:00 2001 From: Brian Mak Date: Tue, 5 Aug 2025 14:15:27 -0700 Subject: x86/kexec: carry forward the boot DTB on kexec Currently, the kexec_file_load syscall on x86 does not support passing a device tree blob to the new kernel. Some embedded x86 systems use device trees. On these systems, failing to pass a device tree to the new kernel causes a boot failure. To add support for this, we copy the behavior of ARM64 and PowerPC and copy the current boot's device tree blob for use in the new kernel. We do this on x86 by passing the device tree blob as a setup_data entry in accordance with the x86 boot protocol. This behavior is gated behind the KEXEC_FILE_FORCE_DTB flag. 
Link: https://lkml.kernel.org/r/20250805211527.122367-3-makb@juniper.net Signed-off-by: Brian Mak Cc: Alexander Graf Cc: Baoquan He Cc: Borislav Betkov Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Rob Herring Cc: Saravana Kannan Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/kexec.h | 5 ++++- include/uapi/linux/kexec.h | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 39fe3e6cd282..ff7e231b0485 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -395,6 +395,9 @@ struct kimage { /* Information for loading purgatory */ struct purgatory_info purgatory_info; + + /* Force carrying over the DTB from the current boot */ + bool force_dtb; #endif #ifdef CONFIG_CRASH_HOTPLUG @@ -461,7 +464,7 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ - KEXEC_FILE_NO_CMA) + KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 8958ebfcff94..55749cb0b81d 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -22,12 +22,16 @@ * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image. * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd * fd field. + * KEXEC_FILE_FORCE_DTB : Force carrying over the current boot's DTB to the new + * kernel on x86. This is already the default behavior on + * some other architectures, like ARM64 and PowerPC. 
*/ #define KEXEC_FILE_UNLOAD 0x00000001 #define KEXEC_FILE_ON_CRASH 0x00000002 #define KEXEC_FILE_NO_INITRAMFS 0x00000004 #define KEXEC_FILE_DEBUG 0x00000008 #define KEXEC_FILE_NO_CMA 0x00000010 +#define KEXEC_FILE_FORCE_DTB 0x00000020 /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. -- cgit v1.2.3 From c8a09fc9664f79eeb66cdf4a2a34d5b6a239b727 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 14 Jul 2025 10:17:09 +0200 Subject: ida: remove the ida_simple_xxx() API All users of the ida_simple_xxx() have been converted. In Linux 6.11-rc2, the only callers are in tools/testing/. So it is now time to remove the definition of this old and deprecated ida_simple_get() and ida_simple_remove(). Link: https://lkml.kernel.org/r/aa205f45fef70a9c948b6a98bad06da58e4de776.1752480043.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/idr.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index 2267902d29a7..789e23e67444 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -334,14 +334,6 @@ static inline void ida_init(struct ida *ida) xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } -/* - * ida_simple_get() and ida_simple_remove() are deprecated. Use - * ida_alloc() and ida_free() instead respectively. - */ -#define ida_simple_get(ida, start, end, gfp) \ - ida_alloc_range(ida, start, (end) - 1, gfp) -#define ida_simple_remove(ida, id) ida_free(ida, id) - static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); -- cgit v1.2.3 From baa96bcb180e979a821ce3ade87a3d2349b2d640 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 14 Jul 2025 10:17:10 +0200 Subject: nvmem: update a comment related to struct nvmem_config Update a comment to match the function used in nvmem_register(). 
ida_simple_get() was replaced by ida_alloc() in commit 1eb51d6a4fce ("nvmem: switch to simpler IDA interface") Link: https://lkml.kernel.org/r/27a9dec93a9f79140b11a77df38b1b45bd342e09.1752480043.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/nvmem-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 615a560d9edb..f3b13da78aac 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -103,7 +103,7 @@ struct nvmem_cell_info { * * Note: A default "nvmem" name will be assigned to the device if * no name is specified in its configuration. In such case "" is - * generated with ida_simple_get() and provided id field is ignored. + * generated with ida_alloc() and provided id field is ignored. * * Note: Specifying name and setting id to -1 implies a unique device * whose name is provided as-is (kept unaltered). -- cgit v1.2.3 From 2a8c51bc9391ff3c701f06ae1b678419f843dc1a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Aug 2025 00:55:07 -0700 Subject: list.h: add missing kernel-doc for basic macros kernel-doc for the basic LIST_HEAD() and LIST_HEAD_INIT() macros has been missing forever (i.e., since git). Add them for completeness. Link: https://lkml.kernel.org/r/20250819075507.113639-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Nicolas Frattaroli Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/linux/list.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index e7e28afd28f8..ca63bdea6c1a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -20,8 +20,16 @@ * using the generic single-entry routines. 
*/ +/** + * LIST_HEAD_INIT - initialize a &struct list_head's links to point to itself + * @name: name of the list_head + */ #define LIST_HEAD_INIT(name) { &(name), &(name) } +/** + * LIST_HEAD - definition of a &struct list_head with initialization values + * @name: name of the list_head + */ #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) -- cgit v1.2.3 From 2683df6539cbc3f0eeeba11154bc0cbf042a5cee Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 25 Aug 2025 10:57:00 +0800 Subject: panic: add note that 'panic_print' parameter is deprecated Just like for the 'panic_print' sysctl interface, add a similar note for the kernel cmdline parameter and the parameter under /sys/module/kernel/. Also add the __core_param_cb() macro, which allows adding special get/set operations for a kernel parameter. Link: https://lkml.kernel.org/r/20250825025701.81921-4-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Reviewed-by: Petr Mladek Cc: Askar Safin Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Signed-off-by: Andrew Morton --- include/linux/moduleparam.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 3a25122d83e2..6907aedc4f74 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -349,6 +349,19 @@ static inline void kernel_param_unlock(struct module *mod) __module_param_call("", name, &param_ops_##type, &var, perm, \ -1, KERNEL_PARAM_FL_UNSAFE) +/** + * __core_param_cb - similar like core_param, with a set/get ops instead of type. + * @name: the name of the cmdline and sysfs parameter (often the same as var) + * @var: the variable + * @ops: the set & get operations for this parameter. 
+ * @perm: visibility in sysfs + * + * Ideally this should be called 'core_param_cb', but the name has been + * used for module core parameter, so add the '__' prefix + */ +#define __core_param_cb(name, ops, arg, perm) \ + __module_param_call("", name, ops, arg, perm, -1, 0) + #endif /* !MODULE */ /** -- cgit v1.2.3 From d0d9c7235548f1d772f1e48c9d5742c65d81c705 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Mon, 25 Aug 2025 10:29:29 +0800 Subject: panic: introduce helper functions for panic state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "panic: introduce panic status function family", v2. This series introduces a family of helper functions to manage panic state and updates existing code to use them. Before this series, panic state helpers were scattered and inconsistent. For example, panic_in_progress() was defined in printk/printk.c, not in panic.c or panic.h. As a result, developers had to look in unexpected places to understand or re-use panic state logic. Other checks were open- coded, duplicating logic across panic, crash, and watchdog paths. The new helpers centralize the functionality in panic.c/panic.h: - panic_try_start() - panic_reset() - panic_in_progress() - panic_on_this_cpu() - panic_on_other_cpu() Patches 1–8 add the helpers and convert panic/crash and printk/nbcon code to use them. Patch 9 fixes a bug in the watchdog subsystem by skipping checks when a panic is in progress, avoiding interference with the panic CPU. Together, this makes panic state handling simpler, more discoverable, and more robust. This patch (of 9): This patch introduces four new helper functions to abstract the management of the panic_cpu variable. These functions will be used in subsequent patches to refactor existing code. The direct use of panic_cpu can be error-prone and ambiguous, as it requires manual checks to determine which CPU is handling the panic. 
The new helpers clarify intent: panic_try_start(): Atomically sets the current CPU as the panicking CPU. panic_reset(): Reset panic_cpu to PANIC_CPU_INVALID. panic_in_progress(): Checks if a panic has been triggered. panic_on_this_cpu(): Returns true if the current CPU is the panic originator. panic_on_other_cpu(): Returns true if a panic is on another CPU. This change lays the groundwork for improved code readability and robustness in the panic handling subsystem. Link: https://lkml.kernel.org/r/20250825022947.1596226-1-wangjinchao600@gmail.com Link: https://lkml.kernel.org/r/20250825022947.1596226-2-wangjinchao600@gmail.com Signed-off-by: Jinchao Wang Cc: Anna Schumaker Cc: Baoquan He Cc: "Darrick J. Wong" Cc: Dave Young Cc: Doug Anderson Cc: "Guilherme G. Piccoli" Cc: Helge Deller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Joanthan Cameron Cc: Joel Granados Cc: John Ogness Cc: Kees Cook Cc: Li Huafei Cc: "Luck, Tony" Cc: Luo Gengkun Cc: Max Kellermann Cc: Nam Cao Cc: oushixiong Cc: Petr Mladek Cc: Qianqiang Liu Cc: Sergey Senozhatsky Cc: Sohil Mehta Cc: Steven Rostedt Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Zimemrmann Cc: Thorsten Blum Cc: Ville Syrjala Cc: Vivek Goyal Cc: Yicong Yang Cc: Yunhui Cui Cc: Yury Norov (NVIDIA) b Signed-off-by: Andrew Morton --- include/linux/panic.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/panic.h b/include/linux/panic.h index 7be742628c25..6f972a66c13e 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -43,6 +43,12 @@ void abort(void); extern atomic_t panic_cpu; #define PANIC_CPU_INVALID -1 +bool panic_try_start(void); +void panic_reset(void); +bool panic_in_progress(void); +bool panic_on_this_cpu(void); +bool panic_on_other_cpu(void); + /* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. 
-- cgit v1.2.3 From c6be36e2997662f423edfa3979a63935873ff648 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Mon, 25 Aug 2025 10:29:35 +0800 Subject: panic/printk: replace this_cpu_in_panic() with panic_on_this_cpu() The helper this_cpu_in_panic() duplicated logic already provided by panic_on_this_cpu(). Remove this_cpu_in_panic() and switch all users to panic_on_this_cpu(). This simplifies the code and avoids having two helpers for the same check. Link: https://lkml.kernel.org/r/20250825022947.1596226-8-wangjinchao600@gmail.com Signed-off-by: Jinchao Wang Cc: Anna Schumaker Cc: Baoquan He Cc: "Darrick J. Wong" Cc: Dave Young Cc: Doug Anderson Cc: "Guilherme G. Piccoli" Cc: Helge Deller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Joanthan Cameron Cc: Joel Granados Cc: John Ogness Cc: Kees Cook Cc: Li Huafei Cc: "Luck, Tony" Cc: Luo Gengkun Cc: Max Kellermann Cc: Nam Cao Cc: oushixiong Cc: Petr Mladek Cc: Qianqiang Liu Cc: Sergey Senozhatsky Cc: Sohil Mehta Cc: Steven Rostedt Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Zimemrmann Cc: Thorsten Blum Cc: Ville Syrjala Cc: Vivek Goyal Cc: Yicong Yang Cc: Yunhui Cui Cc: Yury Norov (NVIDIA) Signed-off-by: Andrew Morton --- include/linux/printk.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 5d22b803f51e..45c663124c9b 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -330,8 +330,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) #endif -bool this_cpu_in_panic(void); - #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); -- cgit v1.2.3 From 7b1e502eb17c23fa6459a19bfe5974bffdb95574 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 6 Sep 2025 21:38:57 -0700 Subject: kernel.h: add comments for enum system_states Provide some basic comments about the system_states and what they imply. Also convert the comments to kernel-doc format. 
Split the enum declaration from the definition of the system_state variable so that kernel-doc notation works cleanly with it. This is picked up by Documentation/driver-api/basics.rst so it does not need further inclusion in the kernel docbooks. Link: https://lkml.kernel.org/r/20250907043857.2941203-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Acked-by: Rafael J. Wysocki # v1 Reviewed-by: Mauro Carvalho Chehab [v5] Cc: "Brown, Len" Cc: Greg Kroah-Hartman Cc: James Bottomley Cc: Jani Nikula Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Cc: Pavel Machek Signed-off-by: Andrew Morton --- include/linux/kernel.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 989315dabb86..5b46924fdff5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -164,11 +164,23 @@ extern int root_mountflags; extern bool early_boot_irqs_disabled; -/* - * Values used for system_state. Ordering of the states must not be changed +/** + * enum system_states - Values used for system_state. + * + * @SYSTEM_BOOTING: %0, no init needed + * @SYSTEM_SCHEDULING: system is ready for scheduling; OK to use RCU + * @SYSTEM_FREEING_INITMEM: system is freeing all of initmem; almost running + * @SYSTEM_RUNNING: system is up and running + * @SYSTEM_HALT: system entered clean system halt state + * @SYSTEM_POWER_OFF: system entered shutdown/clean power off state + * @SYSTEM_RESTART: system entered emergency power off or normal restart + * @SYSTEM_SUSPEND: system entered suspend or hibernate state + * + * Note: + * Ordering of the states must not be changed * as code checks for <, <=, >, >= STATE. 
*/ -extern enum system_states { +enum system_states { SYSTEM_BOOTING, SYSTEM_SCHEDULING, SYSTEM_FREEING_INITMEM, @@ -177,7 +189,8 @@ extern enum system_states { SYSTEM_POWER_OFF, SYSTEM_RESTART, SYSTEM_SUSPEND, -} system_state; +}; +extern enum system_states system_state; /* * General tracing related utility functions - trace_printk(), -- cgit v1.2.3 From d6d5116391857fc78fad9aa42317b36e4ce17b58 Mon Sep 17 00:00:00 2001 From: Evangelos Petrongonas Date: Thu, 21 Aug 2025 17:58:59 +0000 Subject: kexec: introduce is_kho_boot() Patch series "efi: Fix EFI boot with kexec handover (KHO)", v3. This patch series fixes a kernel panic that occurs when booting with both EFI and KHO (Kexec HandOver) enabled. The issue arises because EFI's `reserve_regions()` clears all memory regions with `memblock_remove(0, PHYS_ADDR_MAX)` before rebuilding them from EFI data. This destroys KHO scratch regions that were set up early during device tree scanning, causing a panic as the kernel has no valid memory regions for early allocations. The first patch introduces `is_kho_boot()` to allow early boot components to reliably detect if the kernel was booted via KHO-enabled kexec. The existing `kho_is_enabled()` only checks the command line and doesn't verify if an actual KHO FDT was passed. The second patch modifies EFI's `reserve_regions()` to selectively remove only non-KHO memory regions when KHO is active, preserving the critical scratch regions while still allowing EFI to rebuild its memory map. This patch (of 3): During early initialisation, after a kexec, other components, like EFI need to know if a KHO enabled kexec is performed. The `kho_is_enabled` function is not enough as in the early stages, it only reflects whether the cmdline has KHO enabled, not if an actual KHO FDT exists. Extend the KHO API with `is_kho_boot()` to provide a way for components to check if a KHO enabled kexec is performed. 
Link: https://lkml.kernel.org/r/cover.1755721529.git.epetron@amazon.de Link: https://lkml.kernel.org/r/7dc6674a76bf6e68cca0222ccff32427699cc02e.1755721529.git.epetron@amazon.de Signed-off-by: Evangelos Petrongonas Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Ard Biesheuvel Cc: Baoquan He Cc: Changyuan Lyu Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 348844cffb13..559d13a3bc44 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -40,6 +40,7 @@ struct kho_serialization; #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); +bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); int kho_preserve_phys(phys_addr_t phys, size_t size); @@ -60,6 +61,11 @@ static inline bool kho_is_enabled(void) return false; } +static inline bool is_kho_boot(void) +{ + return false; +} + static inline int kho_preserve_folio(struct folio *folio) { return -EOPNOTSUPP; -- cgit v1.2.3 From e19ceeb1c0f63e3e15b197c5f34797134b51ba0e Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 28 Aug 2025 08:35:58 +0000 Subject: platform/chrome: Centralize common cros_ec_device initialization Move the common initialization from protocol device drivers into central cros_ec_device_alloc(). This removes duplicated code from each driver's probe function. The buffer sizes are now calculated once, using the maximum possible overhead required by any of the transport protocols, ensuring the allocated buffers are sufficient for all cases. 
Link: https://lore.kernel.org/r/20250828083601.856083-3-tzungbi@kernel.org Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_proto.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 3ec24f445c29..4d96cffbb9e3 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -33,12 +33,18 @@ /* * Max bus-specific overhead incurred by request/responses. - * I2C requires 1 additional byte for requests. - * I2C requires 2 additional bytes for responses. - * SPI requires up to 32 additional bytes for responses. + * + * Request: + * - I2C requires 1 byte (see struct ec_host_request_i2c). + * - ISHTP requires 4 bytes (see struct cros_ish_out_msg). + * + * Response: + * - I2C requires 2 bytes (see struct ec_host_response_i2c). + * - ISHTP requires 4 bytes (see struct cros_ish_in_msg). + * - SPI requires 32 bytes (see EC_MSG_PREAMBLE_COUNT). */ #define EC_PROTO_VERSION_UNKNOWN 0 -#define EC_MAX_REQUEST_OVERHEAD 1 +#define EC_MAX_REQUEST_OVERHEAD 4 #define EC_MAX_RESPONSE_OVERHEAD 32 /* -- cgit v1.2.3 From 56cb557279d70397cefb497e0f06bdd6fd685f8e Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 28 Aug 2025 08:36:00 +0000 Subject: platform/chrome: cros_ec: Add a flag to track registration state Introduce a `registered` flag to the `struct cros_ec_device` to allow callers to determine if the device has been fully registered and is ready for use. This is a preparatory step to prevent race conditions where other drivers might try to access the device before it is fully registered or after it has been unregistered. 
Link: https://lore.kernel.org/r/20250828083601.856083-5-tzungbi@kernel.org Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_proto.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4d96cffbb9e3..de14923720a5 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -128,6 +128,7 @@ struct cros_ec_command { * @dout_size: Size of dout buffer to allocate (zero to use static dout). * @wake_enabled: True if this device can wake the system from sleep. * @suspended: True if this device had been suspended. + * @registered: True if this device had been registered. * @cmd_xfer: Send command to EC and get response. * Returns the number of bytes received if the communication * succeeded, but that doesn't mean the EC was happy with the @@ -186,6 +187,7 @@ struct cros_ec_device { int dout_size; bool wake_enabled; bool suspended; + bool registered; int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); int (*pkt_xfer)(struct cros_ec_device *ec, @@ -278,6 +280,8 @@ int cros_ec_cmd_readmem(struct cros_ec_device *ec_dev, u8 offset, u8 size, void int cros_ec_get_cmd_versions(struct cros_ec_device *ec_dev, u16 cmd); +bool cros_ec_device_registered(struct cros_ec_device *ec_dev); + /** * cros_ec_get_time_ns() - Return time in ns. * -- cgit v1.2.3 From e68f150bc11d0a05cbe984a4e5c0f72a95cae07d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 18 Aug 2025 09:46:15 +0300 Subject: memblock: drop for_each_free_mem_pfn_range_in_zone_from() for_each_free_mem_pfn_range_in_zone_from() and its "backend" implementation __next_mem_pfn_range_in_zone() were only used by deferred initialization of the memory map. Remove them as they are not used anymore. 
Reviewed-by: Wei Yang Reviewed-by: David Hildenbrand Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fcda8481de9a..221118b5a16e 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -324,28 +324,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \ i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, - unsigned long *out_spfn, - unsigned long *out_epfn); - -/** - * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific - * free memblock areas from a given point - * @i: u64 used as loop variable - * @zone: zone in which all of the memory blocks reside - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL - * - * Walks over free (memory && !reserved) areas of memblock in a specific - * zone, continuing from current position. Available as soon as memblock is - * initialized. - */ -#define for_each_free_mem_pfn_range_in_zone_from(i, zone, p_start, p_end) \ - for (; i != U64_MAX; \ - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) - -#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ /** * for_each_free_mem_range - iterate through free memblock areas -- cgit v1.2.3 From fdae0ab67d57d480dc61e9fb45678bbdc3786711 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Sep 2025 12:19:42 +0000 Subject: net: use NUMA drop counters for softnet_data.dropped Hosts under DOS attack can suffer from false sharing in enqueue_to_backlog() : atomic_inc(&sd->dropped). This is because sd->dropped can be touched from many cpus, possibly residing on different NUMA nodes. 
Generalize the sk_drop_counters infrastructure added in commit c51613fa276f ("net: add sk->sk_drop_counters") and use it to replace softnet_data.dropped with NUMA friendly softnet_data.drop_counters. This adds 64 bytes per cpu, maybe more in the future if we increase the number of counters (currently 2) per 'struct numa_drop_counters'. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250909121942.1202585-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 2 +- include/linux/netdevice.h | 28 +++++++++++++++++++++++++++- include/linux/udp.h | 2 +- include/net/raw.h | 2 +- include/net/sock.h | 37 ++++++++++++------------------------- 5 files changed, 42 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 261d02efb615..f43314517396 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -295,7 +295,7 @@ struct raw6_sock { __u32 offset; /* checksum offset */ struct icmp6_filter filter; __u32 ip6mr_table; - struct socket_drop_counters drop_counters; + struct numa_drop_counters drop_counters; struct ipv6_pinfo inet6; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f3a3b761abfb..f5a840c07cf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3459,6 +3459,32 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } +struct numa_drop_counters { + atomic_t drops0 ____cacheline_aligned_in_smp; + atomic_t drops1 ____cacheline_aligned_in_smp; +}; + +static inline int numa_drop_read(const struct numa_drop_counters *ndc) +{ + return atomic_read(&ndc->drops0) + atomic_read(&ndc->drops1); +} + +static inline void numa_drop_add(struct numa_drop_counters *ndc, int val) +{ + int n = numa_node_id() % 2; + + if (n) + atomic_add(val, &ndc->drops1); + else + atomic_add(val, &ndc->drops0); +} + +static inline void numa_drop_reset(struct 
numa_drop_counters *ndc) +{ + atomic_set(&ndc->drops0, 0); + atomic_set(&ndc->drops1, 0); +} + /* * Incoming packets are placed on per-CPU queues */ @@ -3504,7 +3530,7 @@ struct softnet_data { struct sk_buff_head input_pkt_queue; struct napi_struct backlog; - atomic_t dropped ____cacheline_aligned_in_smp; + struct numa_drop_counters drop_counters; /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; diff --git a/include/linux/udp.h b/include/linux/udp.h index 981506be1e15..6ed008ab1665 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -108,7 +108,7 @@ struct udp_sock { * the last UDP socket cacheline. */ struct hlist_node tunnel_list; - struct socket_drop_counters drop_counters; + struct numa_drop_counters drop_counters; }; #define udp_test_bit(nr, sk) \ diff --git a/include/net/raw.h b/include/net/raw.h index d52709139060..66c0ffeada2e 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -81,7 +81,7 @@ struct raw_sock { struct inet_sock inet; struct icmp_filter filter; u32 ipmr_table; - struct socket_drop_counters drop_counters; + struct numa_drop_counters drop_counters; }; #define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk) diff --git a/include/net/sock.h b/include/net/sock.h index 896bec2d2176..0fd465935334 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -102,11 +102,6 @@ struct net; typedef __u32 __bitwise __portpair; typedef __u64 __bitwise __addrpair; -struct socket_drop_counters { - atomic_t drops0 ____cacheline_aligned_in_smp; - atomic_t drops1 ____cacheline_aligned_in_smp; -}; - /** * struct sock_common - minimal network layer representation of sockets * @skc_daddr: Foreign IPv4 addr @@ -287,7 +282,7 @@ struct sk_filter; * @sk_err_soft: errors that don't cause failure but are the cause of a * persistent failure not just 'timed out' * @sk_drops: raw/udp drops counter - * @sk_drop_counters: optional pointer to socket_drop_counters + * @sk_drop_counters: optional pointer 
to numa_drop_counters * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner @@ -456,7 +451,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct socket_drop_counters *sk_drop_counters; + struct numa_drop_counters *sk_drop_counters; __cacheline_group_end(sock_read_rxtx); __cacheline_group_begin(sock_write_rxtx); @@ -2698,18 +2693,12 @@ struct sock_skb_cb { static inline void sk_drops_add(struct sock *sk, int segs) { - struct socket_drop_counters *sdc = sk->sk_drop_counters; + struct numa_drop_counters *ndc = sk->sk_drop_counters; - if (sdc) { - int n = numa_node_id() % 2; - - if (n) - atomic_add(segs, &sdc->drops1); - else - atomic_add(segs, &sdc->drops0); - } else { + if (ndc) + numa_drop_add(ndc, segs); + else atomic_add(segs, &sk->sk_drops); - } } static inline void sk_drops_inc(struct sock *sk) @@ -2719,23 +2708,21 @@ static inline void sk_drops_inc(struct sock *sk) static inline int sk_drops_read(const struct sock *sk) { - const struct socket_drop_counters *sdc = sk->sk_drop_counters; + const struct numa_drop_counters *ndc = sk->sk_drop_counters; - if (sdc) { + if (ndc) { DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops)); - return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1); + return numa_drop_read(ndc); } return atomic_read(&sk->sk_drops); } static inline void sk_drops_reset(struct sock *sk) { - struct socket_drop_counters *sdc = sk->sk_drop_counters; + struct numa_drop_counters *ndc = sk->sk_drop_counters; - if (sdc) { - atomic_set(&sdc->drops0, 0); - atomic_set(&sdc->drops1, 0); - } + if (ndc) + numa_drop_reset(ndc); atomic_set(&sk->sk_drops, 0); } -- cgit v1.2.3 From 66048f8b3cc7e462953c04285183cdee43a1cb89 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 2 Sep 2025 14:29:33 +0800 Subject: net/cls_cgroup: Fix task_get_classid() during qdisc run During recent testing with the netem qdisc to inject delays into TCP traffic, we observed 
that our CLS BPF program failed to function correctly due to incorrect classid retrieval from task_get_classid(). The issue manifests in the following call stack: bpf_get_cgroup_classid+5 cls_bpf_classify+507 __tcf_classify+90 tcf_classify+217 __dev_queue_xmit+798 bond_dev_queue_xmit+43 __bond_start_xmit+211 bond_start_xmit+70 dev_hard_start_xmit+142 sch_direct_xmit+161 __qdisc_run+102 <<<<< Issue location __dev_xmit_skb+1015 __dev_queue_xmit+637 neigh_hh_output+159 ip_finish_output2+461 __ip_finish_output+183 ip_finish_output+41 ip_output+120 ip_local_out+94 __ip_queue_xmit+394 ip_queue_xmit+21 __tcp_transmit_skb+2169 tcp_write_xmit+959 __tcp_push_pending_frames+55 tcp_push+264 tcp_sendmsg_locked+661 tcp_sendmsg+45 inet_sendmsg+67 sock_sendmsg+98 sock_write_iter+147 vfs_write+786 ksys_write+181 __x64_sys_write+25 do_syscall_64+56 entry_SYSCALL_64_after_hwframe+100 The problem occurs when multiple tasks share a single qdisc. In such cases, __qdisc_run() may transmit skbs created by different tasks. Consequently, task_get_classid() retrieves an incorrect classid since it references the current task's context rather than the skb's originating task. Given that dev_queue_xmit() always executes with bh disabled, we can use softirq_count() instead to obtain the correct classid. The simple steps to reproduce this issue: 1. Add network delay to the network interface: such as: tc qdisc add dev bond0 root netem delay 1.5ms 2. Build two distinct net_cls cgroups, each with a network-intensive task 3. Initiate parallel TCP streams from both tasks to external servers. Under this specific condition, the issue reliably occurs. The kernel eventually dequeues an SKB that originated from Task-A while executing in the context of Task-B. It is worth noting that it will change the established behavior for a slightly different scenario: prior to this patch the skb will be classified with the 'new' task A classid, now with the old/original one. 
The bpf_get_cgroup_classid_curr() function is a more appropriate choice for this case. Signed-off-by: Yafang Shao Cc: Daniel Borkmann Cc: Thomas Graf Cc: Sebastian Andrzej Siewior Cc: Nikolay Aleksandrov Link: https://patch.msgid.link/20250902062933.30087-1-laoar.shao@gmail.com Signed-off-by: Jakub Kicinski --- include/net/cls_cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 7e78e7d6f015..668aeee9b3f6 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -63,7 +63,7 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * calls by looking at the number of nested bh disable calls because * softirqs always disables bh. */ - if (in_serving_softirq()) { + if (softirq_count()) { struct sock *sk = skb_to_full_sk(skb); /* If there is an sock_cgroup_classid we'll use that. */ -- cgit v1.2.3 From ae1c658b33d4bec20c037aebba583a68375d4773 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 11 Sep 2025 15:08:31 +0200 Subject: net: phy: introduce phy_id_compare_model() PHY ID helper Similar to phy_id_compare_vendor(), introduce the equivalent phy_id_compare_model() helper for the generic PHY ID Model mask. 
Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Christian Marangi Link: https://patch.msgid.link/20250911130840.23569-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 04553419adc3..6f3b25cb7f4e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1308,6 +1308,19 @@ static inline bool phy_id_compare_vendor(u32 id, u32 vendor_mask) return phy_id_compare(id, vendor_mask, PHY_ID_MATCH_VENDOR_MASK); } +/** + * phy_id_compare_model - compare @id with @model mask + * @id: PHY ID + * @model_mask: PHY Model mask + * + * Return: true if the bits from @id match @model using the + * generic PHY Model mask. + */ +static inline bool phy_id_compare_model(u32 id, u32 model_mask) +{ + return phy_id_compare(id, model_mask, PHY_ID_MATCH_MODEL_MASK); +} + /** * phydev_id_compare - compare @id with the PHY's Clause 22 ID * @phydev: the PHY device -- cgit v1.2.3 From a9888628cb2c768202a4530e2816da1889cc3165 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 9 Sep 2025 18:54:15 +0200 Subject: net: dst_metadata: fix IP_DF bit not extracted from tunnel headers Both OVS and TC flower allow extracting and matching on the DF bit of the outer IP header via OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT in the OVS_KEY_ATTR_TUNNEL and TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT in the TCA_FLOWER_KEY_ENC_FLAGS respectively. Flow dissector extracts this information as FLOW_DIS_F_TUNNEL_DONT_FRAGMENT from the tunnel info key. However, the IP_TUNNEL_DONT_FRAGMENT_BIT in the tunnel key is never actually set, because the tunneling code doesn't actually extract it from the IP header. OAM and CRIT_OPT are extracted by the tunnel implementation code, same code also sets the KEY flag, if present. 
UDP tunnel core takes care of setting the CSUM flag if the checksum is present in the UDP header, but the DONT_FRAGMENT is not handled at any layer. Fix that by checking the bit and setting the corresponding flag while populating the tunnel info in the IP layer where it belongs. Not using __assign_bit as we don't really need to clear the bit in a just initialized field. It also doesn't seem like using __assign_bit will make the code look better. Clearly, users didn't rely on this functionality for anything very important until now. The reason why this doesn't break OVS logic is that it only matches on what kernel previously parsed out and if kernel consistently reports this bit as zero, OVS will only match on it to be zero, which sort of works. But it is still a bug that the uAPI reports and allows matching on the field that is not actually checked in the packet. And this is causing misleading -df reporting in OVS datapath flows, while the tunnel traffic actually has the bit set in most cases. This may also cause issues if a hardware properly implements support for tunnel flag matching as it will disagree with the implementation in a software path of TC flower. 
Fixes: 7d5437c709de ("openvswitch: Add tunneling interface.") Fixes: 1d17568e74de ("net/sched: cls_flower: add support for matching tunnel control flags") Signed-off-by: Ilya Maximets Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250909165440.229890-2-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- include/net/dst_metadata.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 4160731dcb6e..1fc2fb03ce3f 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -3,6 +3,7 @@ #define __NET_DST_METADATA_H 1 #include +#include #include #include #include @@ -220,9 +221,15 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, int md_size) { const struct iphdr *iph = ip_hdr(skb); + struct metadata_dst *tun_dst; + + tun_dst = __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, + 0, flags, tunnel_id, md_size); - return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, - 0, flags, tunnel_id, md_size); + if (tun_dst && (iph->frag_off & htons(IP_DF))) + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, + tun_dst->u.tun_info.key.tun_flags); + return tun_dst; } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, -- cgit v1.2.3 From 3afaff7a0ce97457c8ab46862f2c06603a89962e Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Mon, 11 Aug 2025 17:42:53 +0530 Subject: include/linux/rv.h: remove redundant include file Remove redundant include to clean up the code. Move all unique include files inside CONFIG_RV as they are only needed when CONFIG_RV is enabled. Arrange include files alphabetically. 
Fixes: 24cbfe18d55a ("rv: Merge struct rv_monitor_def into struct rv_monitor") [1] Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202507312017.oyD08TL5-lkp@intel.com/ Signed-off-by: Akhilesh Patil Reviewed-by: Gabriele Monaco Link: https://lore.kernel.org/r/aJneRbHGlNFg7lr9@bhairav-test.ee.iitb.ac.in Signed-off-by: Gabriele Monaco --- include/linux/rv.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rv.h b/include/linux/rv.h index 14410a42faef..9520aab34bcb 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -7,16 +7,14 @@ #ifndef _LINUX_RV_H #define _LINUX_RV_H -#include -#include - #define MAX_DA_NAME_LEN 32 #define MAX_DA_RETRY_RACING_EVENTS 3 #ifdef CONFIG_RV +#include #include +#include #include -#include /* * Deterministic automaton per-object variables. -- cgit v1.2.3 From e7c9b66b106989aeb17b167f5bbea9a108d26c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 17 Jul 2025 17:11:16 +0200 Subject: pwm: Provide a gpio device for waveform drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A PWM is a more general concept than an output-only GPIO. When using duty_length = period_length the PWM looks like an active GPIO, with duty_length = 0 like an inactive GPIO. With the waveform abstraction there is enough control over the configuration to ensure that PWMs that cannot generate a constant signal at both levels error out. The pwm-pca9685 driver already provides a gpio chip. When this driver is converted to the waveform callbacks, the gpio part can just be dropped. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20250717151117.1828585-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 8cafc483db53..549ac4aaad59 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -321,6 +322,7 @@ struct pwm_ops { * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier * @atomic: can the driver's ->apply() be called in atomic context + * @gpio: &struct gpio_chip to operate this PWM chip's lines as GPO * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip * @operational: signals if the chip can be used (or is already deregistered) * @nonatomic_lock: mutex for nonatomic chips @@ -340,6 +342,7 @@ struct pwm_chip { bool atomic; /* only used internally by the PWM framework */ + struct gpio_chip gpio; bool uses_pwmchip_alloc; bool operational; union { -- cgit v1.2.3 From 09f37134464cc03baf5cb8eab2d99db27ee73217 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:00 -0500 Subject: x86,fs/resctrl: Consolidate monitor event descriptions There are currently only three monitor events, all associated with the RDT_RESOURCE_L3 resource. Growing support for additional events will be easier with some restructuring to have a single point in file system code where all attributes of all events are defined. Place all event descriptions into an array mon_event_all[]. Doing this has the beneficial side effect of removing the need for rdt_resource::evt_list. Add resctrl_event_id::QOS_FIRST_EVENT for a lower bound on range checks for event ids and as the starting index to scan mon_event_all[]. 
Drop the code that builds evt_list and change the two places where the list is scanned to scan mon_event_all[] instead using a new helper macro for_each_mon_event(). Architecture code now informs file system code which events are available with resctrl_enable_mon_event(). Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 4 ++-- include/linux/resctrl_types.h | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 6fb4894b8cfd..2944042bd84c 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -269,7 +269,6 @@ enum resctrl_schema_fmt { * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @schema_fmt: Which format string and parser is used for this schema. - * @evt_list: List of monitoring events * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth * monitoring events can be configured. 
* @cdp_capable: Is the CDP feature available on this resource @@ -287,7 +286,6 @@ struct rdt_resource { struct list_head mon_domains; char *name; enum resctrl_schema_fmt schema_fmt; - struct list_head evt_list; unsigned int mbm_cfg_mask; bool cdp_capable; }; @@ -372,6 +370,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +void resctrl_enable_mon_event(enum resctrl_event_id eventid); + bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); /** diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index a25fb9c4070d..2dadbc54e4b3 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -34,11 +34,15 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) -/* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. - */ +/* Event IDs */ enum resctrl_event_id { + /* Must match value of first event below */ + QOS_FIRST_EVENT = 0x01, + + /* + * These values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ QOS_L3_OCCUP_EVENT_ID = 0x01, QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, -- cgit v1.2.3 From d257cc2e5c8bb8236cb161360d6c0529fd442712 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:01 -0500 Subject: x86,fs/resctrl: Replace architecture event enabled checks The resctrl file system now has complete knowledge of the status of every event. So there is no need for per-event function calls to check. Replace each of the resctrl_arch_is_{event}enabled() calls with resctrl_is_mon_event_enabled(QOS_{EVENT}). No functional change. 
Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 2944042bd84c..40aba6b5d4f0 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -372,6 +372,8 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); void resctrl_enable_mon_event(enum resctrl_event_id eventid); +bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); + bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); /** -- cgit v1.2.3 From 8810c6e7cca8fbfce7652b53e05acc465e671d28 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Tue, 9 Sep 2025 10:00:04 +0000 Subject: KVM: arm64: vgic-init: Remove vgic_ready() macro It is now used only within kvm_vgic_map_resources(). vgic_dist::ready is already written directly by this function, so it is clearer to bypass the macro for reads as well. 
Signed-off-by: Keir Fraser Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 404883c7af6e..e7ffaf4bf2e7 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -406,7 +406,6 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) -#define vgic_ready(k) ((k)->arch.vgic.ready) #define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) -- cgit v1.2.3 From 7788255aba6545a27b8d143c5256536f8dfb2c0a Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Tue, 9 Sep 2025 10:00:06 +0000 Subject: KVM: Implement barriers before accessing kvm->buses[] on SRCU read paths This ensures that, if a VCPU has "observed" that an IO registration has occurred, the instruction currently being trapped or emulated will also observe the IO registration. At the same time, enforce that kvm_get_bus() is used only on the update side, ensuring that a long-term reference cannot be obtained by an SRCU reader. Signed-off-by: Keir Fraser Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..e7d6111cf254 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -966,11 +966,15 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm) return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); } +/* + * Get a bus reference under the update-side lock. No long-term SRCU reader + * references are permitted, to avoid stale reads vs concurrent IO + * registrations. 
+ */ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { - return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, - lockdep_is_held(&kvm->slots_lock) || - !refcount_read(&kvm->users_count)); + return rcu_dereference_protected(kvm->buses[idx], + lockdep_is_held(&kvm->slots_lock)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) -- cgit v1.2.3 From 7d9a0273c45962e9a6bc06f3b87eef7c431c1853 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Tue, 9 Sep 2025 10:00:07 +0000 Subject: KVM: Avoid synchronize_srcu() in kvm_io_bus_register_dev() Device MMIO registration may happen quite frequently during VM boot, and the SRCU synchronization each time has a measurable effect on VM startup time. In our experiments it can account for around 25% of a VM's startup time. Replace the synchronization with a deferred free of the old kvm_io_bus structure. Tested-by: Li RongQing Signed-off-by: Keir Fraser Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e7d6111cf254..103be35caf0d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -206,6 +206,7 @@ struct kvm_io_range { struct kvm_io_bus { int dev_count; int ioeventfd_count; + struct rcu_head rcu; struct kvm_io_range range[]; }; -- cgit v1.2.3 From 83b039877310ae1eb614eef17b780df1e10d9fb5 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:03 -0500 Subject: x86,fs/resctrl: Prepare for more monitor events There's a rule in computer programming that objects appear zero, once, or many times. So code accordingly. There are two MBM events and resctrl is coded with a lot of if (local) do one thing if (total) do a different thing Change the rdt_mon_domain and rdt_hw_mon_domain structures to hold arrays of pointers to per event data instead of explicit fields for total and local bandwidth. 
Simplify by coding for many events using loops on which are enabled. Move resctrl_is_mbm_event() to <linux/resctrl.h> so it can be used more widely. Also provide a for_each_mbm_event_id() helper macro. Cleanup variable names in functions touched to consistently use "eventid" for those with type enum resctrl_event_id. Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 23 +++++++++++++++++++---- include/linux/resctrl_types.h | 3 +++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 40aba6b5d4f0..478d7a935ca3 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -161,8 +161,9 @@ struct rdt_ctrl_domain { * @hdr: common header for different domain types * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold - * @mbm_total: saved state for MBM total bandwidth - * @mbm_local: saved state for MBM local bandwidth + * @mbm_states: Per-event pointer to the MBM event's saved state. + * An MBM event's state is an array of struct mbm_state + * indexed by RMID on x86 or combined CLOSID, RMID on Arm. 
* @mbm_over: worker to periodically read MBM h/w counters * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters @@ -172,8 +173,7 @@ struct rdt_mon_domain { struct rdt_domain_hdr hdr; unsigned int ci_id; unsigned long *rmid_busy_llc; - struct mbm_state *mbm_total; - struct mbm_state *mbm_local; + struct mbm_state *mbm_states[QOS_NUM_L3_MBM_EVENTS]; struct delayed_work mbm_over; struct delayed_work cqm_limbo; int mbm_work_cpu; @@ -376,6 +376,21 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); +static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid) +{ + return (eventid >= QOS_L3_MBM_TOTAL_EVENT_ID && + eventid <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + +/* Iterate over all memory bandwidth events */ +#define for_each_mbm_event_id(eventid) \ + for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \ + eventid <= QOS_L3_MBM_LOCAL_EVENT_ID; eventid++) + +/* Iterate over memory bandwidth arrays in domain structures */ +#define for_each_mbm_idx(idx) \ + for (idx = 0; idx < QOS_NUM_L3_MBM_EVENTS; idx++) + /** * resctrl_arch_mon_event_config_write() - Write the config for an event. 
* @config_info: struct resctrl_mon_config_info describing the resource, domain diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index 2dadbc54e4b3..d98351663c2c 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -51,4 +51,7 @@ enum resctrl_event_id { QOS_NUM_EVENTS, }; +#define QOS_NUM_L3_MBM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID - QOS_L3_MBM_TOTAL_EVENT_ID + 1) +#define MBM_STATE_IDX(evt) ((evt) - QOS_L3_MBM_TOTAL_EVENT_ID) + #endif /* __LINUX_RESCTRL_TYPES_H */ -- cgit v1.2.3 From 5ad68c8f965fed78c61f2ac7aea933f06bb50032 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:06 -0500 Subject: x86,fs/resctrl: Consolidate monitoring related data from rdt_resource The cache allocation and memory bandwidth allocation feature properties are consolidated into struct resctrl_cache and struct resctrl_membw respectively. In preparation for more monitoring properties that will clobber the existing resource struct more, re-organize the monitoring specific properties to also be in a separate structure. Also convert "bandwidth sources" terminology to "memory transactions" to have consistency within resctrl for related monitoring features. [ bp: Massage commit message. ] Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 478d7a935ca3..fe2af6cb96d4 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -255,38 +255,46 @@ enum resctrl_schema_fmt { RESCTRL_SCHEMA_RANGE, }; +/** + * struct resctrl_mon - Monitoring related data of a resctrl resource. + * @num_rmid: Number of RMIDs available. 
+ * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth + * monitoring events can be configured. + */ +struct resctrl_mon { + int num_rmid; + unsigned int mbm_cfg_mask; +}; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine - * @num_rmid: Number of RMIDs available * @ctrl_scope: Scope of this resource for control functions * @mon_scope: Scope of this resource for monitor functions * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. + * @mon: Monitoring related data. * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @schema_fmt: Which format string and parser is used for this schema. - * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth - * monitoring events can be configured. * @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { int rid; bool alloc_capable; bool mon_capable; - int num_rmid; enum resctrl_scope ctrl_scope; enum resctrl_scope mon_scope; struct resctrl_cache cache; struct resctrl_membw membw; + struct resctrl_mon mon; struct list_head ctrl_domains; struct list_head mon_domains; char *name; enum resctrl_schema_fmt schema_fmt; - unsigned int mbm_cfg_mask; bool cdp_capable; }; -- cgit v1.2.3 From 13390861b426e936db20d675804a5b405622bc79 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:07 -0500 Subject: x86,fs/resctrl: Detect Assignable Bandwidth Monitoring feature details ABMC feature details are reported via CPUID Fn8000_0020_EBX_x5. Bits Description 15:0 MAX_ABMC Maximum Supported Assignable Bandwidth Monitoring Counter ID + 1 The ABMC feature details are documented in APM [1] available from [2]. 
[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth Monitoring (ABMC). Detect the feature and number of assignable counters supported. For backward compatibility, upon detecting the assignable counter feature, enable the mbm_total_bytes and mbm_local_bytes events that users are familiar with as part of original L3 MBM support. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- include/linux/resctrl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index fe2af6cb96d4..eb80cc233be4 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -260,10 +260,14 @@ enum resctrl_schema_fmt { * @num_rmid: Number of RMIDs available. * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth * monitoring events can be configured. + * @num_mbm_cntrs: Number of assignable counters. + * @mbm_cntr_assignable:Is system capable of supporting counter assignment? */ struct resctrl_mon { int num_rmid; unsigned int mbm_cfg_mask; + int num_mbm_cntrs; + bool mbm_cntr_assignable; }; /** -- cgit v1.2.3 From faebbc58cde9d8f6050ac152c34c88195ed4abaa Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:08 -0500 Subject: x86/resctrl: Add support to enable/disable AMD ABMC feature Add the functionality to enable/disable the AMD ABMC feature. The AMD ABMC feature is enabled by setting enabled bit(0) in the L3_QOS_EXT_CFG MSR. When the state of ABMC is changed, the MSR needs to be updated on all the logical processors in the QOS Domain. Hardware counters will reset when ABMC state is changed. [ bp: Massage commit message. 
] Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- include/linux/resctrl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index eb80cc233be4..919806122c50 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -445,6 +445,26 @@ static inline u32 resctrl_get_config_index(u32 closid, bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); +/** + * resctrl_arch_mbm_cntr_assign_enabled() - Check if MBM counter assignment + * mode is enabled. + * @r: Pointer to the resource structure. + * + * Return: + * true if the assignment mode is enabled, false otherwise. + */ +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r); + +/** + * resctrl_arch_mbm_cntr_assign_set() - Configure the MBM counter assignment mode. + * @r: Pointer to the resource structure. + * @enable: Set to true to enable, false to disable the assignment mode. + * + * Return: + * 0 on success, < 0 on error. + */ +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. -- cgit v1.2.3 From 4d32c24a74f2c12ff440d381ba01de574f6631ce Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:11 -0500 Subject: fs/resctrl: Introduce mbm_cntr_cfg to track assignable counters per domain The "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Counters are assigned/unassigned at monitoring domain level. 
Manage a monitoring domain's hardware counters using a per monitoring domain array of struct mbm_cntr_cfg that is indexed by the hardware counter ID. A hardware counter's configuration contains the MBM event ID and points to the monitoring group that it is assigned to, with a NULL pointer meaning that the hardware counter is available for assignment. There is no direct way to determine which hardware counters are assigned to a particular monitoring group. Check every entry of every hardware counter configuration array in every monitoring domain to query which MBM events of a monitoring group are tracked by hardware. Such queries are acceptable because of a very small number of assignable counters (32 to 64). Suggested-by: Peter Newman Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 919806122c50..e013caba6641 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -156,6 +156,18 @@ struct rdt_ctrl_domain { u32 *mbps_val; }; +/** + * struct mbm_cntr_cfg - Assignable counter configuration. + * @evtid: MBM event to which the counter is assigned. Only valid + * if @rdtgrp is not NULL. + * @rdtgrp: resctrl group assigned to the counter. NULL if the + * counter is free.
+ */ +struct mbm_cntr_cfg { + enum resctrl_event_id evtid; + struct rdtgroup *rdtgrp; +}; + /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types @@ -168,6 +180,8 @@ struct rdt_ctrl_domain { * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters * @cqm_work_cpu: worker CPU for CQM h/w counters + * @cntr_cfg: array of assignable counters' configuration (indexed + * by counter ID) */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; @@ -178,6 +192,7 @@ struct rdt_mon_domain { struct delayed_work cqm_limbo; int mbm_work_cpu; int cqm_work_cpu; + struct mbm_cntr_cfg *cntr_cfg; }; /** -- cgit v1.2.3 From ebebda853633de389ba2c6737f8ca38405713e90 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:14 -0500 Subject: fs/resctrl: Introduce event configuration field in struct mon_evt When supported, mbm_event counter assignment mode allows the user to configure events to track specific types of memory transactions. Introduce an evt_cfg field in struct mon_evt to define the type of memory transactions tracked by a monitoring event. Also add a helper function to get the evt_cfg value. 
Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index e013caba6641..87daa4ca312d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -409,6 +409,8 @@ static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid) eventid <= QOS_L3_MBM_LOCAL_EVENT_ID); } +u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id eventid); + /* Iterate over all memory bandwidth events */ #define for_each_mbm_event_id(eventid) \ for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \ -- cgit v1.2.3 From feb8ae81b2378b75a99c81d315602ac8918ed382 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 12 Sep 2025 21:54:53 +0200 Subject: ACPICA: Allow to skip Global Lock initialization Introduce acpi_gbl_use_global_lock, which allows to skip the Global Lock initialization. This is useful for systems without Global Lock (such as loong_arch), so as to avoid error messages during boot phase: ACPI Error: Could not enable global_lock event (20240827/evxfevnt-182) ACPI Error: No response from Global Lock hardware, disabling lock (20240827/evglock-59) Link: https://github.com/acpica/acpica/commit/463cb0fe Signed-off-by: Huacai Chen Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index b49396aa4058..97c25ae8a36e 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -213,6 +213,12 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_osi_data, 0); */ ACPI_INIT_GLOBAL(u8, acpi_gbl_reduced_hardware, FALSE); +/* + * ACPI Global Lock is mainly used for systems with SMM, so no-SMM systems + * (such as loong_arch) may not have and not use Global Lock. 
+ */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_use_global_lock, TRUE); + /* * Maximum timeout for While() loop iterations before forced method abort. * This mechanism is intended to prevent infinite loops during interpreter -- cgit v1.2.3 From 12fd607554c4efb4856959f0e5823f541bc0e701 Mon Sep 17 00:00:00 2001 From: Ahmed Salem Date: Fri, 12 Sep 2025 21:55:35 +0200 Subject: ACPICA: Apply ACPI_NONSTRING Add ACPI_NONSTRING for destination char arrays without a terminating NUL character. This is a follow-up to commit 2b82118845e0 ("ACPICA: Apply ACPI_NONSTRING") where a few more destination arrays were missed. Link: https://github.com/acpica/acpica/commit/f359e5ed Fixes: 2b82118845e0 ("ACPICA: Apply ACPI_NONSTRING") Signed-off-by: Ahmed Salem Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 243097a3da63..8a67d4ea6e3f 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -73,7 +73,7 @@ struct acpi_table_header { char oem_id[ACPI_OEM_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM identification */ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM table identification */ u32 oem_revision; /* OEM revision number */ - char asl_compiler_id[ACPI_NAMESEG_SIZE]; /* ASCII ASL compiler vendor ID */ + char asl_compiler_id[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; /* ASCII ASL compiler vendor ID */ u32 asl_compiler_revision; /* ASL compiler version */ }; -- cgit v1.2.3 From e2c80b3c23782d809b4e15da7d8a0135a8d350b5 Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Fri, 12 Sep 2025 22:01:52 +0200 Subject: ACPICA: Print error messages for too few or too many arguments Fix Issue #1027 by displaying error messages when there are too few or too many arguments in the caller vs the definition of an ASL/AML method. Link: https://github.com/acpica/acpica/commit/cbc243e4 Reported-by: Peter Williams Signed-off-by: Rafael J. 
Wysocki Tested-by: Hans de Goede Signed-off-by: Saket Dumbre --- include/acpi/acexcep.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index 53c98f5fe3c3..a2db36d18419 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -173,8 +173,10 @@ struct acpi_exception_info { #define AE_AML_TARGET_TYPE EXCEP_AML (0x0023) #define AE_AML_PROTOCOL EXCEP_AML (0x0024) #define AE_AML_BUFFER_LENGTH EXCEP_AML (0x0025) +#define AE_AML_TOO_FEW_ARGUMENTS EXCEP_AML (0x0026) +#define AE_AML_TOO_MANY_ARGUMENTS EXCEP_AML (0x0027) -#define AE_CODE_AML_MAX 0x0025 +#define AE_CODE_AML_MAX 0x0027 /* * Internal exceptions used for control @@ -353,7 +355,11 @@ static const struct acpi_exception_info acpi_gbl_exception_names_aml[] = { "A target operand of an incorrect type was encountered"), EXCEP_TXT("AE_AML_PROTOCOL", "Violation of a fixed ACPI protocol"), EXCEP_TXT("AE_AML_BUFFER_LENGTH", - "The length of the buffer is invalid/incorrect") + "The length of the buffer is invalid/incorrect"), + EXCEP_TXT("AE_AML_TOO_FEW_ARGUMENTS", + "There are fewer than expected method arguments"), + EXCEP_TXT("AE_AML_TOO_MANY_ARGUMENTS", + "There are too many arguments for this method") }; static const struct acpi_exception_info acpi_gbl_exception_names_ctrl[] = { -- cgit v1.2.3 From b0fb6891b8ad55cb5767edf94351ffe4f0b9a404 Mon Sep 17 00:00:00 2001 From: Saket Dumbre Date: Fri, 12 Sep 2025 22:02:32 +0200 Subject: ACPICA: Update version to 20250807 Link: https://github.com/acpica/acpica/commit/0845a773 Signed-off-by: Saket Dumbre Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 97c25ae8a36e..e65a2afe9250 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20250404 +#define ACPI_CA_VERSION 0x20250807 #include #include -- cgit v1.2.3 From eddf12041ddd8c40289015f42285c5ac614b5c30 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 12 Sep 2025 22:04:04 +0200 Subject: ACPICA: CEDT: Add Back-Invalidate restriction to CXL Window This is added in newer version (3.0+) of the CXL Spec to support the HDM-DB coherency model. Link: https://github.com/acpica/acpica/commit/a6886da1 Signed-off-by: Davidlohr Bueso Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 99fd1588ff38..0b4c332df25c 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -565,6 +565,7 @@ struct acpi_cedt_cfmws_target_element { #define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE (1<<2) #define ACPI_CEDT_CFMWS_RESTRICT_PMEM (1<<3) #define ACPI_CEDT_CFMWS_RESTRICT_FIXED (1<<4) +#define ACPI_CEDT_CFMWS_RESTRICT_BI (1<<5) /* 2: CXL XOR Interleave Math Structure */ -- cgit v1.2.3 From f7a4fb22312646329ba21bc58958fd83fb9fc15d Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:15 -0500 Subject: x86,fs/resctrl: Implement resctrl_arch_config_cntr() to assign a counter with ABMC The ABMC feature allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Implement an x86 architecture-specific handler to configure a counter. 
This architecture specific handler is called by resctrl fs when a counter is assigned or unassigned as well as when an already assigned counter's configuration should be updated. Configure counters by writing to the L3_QOS_ABMC_CFG MSR, specifying the counter ID, bandwidth source (RMID), and event configuration. The ABMC feature details are documented in APM [1] available from [2]. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth Monitoring (ABMC). Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- include/linux/resctrl.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 87daa4ca312d..50e38445183a 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -594,6 +594,25 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * */ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); +/** + * resctrl_arch_config_cntr() - Configure the counter with its new RMID + * and event details. + * @r: Resource structure. + * @d: The domain in which counter with ID @cntr_id should be configured. + * @evtid: Monitoring event type (e.g., QOS_L3_MBM_TOTAL_EVENT_ID + * or QOS_L3_MBM_LOCAL_EVENT_ID). + * @rmid: RMID. + * @closid: CLOSID. + * @cntr_id: Counter ID to configure. + * @assign: True to assign the counter or update an existing assignment, + * false to unassign the counter. + * + * This can be called from any CPU. 
+ */ +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- cgit v1.2.3 From 862314fd1f93d96eddb0559a807c66cb1f6ee520 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:19 -0500 Subject: fs/resctrl: Introduce counter ID read, reset calls in mbm_event mode When supported, "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor the bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Introduce the architecture calls resctrl_arch_cntr_read() and resctrl_arch_reset_cntr() to read and reset event counters when "mbm_event" mode is supported. Function names match existing resctrl_arch_rmid_read() and resctrl_arch_reset_rmid(). Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 50e38445183a..04152654827d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -613,6 +613,44 @@ void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign); +/** + * resctrl_arch_cntr_read() - Read the event data corresponding to the counter ID + * assigned to the RMID, event pair for this resource + * and domain. + * @r: Resource that the counter should be read from. + * @d: Domain that the counter should be read from. + * @closid: CLOSID that matches the RMID. 
+ * @rmid: The RMID to which @cntr_id is assigned. + * @cntr_id: The counter to read. + * @eventid: The MBM event to which @cntr_id is assigned. + * @val: Result of the counter read in bytes. + * + * Called on a CPU that belongs to domain @d when "mbm_event" mode is enabled. + * Called from a non-migratable process context via smp_call_on_cpu() unless all + * CPUs are nohz_full, in which case it is called via IPI (smp_call_function_any()). + * + * Return: + * 0 on success, or -EIO, -EINVAL etc on error. + */ +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid, u64 *val); + +/** + * resctrl_arch_reset_cntr() - Reset any private state associated with counter ID. + * @r: The domain's resource. + * @d: The counter ID's domain. + * @closid: CLOSID that matches the RMID. + * @rmid: The RMID to which @cntr_id is assigned. + * @cntr_id: The counter to reset. + * @eventid: The MBM event to which @cntr_id is assigned. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- cgit v1.2.3 From ea274cbeaf8f0667267b347e3f84797439cdab4e Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:23 -0500 Subject: fs/resctrl: Add event configuration directory under info/L3_MON/ The "mbm_event" counter assignment mode allows the user to assign a hardware counter to an RMID, event pair and monitor the bandwidth as long as it is assigned. The user can specify the memory transaction(s) for the counter to track. When this mode is supported, the /sys/fs/resctrl/info/L3_MON/event_configs directory contains a sub-directory for each MBM event that can be assigned to a counter.
The MBM event sub-directory contains a file named "event_filter" that is used to view and modify which memory transactions the MBM event is configured with. Create /sys/fs/resctrl/info/L3_MON/event_configs directory on resctrl mount and pre-populate it with directories for the two existing MBM events: mbm_total_bytes and mbm_local_bytes. Create the "event_filter" file within each MBM event directory with the needed *show() that displays the memory transactions with which the MBM event is configured. Example: $ mount -t resctrl resctrl /sys/fs/resctrl $ cd /sys/fs/resctrl/ $ cat info/L3_MON/event_configs/mbm_total_bytes/event_filter local_reads,remote_reads,local_non_temporal_writes, remote_non_temporal_writes,local_reads_slow_memory, remote_reads_slow_memory,dirty_victim_writes_all $ cat info/L3_MON/event_configs/mbm_local_bytes/event_filter local_reads,local_non_temporal_writes,local_reads_slow_memory Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index d98351663c2c..acfe07860b34 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -34,6 +34,9 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) +/* Number of memory transactions that an MBM event can be configured with */ +#define NUM_MBM_TRANSACTIONS 7 + /* Event IDs */ enum resctrl_event_id { /* Must match value of first event below */ -- cgit v1.2.3 From ac1df9bb0ba3ae94137fb494cd9efc598f65d826 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:25 -0500 Subject: fs/resctrl: Introduce mbm_assign_on_mkdir to enable assignments on mkdir The "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor the 
bandwidth as long as it is assigned. Introduce a user-configurable option that determines if a counter will automatically be assigned to an RMID, event pair when its associated monitor group is created via mkdir. Accessible when "mbm_event" counter assignment mode is enabled. Suggested-by: Peter Newman Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com --- include/linux/resctrl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 04152654827d..a7d92718b653 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -277,12 +277,15 @@ enum resctrl_schema_fmt { * monitoring events can be configured. * @num_mbm_cntrs: Number of assignable counters. * @mbm_cntr_assignable:Is system capable of supporting counter assignment? + * @mbm_assign_on_mkdir:True if counters should automatically be assigned to MBM + * events of monitor groups created via mkdir. */ struct resctrl_mon { int num_rmid; unsigned int mbm_cfg_mask; int num_mbm_cntrs; bool mbm_cntr_assignable; + bool mbm_assign_on_mkdir; }; /** -- cgit v1.2.3 From 4e445729dc103ff7780ed03da9f8310759ea5dae Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:35 +0530 Subject: drm/bridge: samsung-dsim: support separate LINK and DPHY status registers Exynos7870's DSIM has separate registers for LINK and DPHY status. This is in contrast to older variants in the driver which use a single register for both. Add a driver data flag which indicates that the device variant supports the legacy status register. Change the register read calls appropriately. 
Suggested-by: Inki Dae Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index 9764d6eb5beb..d7877191bad1 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -53,6 +53,7 @@ struct samsung_dsim_transfer { struct samsung_dsim_driver_data { const unsigned int *reg_ofs; unsigned int plltmr_reg; + unsigned int has_legacy_status_reg:1; unsigned int has_freqband:1; unsigned int has_clklane_stop:1; unsigned int has_broken_fifoctrl_emptyhdr:1; -- cgit v1.2.3 From 7c9b998947f19457e32496ab9edeea798373c426 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:36 +0530 Subject: drm/bridge: samsung-dsim: add SFRCTRL register On Exynos7870 devices, enabling the display requires disabling standby by writing to the SFRCTRL register. Add the register and related bit values. Since this behavior isn't available on other SoCs, implement a flag in the driver data struct indicating the availability of this feature. 
Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index d7877191bad1..f0c1e5c5ed49 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -57,6 +57,7 @@ struct samsung_dsim_driver_data { unsigned int has_freqband:1; unsigned int has_clklane_stop:1; unsigned int has_broken_fifoctrl_emptyhdr:1; + unsigned int has_sfrctrl:1; unsigned int num_clks; unsigned int min_freq; unsigned int max_freq; -- cgit v1.2.3 From 92beab1a397d80d04d90f511c6d0af696da67a33 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:37 +0530 Subject: drm/bridge: samsung-dsim: add flag to control header FIFO wait Exynos7870's DSIM device doesn't require waiting for the header FIFO during a MIPI DSI transfer. Add a flag in the driver data in order to control said behavior. Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae Date: Sun, 6 Jul 2025 23:55:38 +0530 Subject: drm/bridge: samsung-dsim: allow configuring bits and offsets of CLKCTRL register DSIM_CLKCTRL bit and offset values hardcoded in the driver: name | bit/offset value --------------------------+----------------- DSIM_LANE_ESC_CLK_EN_CLK | 19 DSIM_LANE_ESC_CLK_EN_DATA | 20 DSIM_BYTE_CLKEN | 24 DSIM_ESC_CLKEN | 28 DSIM_TX_REQUEST_HSCLK | 31 DSIM_CLKCTRL bit and offset values in Exynos7870 DSIM as per downstream kernel sources: name | bit/offset value --------------------------+----------------- DSIM_LANE_ESC_CLK_EN_CLK | 8 DSIM_LANE_ESC_CLK_EN_DATA | 9 DSIM_BYTE_CLKEN | 17 DSIM_ESC_CLKEN | 16 DSIM_TX_REQUEST_HSCLK | 20 In order to support both, move all values to the driver data struct and define it for every driver compatible. Reference the values from there instead, in functions wherever required. 
Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index 62c07952bd00..b1e64c7f9931 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -64,6 +64,11 @@ struct samsung_dsim_driver_data { unsigned int wait_for_hdr_fifo; unsigned int wait_for_reset; unsigned int num_bits_resol; + unsigned int esc_clken_bit; + unsigned int byte_clken_bit; + unsigned int tx_req_hsclk_bit; + unsigned int lane_esc_clk_bit; + unsigned int lane_esc_data_offset; unsigned int pll_p_offset; const unsigned int *reg_values; unsigned int pll_fin_min; -- cgit v1.2.3 From 4d244122dd90c72f6c3f10eb7a53678d78d3b857 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:39 +0530 Subject: drm/bridge: samsung-dsim: allow configuring the MAIN_VSA offset The MAIN_VSA offset of DSIM_MSYNC is hardcoded to a 22-bit offset, but Exynos7870's DSIM has it in a 16-bit offset as per the downstream kernel sources. In order to support both, move this offset value to the driver data struct and define it for every driver compatible. Reference the value from there instead, in functions wherever required. 
Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index b1e64c7f9931..7f6d353f34af 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -70,6 +70,7 @@ struct samsung_dsim_driver_data { unsigned int lane_esc_clk_bit; unsigned int lane_esc_data_offset; unsigned int pll_p_offset; + unsigned int main_vsa_offset; const unsigned int *reg_values; unsigned int pll_fin_min; unsigned int pll_fin_max; -- cgit v1.2.3 From d6dbefb2fed7d7f333c4241965296d84c202b6bf Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:40 +0530 Subject: drm/bridge: samsung-dsim: allow configuring the VIDEO_MODE bit The VIDEO_MODE bit of DSIM_CONFIG is hardcoded to BIT(25), but Exynos7870's DSIM has it in BIT(18) as per downstream kernel sources. In order to support both, move this bit value to the driver data struct and define it for every driver compatible. Reference the value from there instead, in functions wherever required. 
Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index 7f6d353f34af..9d11c3e39fe5 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -64,6 +64,7 @@ struct samsung_dsim_driver_data { unsigned int wait_for_hdr_fifo; unsigned int wait_for_reset; unsigned int num_bits_resol; + unsigned int video_mode_bit; unsigned int esc_clken_bit; unsigned int byte_clken_bit; unsigned int tx_req_hsclk_bit; -- cgit v1.2.3 From 9aa49c21aac071383353315036520ba753484c93 Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:41 +0530 Subject: drm/bridge: samsung-dsim: allow configuring PLL_M and PLL_S offsets Currently, PLL_P offset of DSIM_PLLCTRL is configurable in the driver data, while PLL_M and PLL_S offsets are hardcoded as 4-bit and 1-bit offsets respectively, but Exynos7870's DSIM has them at 3-bit and 0-bit offsets as per downstream kernel sources. In order to support both, move both offset values to the driver data struct and define it for every driver compatible. Reference the values from there instead, in functions wherever required.
Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index 9d11c3e39fe5..000ada3ece4d 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -71,6 +71,8 @@ struct samsung_dsim_driver_data { unsigned int lane_esc_clk_bit; unsigned int lane_esc_data_offset; unsigned int pll_p_offset; + unsigned int pll_m_offset; + unsigned int pll_s_offset; unsigned int main_vsa_offset; const unsigned int *reg_values; unsigned int pll_fin_min; -- cgit v1.2.3 From f7754d843a05c685ba453be176a29ae157f88b0c Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:42 +0530 Subject: drm/bridge: samsung-dsim: allow configuring the PLL_STABLE bit The PLL_STABLE bit of DSIM_DPHY_STATUS is hardcoded to BIT(31), but Exynos7870's DSIM has it in BIT(24) as per downstream kernel sources. In order to support both, move this bit value to the driver data struct and define it for every driver compatible. Reference the value from there instead, in functions wherever required. 
Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index 000ada3ece4d..04ed11787bbd 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -65,6 +65,7 @@ struct samsung_dsim_driver_data { unsigned int wait_for_reset; unsigned int num_bits_resol; unsigned int video_mode_bit; + unsigned int pll_stable_bit; unsigned int esc_clken_bit; unsigned int byte_clken_bit; unsigned int tx_req_hsclk_bit; -- cgit v1.2.3 From f08051a4158fec363e1f33b75dd48131f524fa5f Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:44 +0530 Subject: drm/bridge: samsung-dsim: add ability to define clock names for every variant Presently, all devices refer to clock names from a single array. The only controlling parameter is the number of clocks (num_clks field of samsung_dsim_driver_data) which uses the first n clocks of that array. As new devices are added, this approach turns out to be cumbersome. Separate the clock names in individual arrays required by each variant, in a struct clk_bulk_data. Add a pointer field to the driver data struct which points to their respective clock names, and rework the clock usage code to use the clk_bulk_* API instead. 
Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index 04ed11787bbd..eb9fdbab1b34 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -58,6 +58,7 @@ struct samsung_dsim_driver_data { unsigned int has_clklane_stop:1; unsigned int has_broken_fifoctrl_emptyhdr:1; unsigned int has_sfrctrl:1; + struct clk_bulk_data *clk_data; unsigned int num_clks; unsigned int min_freq; unsigned int max_freq; @@ -104,7 +105,6 @@ struct samsung_dsim { void __iomem *reg_base; struct phy *phy; - struct clk **clks; struct clk *pll_clk; struct regulator_bulk_data supplies[2]; int irq; -- cgit v1.2.3 From 77169a11d4e9916f6c22587df396d6128505dbfb Mon Sep 17 00:00:00 2001 From: Kaustabh Chakraborty Date: Sun, 6 Jul 2025 23:55:46 +0530 Subject: drm/bridge: samsung-dsim: add driver support for exynos7870 DSIM bridge Add support for Exynos7870's DSIM IP block in the bridge driver. Signed-off-by: Kaustabh Chakraborty Signed-off-by: Inki Dae --- include/drm/bridge/samsung-dsim.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/bridge/samsung-dsim.h b/include/drm/bridge/samsung-dsim.h index eb9fdbab1b34..31d7ed589233 100644 --- a/include/drm/bridge/samsung-dsim.h +++ b/include/drm/bridge/samsung-dsim.h @@ -29,6 +29,7 @@ enum samsung_dsim_type { DSIM_TYPE_EXYNOS5410, DSIM_TYPE_EXYNOS5422, DSIM_TYPE_EXYNOS5433, + DSIM_TYPE_EXYNOS7870, DSIM_TYPE_IMX8MM, DSIM_TYPE_IMX8MP, DSIM_TYPE_COUNT, -- cgit v1.2.3 From 3c17001b21b9f168c957ced9384abe969019b609 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:24 +0200 Subject: pidfs: validate extensible ioctls Validate extensible ioctls more strictly than we do now.
Reviewed-by: Aleksa Sarai Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..2f2edc53bf3c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -4023,4 +4023,18 @@ static inline bool vfs_empty_path(int dfd, const char __user *path) int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); +static inline bool extensible_ioctl_valid(unsigned int cmd_a, + unsigned int cmd_b, size_t min_size) +{ + if (_IOC_DIR(cmd_a) != _IOC_DIR(cmd_b)) + return false; + if (_IOC_TYPE(cmd_a) != _IOC_TYPE(cmd_b)) + return false; + if (_IOC_NR(cmd_a) != _IOC_NR(cmd_b)) + return false; + if (_IOC_SIZE(cmd_a) < min_size) + return false; + return true; +} + #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From be975448a45cd024e2b98598eefc0e164ad93f09 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Jul 2025 14:25:35 -0700 Subject: srcu: Document __srcu_read_{,un}lock_fast() implicit RCU readers This commit documents the implicit RCU readers that are implied by the this_cpu_inc() and atomic_long_inc() operations in __srcu_read_lock_fast() and __srcu_read_unlock_fast(). While in the area, fix the documentation of the memory pairing of atomic_long_inc() in __srcu_read_lock_fast(). [ paulmck: Apply Joel Fernandes feedback. ] Signed-off-by: Paul E. McKenney Cc: Mathieu Desnoyers Cc: Steven Rostedt Cc: Sebastian Andrzej Siewior Cc: --- include/linux/srcutree.h | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 4d2fee4d3828..42098e0fa0b7 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -232,9 +232,27 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss * srcu_read_unlock_fast(). 
* * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because they - * are a single instruction, or because they are a read-modify-write atomic - * operation, depending on the whims of the architecture. + * critical sections either because they disables interrupts, because + * they are a single instruction, or because they are read-modify-write + * atomic operations, depending on the whims of the architecture. + * This matters because the SRCU-fast grace-period mechanism uses either + * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU, + * *not* SRCU, in order to eliminate the need for the read-side smp_mb() + * invocations that are used by srcu_read_lock() and srcu_read_unlock(). + * The __srcu_read_unlock_fast() function also relies on this same RCU + * (again, *not* SRCU) trick to eliminate the need for smp_mb(). + * + * The key point behind this RCU trick is that if any part of a given + * RCU reader precedes the beginning of a given RCU grace period, then + * the entirety of that RCU reader and everything preceding it happens + * before the end of that same RCU grace period. Similarly, if any part + * of a given RCU reader follows the end of a given RCU grace period, + * then the entirety of that RCU reader and everything following it + * happens after the beginning of that same RCU grace period. Therefore, + * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z + * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU + * read-side critical section from the viewpoint of the SRCU grace period. + * This is all the ordering that is required, hence no calls to smp_mb(). 
* * This means that __srcu_read_lock_fast() is not all that fast * on architectures that support NMIs but do not supply NMI-safe @@ -245,9 +263,9 @@ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct src struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - this_cpu_inc(scp->srcu_locks.counter); /* Y */ + this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader. else - atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */ + atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader. barrier(); /* Avoid leaking the critical section. */ return scp; } @@ -258,23 +276,17 @@ static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct src * different CPU than that which was incremented by the corresponding * srcu_read_lock_fast(), but it must be within the same task. * - * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side - * critical sections either because they disables interrupts, because they - * are a single instruction, or because they are a read-modify-write atomic - * operation, depending on the whims of the architecture. - * - * This means that __srcu_read_unlock_fast() is not all that fast - * on architectures that support NMIs but do not supply NMI-safe - * implementations of this_cpu_inc(). + * Please see the __srcu_read_lock_fast() function's header comment for + * information on implicit RCU readers and NMI safety. */ static inline void notrace __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) { barrier(); /* Avoid leaking the critical section. */ if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader. else - atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ + atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader. 
} void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -- cgit v1.2.3 From 5e0ae59159e3a07391a35865bb79ff335473fa79 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Mon, 18 Aug 2025 23:08:42 +0000 Subject: HID: add haptics page defines Introduce haptic usages as defined in HID Usage Tables specification. Add HID units for newton and gram. Signed-off-by: Angela Czubak Co-developed-by: Jonathan Denose Signed-off-by: Jonathan Denose Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..10f113c758fe 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -156,6 +156,7 @@ struct hid_item { #define HID_UP_TELEPHONY 0x000b0000 #define HID_UP_CONSUMER 0x000c0000 #define HID_UP_DIGITIZER 0x000d0000 +#define HID_UP_HAPTIC 0x000e0000 #define HID_UP_PID 0x000f0000 #define HID_UP_BATTERY 0x00850000 #define HID_UP_CAMERA 0x00900000 @@ -316,6 +317,28 @@ struct hid_item { #define HID_DG_TOOLSERIALNUMBER 0x000d005b #define HID_DG_LATENCYMODE 0x000d0060 +#define HID_HP_SIMPLECONTROLLER 0x000e0001 +#define HID_HP_WAVEFORMLIST 0x000e0010 +#define HID_HP_DURATIONLIST 0x000e0011 +#define HID_HP_AUTOTRIGGER 0x000e0020 +#define HID_HP_MANUALTRIGGER 0x000e0021 +#define HID_HP_AUTOTRIGGERASSOCIATEDCONTROL 0x000e0022 +#define HID_HP_INTENSITY 0x000e0023 +#define HID_HP_REPEATCOUNT 0x000e0024 +#define HID_HP_RETRIGGERPERIOD 0x000e0025 +#define HID_HP_WAVEFORMVENDORPAGE 0x000e0026 +#define HID_HP_WAVEFORMVENDORID 0x000e0027 +#define HID_HP_WAVEFORMCUTOFFTIME 0x000e0028 +#define HID_HP_WAVEFORMNONE 0x000e1001 +#define HID_HP_WAVEFORMSTOP 0x000e1002 +#define HID_HP_WAVEFORMCLICK 0x000e1003 +#define HID_HP_WAVEFORMBUZZCONTINUOUS 0x000e1004 +#define HID_HP_WAVEFORMRUMBLECONTINUOUS 0x000e1005 +#define HID_HP_WAVEFORMPRESS 0x000e1006 +#define HID_HP_WAVEFORMRELEASE 0x000e1007 +#define 
HID_HP_VENDORWAVEFORMMIN 0x000e2001 +#define HID_HP_VENDORWAVEFORMMAX 0x000e2fff + #define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065 #define HID_BAT_CHARGING 0x00850044 @@ -423,6 +446,12 @@ struct hid_item { #define HID_REPORT_PROTOCOL 1 #define HID_BOOT_PROTOCOL 0 +/* + * HID units + */ +#define HID_UNIT_GRAM 0x0101 +#define HID_UNIT_NEWTON 0xe111 + /* * This is the global environment of the parser. This information is * persistent for main-items. The global environment can be saved and -- cgit v1.2.3 From 08a72a220e960e7f153a810fb633638afd0b7563 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Mon, 18 Aug 2025 23:08:43 +0000 Subject: Input: add FF_HAPTIC effect type FF_HAPTIC effect type can be used to trigger haptic feedback with HID simple haptic usages. Signed-off-by: Angela Czubak Co-developed-by: Jonathan Denose Signed-off-by: Jonathan Denose Acked-by: Dmitry Torokhov Signed-off-by: Benjamin Tissoires --- include/uapi/linux/input.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 127119c287cf..6aa703fcfcfb 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -429,6 +429,24 @@ struct ff_rumble_effect { __u16 weak_magnitude; }; +/** + * struct ff_haptic_effect + * @hid_usage: hid_usage according to Haptics page (WAVEFORM_CLICK, etc.) 
+ * @vendor_id: the waveform vendor ID if hid_usage is in the vendor-defined range + * @vendor_waveform_page: the vendor waveform page if hid_usage is in the vendor-defined range + * @intensity: strength of the effect as percentage + * @repeat_count: number of times to retrigger effect + * @retrigger_period: time before effect is retriggered (in ms) + */ +struct ff_haptic_effect { + __u16 hid_usage; + __u16 vendor_id; + __u8 vendor_waveform_page; + __u16 intensity; + __u16 repeat_count; + __u16 retrigger_period; +}; + /** * struct ff_effect - defines force feedback effect * @type: type of the effect (FF_CONSTANT, FF_PERIODIC, FF_RAMP, FF_SPRING, @@ -465,6 +483,7 @@ struct ff_effect { struct ff_periodic_effect periodic; struct ff_condition_effect condition[2]; /* One for each axis */ struct ff_rumble_effect rumble; + struct ff_haptic_effect haptic; } u; }; @@ -472,6 +491,7 @@ struct ff_effect { * Force feedback effect types */ +#define FF_HAPTIC 0x4f #define FF_RUMBLE 0x50 #define FF_PERIODIC 0x51 #define FF_CONSTANT 0x52 @@ -481,7 +501,7 @@ struct ff_effect { #define FF_INERTIA 0x56 #define FF_RAMP 0x57 -#define FF_EFFECT_MIN FF_RUMBLE +#define FF_EFFECT_MIN FF_HAPTIC #define FF_EFFECT_MAX FF_RAMP /* -- cgit v1.2.3 From 7075ae4ac9db93a3e762f3c2793ad57dbbf8a120 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Mon, 18 Aug 2025 23:08:44 +0000 Subject: Input: add INPUT_PROP_HAPTIC_TOUCHPAD INPUT_PROP_HAPTIC_TOUCHPAD property is to be set for a device with simple haptic capabilities. 
Signed-off-by: Angela Czubak Co-developed-by: Jonathan Denose Signed-off-by: Jonathan Denose Acked-by: Dmitry Torokhov Reviewed-by: Randy Dunlap Signed-off-by: Benjamin Tissoires --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index ca5851e97fac..4a9fbf42aa9f 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -27,6 +27,7 @@ #define INPUT_PROP_TOPBUTTONPAD 0x04 /* softbuttons at top of pad */ #define INPUT_PROP_POINTING_STICK 0x05 /* is a pointing stick */ #define INPUT_PROP_ACCELEROMETER 0x06 /* has accelerometer */ +#define INPUT_PROP_HAPTIC_TOUCHPAD 0x07 /* is a haptic touchpad */ #define INPUT_PROP_MAX 0x1f #define INPUT_PROP_CNT (INPUT_PROP_MAX + 1) -- cgit v1.2.3 From 4e584ac737884ef0fd80f6836b917972fad86b17 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Mon, 18 Aug 2025 23:08:50 +0000 Subject: Input: MT - add INPUT_MT_TOTAL_FORCE flags Add a flag to generate ABS_PRESSURE as sum of ABS_MT_PRESSURE across all slots. This flag should be set if one knows a device reports true force and would like to report total force to the userspace. 
Signed-off-by: Angela Czubak Co-developed-by: Jonathan Denose Signed-off-by: Jonathan Denose Acked-by: Dmitry Torokhov Signed-off-by: Benjamin Tissoires --- include/linux/input/mt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 2cf89a538b18..d30286298a00 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -17,6 +17,7 @@ #define INPUT_MT_DROP_UNUSED 0x0004 /* drop contacts not seen in frame */ #define INPUT_MT_TRACK 0x0008 /* use in-kernel tracking */ #define INPUT_MT_SEMI_MT 0x0010 /* semi-mt device, finger count handled manually */ +#define INPUT_MT_TOTAL_FORCE 0x0020 /* calculate total force from slots pressure */ /** * struct input_mt_slot - represents the state of an input MT slot -- cgit v1.2.3 From 8aa1e3a6f0ffbcfdf3bd7d87feb9090f96c54bc4 Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:40 -0700 Subject: firmware: qcom: tzmem: export shm_bridge create/delete Anyone with access to contiguous physical memory should be able to share memory with QTEE using shm_bridge. 
Tested-by: Neil Armstrong Tested-by: Harshal Dev Reviewed-by: Kuldeep Singh Signed-off-by: Amirreza Zarrabi Link: https://lore.kernel.org/r/20250911-qcom-tee-using-tee-ss-without-mem-obj-v12-1-17f07a942b8d@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_tzmem.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_tzmem.h b/include/linux/firmware/qcom/qcom_tzmem.h index b83b63a0c049..48ac0e5454c7 100644 --- a/include/linux/firmware/qcom/qcom_tzmem.h +++ b/include/linux/firmware/qcom/qcom_tzmem.h @@ -53,4 +53,19 @@ DEFINE_FREE(qcom_tzmem, void *, if (_T) qcom_tzmem_free(_T)) phys_addr_t qcom_tzmem_to_phys(void *ptr); +#if IS_ENABLED(CONFIG_QCOM_TZMEM_MODE_SHMBRIDGE) +int qcom_tzmem_shm_bridge_create(phys_addr_t paddr, size_t size, u64 *handle); +void qcom_tzmem_shm_bridge_delete(u64 handle); +#else +static inline int qcom_tzmem_shm_bridge_create(phys_addr_t paddr, + size_t size, u64 *handle) +{ + return 0; +} + +static inline void qcom_tzmem_shm_bridge_delete(u64 handle) +{ +} +#endif + #endif /* __QCOM_TZMEM */ -- cgit v1.2.3 From 4b700098c0fc4a76c5c1e54465c8f35e13755294 Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:41 -0700 Subject: firmware: qcom: scm: add support for object invocation Qualcomm TEE (QTEE) hosts Trusted Applications (TAs) and services in the secure world, accessed via objects. A QTEE client can invoke these objects to request services. Similarly, QTEE can request services from the nonsecure world using objects exported to the secure world. Add low-level primitives to facilitate the invocation of objects hosted in QTEE, as well as those hosted in the nonsecure world. If support for object invocation is available, the qcom_scm allocates a dedicated child platform device. The driver for this device communicates with QTEE using low-level primitives. 
Tested-by: Neil Armstrong Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Link: https://lore.kernel.org/r/20250911-qcom-tee-using-tee-ss-without-mem-obj-v12-2-17f07a942b8d@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 0f667bf1d4d9..a55ca771286b 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -175,4 +175,10 @@ static inline int qcom_scm_qseecom_app_send(u32 app_id, #endif /* CONFIG_QCOM_QSEECOM */ +int qcom_scm_qtee_invoke_smc(phys_addr_t inbuf, size_t inbuf_size, + phys_addr_t outbuf, size_t outbuf_size, + u64 *result, u64 *response_type); +int qcom_scm_qtee_callback_response(phys_addr_t buf, size_t buf_size, + u64 *result, u64 *response_type); + #endif -- cgit v1.2.3 From 54fd6bd42e7bd351802ff1d193a2e33e4bfb1836 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 8 Sep 2025 17:26:45 +0530 Subject: cdx: Split mcdi.h and reorganize headers Move bitfield.h from the CDX controller directory to include/linux/cdx to make them accessible to other drivers. As part of this refactoring, split mcdi.h into two headers: - mcdi.h: retains interface-level declarations - mcdid.h: contains internal definitions and macros This is in preparation for VersalNET EDAC driver that relies on it. 
Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Acked-by: Nikhil Agarwal Link: https://lore.kernel.org/20250908115649.22903-1-shubhrajyoti.datta@amd.com --- include/linux/cdx/bitfield.h | 90 ++++++++++++++++++++ include/linux/cdx/mcdi.h | 192 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+) create mode 100644 include/linux/cdx/bitfield.h create mode 100644 include/linux/cdx/mcdi.h (limited to 'include') diff --git a/include/linux/cdx/bitfield.h b/include/linux/cdx/bitfield.h new file mode 100644 index 000000000000..567f8ec47582 --- /dev/null +++ b/include/linux/cdx/bitfield.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2013 Solarflare Communications Inc. + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef CDX_BITFIELD_H +#define CDX_BITFIELD_H + +#include <linux/bitfield.h> + +/* Lowest bit numbers and widths */ +#define CDX_DWORD_LBN 0 +#define CDX_DWORD_WIDTH 32 + +/* Specified attribute (e.g. LBN) of the specified field */ +#define CDX_VAL(field, attribute) field ## _ ## attribute +/* Low bit number of the specified field */ +#define CDX_LOW_BIT(field) CDX_VAL(field, LBN) +/* Bit width of the specified field */ +#define CDX_WIDTH(field) CDX_VAL(field, WIDTH) +/* High bit number of the specified field */ +#define CDX_HIGH_BIT(field) (CDX_LOW_BIT(field) + CDX_WIDTH(field) - 1) + +/* A doubleword (i.e.
4 byte) datatype - little-endian in HW */ +struct cdx_dword { + __le32 cdx_u32; +}; + +/* Value expanders for printk */ +#define CDX_DWORD_VAL(dword) \ + ((unsigned int)le32_to_cpu((dword).cdx_u32)) + +/* + * Extract bit field portion [low,high) from the 32-bit little-endian + * element which contains bits [min,max) + */ +#define CDX_DWORD_FIELD(dword, field) \ + (FIELD_GET(GENMASK(CDX_HIGH_BIT(field), CDX_LOW_BIT(field)), \ + le32_to_cpu((dword).cdx_u32))) + +/* + * Creates the portion of the named bit field that lies within the + * range [min,max). + */ +#define CDX_INSERT_FIELD(field, value) \ + (FIELD_PREP(GENMASK(CDX_HIGH_BIT(field), \ + CDX_LOW_BIT(field)), value)) + +/* + * Creates the portion of the named bit fields that lie within the + * range [min,max). + */ +#define CDX_INSERT_FIELDS(field1, value1, \ + field2, value2, \ + field3, value3, \ + field4, value4, \ + field5, value5, \ + field6, value6, \ + field7, value7) \ + (CDX_INSERT_FIELD(field1, (value1)) | \ + CDX_INSERT_FIELD(field2, (value2)) | \ + CDX_INSERT_FIELD(field3, (value3)) | \ + CDX_INSERT_FIELD(field4, (value4)) | \ + CDX_INSERT_FIELD(field5, (value5)) | \ + CDX_INSERT_FIELD(field6, (value6)) | \ + CDX_INSERT_FIELD(field7, (value7))) + +#define CDX_POPULATE_DWORD(dword, ...) \ + (dword).cdx_u32 = cpu_to_le32(CDX_INSERT_FIELDS(__VA_ARGS__)) + +/* Populate a dword field with various numbers of arguments */ +#define CDX_POPULATE_DWORD_7 CDX_POPULATE_DWORD +#define CDX_POPULATE_DWORD_6(dword, ...) \ + CDX_POPULATE_DWORD_7(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_5(dword, ...) \ + CDX_POPULATE_DWORD_6(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_4(dword, ...) \ + CDX_POPULATE_DWORD_5(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_3(dword, ...) \ + CDX_POPULATE_DWORD_4(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_2(dword, ...) \ + CDX_POPULATE_DWORD_3(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_POPULATE_DWORD_1(dword, ...) 
\ + CDX_POPULATE_DWORD_2(dword, CDX_DWORD, 0, __VA_ARGS__) +#define CDX_SET_DWORD(dword) \ + CDX_POPULATE_DWORD_1(dword, CDX_DWORD, 0xffffffff) + +#endif /* CDX_BITFIELD_H */ diff --git a/include/linux/cdx/mcdi.h b/include/linux/cdx/mcdi.h new file mode 100644 index 000000000000..46e3f63b062a --- /dev/null +++ b/include/linux/cdx/mcdi.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2008-2013 Solarflare Communications Inc. + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef CDX_MCDI_H +#define CDX_MCDI_H + +#include +#include +#include + +#include "linux/cdx/bitfield.h" + +/** + * enum cdx_mcdi_mode - MCDI transaction mode + * @MCDI_MODE_EVENTS: wait for an mcdi response callback. + * @MCDI_MODE_FAIL: we think MCDI is dead, so fail-fast all calls + */ +enum cdx_mcdi_mode { + MCDI_MODE_EVENTS, + MCDI_MODE_FAIL, +}; + +#define MCDI_RPC_TIMEOUT (10 * HZ) +#define MCDI_RPC_LONG_TIMEOU (60 * HZ) +#define MCDI_RPC_POST_RST_TIME (10 * HZ) + +/** + * enum cdx_mcdi_cmd_state - State for an individual MCDI command + * @MCDI_STATE_QUEUED: Command not started and is waiting to run. + * @MCDI_STATE_RETRY: Command was submitted and MC rejected with no resources, + * as MC have too many outstanding commands. Command will be retried once + * another command returns. + * @MCDI_STATE_RUNNING: Command was accepted and is running. + * @MCDI_STATE_RUNNING_CANCELLED: Command is running but the issuer cancelled + * the command. + * @MCDI_STATE_FINISHED: Processing of this command has completed. + */ + +enum cdx_mcdi_cmd_state { + MCDI_STATE_QUEUED, + MCDI_STATE_RETRY, + MCDI_STATE_RUNNING, + MCDI_STATE_RUNNING_CANCELLED, + MCDI_STATE_FINISHED, +}; + +/** + * struct cdx_mcdi - CDX MCDI Firmware interface, to interact + * with CDX controller. 
+ * @mcdi: MCDI interface + * @mcdi_ops: MCDI operations + * @r5_rproc : R5 Remoteproc device handle + * @rpdev: RPMsg device + * @ept: RPMsg endpoint + * @work: Post probe work + */ +struct cdx_mcdi { + /* MCDI interface */ + struct cdx_mcdi_data *mcdi; + const struct cdx_mcdi_ops *mcdi_ops; + + struct rproc *r5_rproc; + struct rpmsg_device *rpdev; + struct rpmsg_endpoint *ept; + struct work_struct work; +}; + +struct cdx_mcdi_ops { + void (*mcdi_request)(struct cdx_mcdi *cdx, + const struct cdx_dword *hdr, size_t hdr_len, + const struct cdx_dword *sdu, size_t sdu_len); + unsigned int (*mcdi_rpc_timeout)(struct cdx_mcdi *cdx, unsigned int cmd); +}; + +typedef void cdx_mcdi_async_completer(struct cdx_mcdi *cdx, + unsigned long cookie, int rc, + struct cdx_dword *outbuf, + size_t outlen_actual); + +/** + * struct cdx_mcdi_cmd - An outstanding MCDI command + * @ref: Reference count. There will be one reference if the command is + * in the mcdi_iface cmd_list, another if it's on a cleanup list, + * and a third if it's queued in the work queue. 
+ * @list: The data for this entry in mcdi->cmd_list + * @cleanup_list: The data for this entry in a cleanup list + * @work: The work item for this command, queued in mcdi->workqueue + * @mcdi: The mcdi_iface for this command + * @state: The state of this command + * @inlen: inbuf length + * @inbuf: Input buffer + * @quiet: Whether to silence errors + * @reboot_seen: Whether a reboot has been seen during this command, + * to prevent duplicates + * @seq: Sequence number + * @started: Jiffies this command was started at + * @cookie: Context for completion function + * @completer: Completion function + * @handle: Command handle + * @cmd: Command number + * @rc: Return code + * @outlen: Length of output buffer + * @outbuf: Output buffer + */ +struct cdx_mcdi_cmd { + struct kref ref; + struct list_head list; + struct list_head cleanup_list; + struct work_struct work; + struct cdx_mcdi_iface *mcdi; + enum cdx_mcdi_cmd_state state; + size_t inlen; + const struct cdx_dword *inbuf; + bool quiet; + bool reboot_seen; + u8 seq; + unsigned long started; + unsigned long cookie; + cdx_mcdi_async_completer *completer; + unsigned int handle; + unsigned int cmd; + int rc; + size_t outlen; + struct cdx_dword *outbuf; + /* followed by inbuf data if necessary */ +}; + +/** + * struct cdx_mcdi_iface - MCDI protocol context + * @cdx: The associated NIC + * @iface_lock: Serialise access to this structure + * @outstanding_cleanups: Count of cleanups + * @cmd_list: List of outstanding and running commands + * @workqueue: Workqueue used for delayed processing + * @cmd_complete_wq: Waitqueue for command completion + * @db_held_by: Command the MC doorbell is in use by + * @seq_held_by: Command each sequence number is in use by + * @prev_handle: The last used command handle + * @mode: Poll for mcdi completion, or wait for an mcdi_event + * @prev_seq: The last used sequence number + * @new_epoch: Indicates start of day or start of MC reboot recovery + */ +struct cdx_mcdi_iface { + struct 
cdx_mcdi *cdx; + /* Serialise access */ + struct mutex iface_lock; + unsigned int outstanding_cleanups; + struct list_head cmd_list; + struct workqueue_struct *workqueue; + wait_queue_head_t cmd_complete_wq; + struct cdx_mcdi_cmd *db_held_by; + struct cdx_mcdi_cmd *seq_held_by[16]; + unsigned int prev_handle; + enum cdx_mcdi_mode mode; + u8 prev_seq; + bool new_epoch; +}; + +/** + * struct cdx_mcdi_data - extra state for NICs that implement MCDI + * @iface: Interface/protocol state + * @fn_flags: Flags for this function, as returned by %MC_CMD_DRV_ATTACH. + */ +struct cdx_mcdi_data { + struct cdx_mcdi_iface iface; + u32 fn_flags; +}; + +/* + * We expect that 16- and 32-bit fields in MCDI requests and responses + * are appropriately aligned, but 64-bit fields are only + * 32-bit-aligned. + */ +#define MCDI_DECLARE_BUF(_name, _len) struct cdx_dword _name[DIV_ROUND_UP(_len, 4)] = {{0}} +#define _MCDI_PTR(_buf, _offset) \ + ((u8 *)(_buf) + (_offset)) +#define MCDI_PTR(_buf, _field) \ + _MCDI_PTR(_buf, MC_CMD_ ## _field ## _OFST) +#define _MCDI_CHECK_ALIGN(_ofst, _align) \ + ((void)BUILD_BUG_ON_ZERO((_ofst) & ((_align) - 1)), \ + (_ofst)) +#define _MCDI_DWORD(_buf, _field) \ + ((_buf) + (_MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, 4) >> 2)) + +#define MCDI_SET_DWORD(_buf, _field, _value) \ + CDX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), CDX_DWORD, _value) +#define MCDI_DWORD(_buf, _field) \ + CDX_DWORD_FIELD(*_MCDI_DWORD(_buf, _field), CDX_DWORD) +#endif /* CDX_MCDI_H */ -- cgit v1.2.3 From f99b3917789d83ea89b24b722d784956f8289f45 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 15 Sep 2025 14:57:29 +0200 Subject: fs: rename generic_delete_inode() and generic_drop_inode() generic_delete_inode() is rather misleading for what the routine is doing. inode_just_drop() should be much clearer. The new naming is inconsistent with generic_drop_inode(), so rename that one as well with inode_ as the prefix. No functional changes.
Signed-off-by: Mateusz Guzik Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4daf9b30a641..724b9af67f35 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3312,8 +3312,8 @@ extern void address_space_init_once(struct address_space *mapping); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); -extern int generic_delete_inode(struct inode *inode); -static inline int generic_drop_inode(struct inode *inode) +extern int inode_just_drop(struct inode *inode); +static inline int inode_generic_drop(struct inode *inode) { return !inode->i_nlink || inode_unhashed(inode); } -- cgit v1.2.3 From 8b0d03129b6165bbf8c9494897489c6da6fadd58 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 8 Sep 2025 17:26:46 +0530 Subject: cdx: Export Symbols for MCDI RPC and Initialization The cdx_mcdi_init(), cdx_mcdi_process_cmd(), and cdx_mcdi_rpc() functions are needed by the VersalNET EDAC module that interacts with the MCDI (Management Controller Direct Interface) framework. These functions facilitate communication between different hardware components by enabling command execution and status management.
Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Acked-by: Nikhil Agarwal Link: https://lore.kernel.org/20250908115649.22903-1-shubhrajyoti.datta@amd.com --- include/linux/cdx/mcdi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/cdx/mcdi.h b/include/linux/cdx/mcdi.h index 46e3f63b062a..74075305cba4 100644 --- a/include/linux/cdx/mcdi.h +++ b/include/linux/cdx/mcdi.h @@ -169,6 +169,13 @@ struct cdx_mcdi_data { u32 fn_flags; }; +void cdx_mcdi_finish(struct cdx_mcdi *cdx); +int cdx_mcdi_init(struct cdx_mcdi *cdx); +void cdx_mcdi_process_cmd(struct cdx_mcdi *cdx, struct cdx_dword *outbuf, int len); +int cdx_mcdi_rpc(struct cdx_mcdi *cdx, unsigned int cmd, + const struct cdx_dword *inbuf, size_t inlen, + struct cdx_dword *outbuf, size_t outlen, size_t *outlen_actual); + /* * We expect that 16- and 32-bit fields in MCDI requests and responses * are appropriately aligned, but 64-bit fields are only -- cgit v1.2.3 From d5fe2fec6c40dda03df8cc9b4a97de0b7e39f984 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 8 Sep 2025 17:26:49 +0530 Subject: EDAC: Add a driver for the AMD Versal NET DDR controller Add a driver for the AMD Versal NET DDR memory controller which supports single bit error correction, double bit error detection and other system errors from various IP subsystems (e.g., RPU, NOCs, HNICX, PL). The driver listens for notifications from the NMC (Network management controller) using RPMsg (Remote Processor Messaging). The channel used for communicating to RPMsg is named "error_edac". Upon receipt of a notification, the driver sends a RAS event trace. 
[ bp: - Fixup title - Rewrite commit message - Fixup Kconfig text - Zap unused defines and align them - Simplify rpmsg_cb() considerably - Drop silly double-brackets in conditionals - Use proper void * type in mcdi_request() - Do not clear chinfo in rpmsg_probe() unnecessarily - Fix indentation - Do a proper err unwind path in init_versalnet() - Redo the error unwind path in mc_probe() properly - Fix the ordering in mc_remove() ] Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250908115649.22903-1-shubhrajyoti.datta@amd.com Link: https://lore.kernel.org/r/20250703173105.GLaGa-WQCESDNsqygm@fat_crate.local --- include/linux/cdx/edac_cdx_pcol.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/cdx/edac_cdx_pcol.h (limited to 'include') diff --git a/include/linux/cdx/edac_cdx_pcol.h b/include/linux/cdx/edac_cdx_pcol.h new file mode 100644 index 000000000000..749db33bb482 --- /dev/null +++ b/include/linux/cdx/edac_cdx_pcol.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Driver for AMD network controllers and boards + * + * Copyright (C) 2021, Xilinx, Inc. + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + */ + +#ifndef MC_CDX_PCOL_H +#define MC_CDX_PCOL_H +#include + +#define MC_CMD_EDAC_GET_DDR_CONFIG_OUT_WORD_LENGTH_LEN 4 +/* Number of registers for the DDR controller */ +#define MC_CMD_GET_DDR_CONFIG_OFST 4 +#define MC_CMD_GET_DDR_CONFIG_LEN 4 + +/***********************************/ +/* MC_CMD_EDAC_GET_DDR_CONFIG + * Provides detailed configuration for the DDR controller of the given index. 
+ */ +#define MC_CMD_EDAC_GET_DDR_CONFIG 0x3 + +/* MC_CMD_EDAC_GET_DDR_CONFIG_IN msgrequest */ +#define MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_OFST 0 +#define MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_LEN 4 + +#endif /* MC_CDX_PCOL_H */ -- cgit v1.2.3 From 1b3aa3900782707ec2f4cc1651bc82c628f25d2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Sep 2025 17:45:36 -0600 Subject: io_uring/uring_cmd: correct signature for io_uring_mshot_cmd_post_cqe() The !CONFIG_IO_URING signature is wrong, fix that up. The non stub signature got updated for the io_br_sel changes that happened before this patch went in, but the stub one did not. Fixes: 620a50c92700 ("io_uring: uring_cmd: add multishot support") Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 1350af846ddd..c8185f54fde9 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -126,7 +126,7 @@ io_uring_cmd_buffer_select(struct io_uring_cmd *ioucmd, unsigned buf_group, return (struct io_br_sel) { .val = -EOPNOTSUPP }; } static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd, - ssize_t ret, unsigned int issue_flags) + struct io_br_sel *sel, unsigned int issue_flags) { return true; } -- cgit v1.2.3 From 0cbaf65c91db0e40a577e8919979dac1963cfcc0 Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:43 -0700 Subject: tee: add close_context to TEE driver operation The tee_context can be used to manage TEE user resources, including those allocated by the driver for the TEE on behalf of the user. The release() callback is invoked only when all resources, such as tee_shm, are released and there are no references to the tee_context. When a user closes the device file, the driver should notify the TEE to release any resources it may hold and drop the context references. 
To achieve this, a close_context() callback is introduced to initiate resource release in the TEE driver when the device file is closed. Relocate teedev_ctx_get, teedev_ctx_put, tee_device_get, and tee_device_put functions to tee_core.h to make them accessible outside the TEE subsystem. Reviewed-by: Sumit Garg Tested-by: Neil Armstrong Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index 7b0c1da2ca6c..456a940d4710 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -76,8 +76,9 @@ struct tee_device { /** * struct tee_driver_ops - driver operations vtable * @get_version: returns version of driver - * @open: called when the device file is opened - * @release: release this open file + * @open: called for a context when the device file is opened + * @close_context: called when the device file is closed + * @release: called to release the context * @open_session: open a new session * @close_session: close a session * @system_session: declare session as a system session @@ -87,11 +88,17 @@ struct tee_device { * @supp_send: called for supplicant to send a response * @shm_register: register shared memory buffer in TEE * @shm_unregister: unregister shared memory buffer in TEE + * + * The context given to @open might last longer than the device file if it is + * tied to other resources in the TEE driver. @close_context is called when the + * client closes the device file, even if there are existing references to the + * context. The TEE driver can use @close_context to start cleaning up. 
*/ struct tee_driver_ops { void (*get_version)(struct tee_device *teedev, struct tee_ioctl_version_data *vers); int (*open)(struct tee_context *ctx); + void (*close_context)(struct tee_context *ctx); void (*release)(struct tee_context *ctx); int (*open_session)(struct tee_context *ctx, struct tee_ioctl_open_session_arg *arg, @@ -200,6 +207,24 @@ int tee_device_register_dma_heap(struct tee_device *teedev, struct tee_protmem_pool *pool); void tee_device_put_all_dma_heaps(struct tee_device *teedev); +/** + * tee_device_get() - Increment the user count for a tee_device + * @teedev: Pointer to the tee_device + * + * If tee_device_unregister() has been called and the final user of @teedev + * has already released the device, this function will fail to prevent new users + * from accessing the device during the unregistration process. + * + * Returns: true if @teedev remains valid, otherwise false + */ +bool tee_device_get(struct tee_device *teedev); + +/** + * tee_device_put() - Decrease the user count for a tee_device + * @teedev: pointer to the tee_device + */ +void tee_device_put(struct tee_device *teedev); + /** * tee_device_set_dev_groups() - Set device attribute groups * @teedev: Device to register @@ -374,4 +399,25 @@ struct tee_context *teedev_open(struct tee_device *teedev); */ void teedev_close_context(struct tee_context *ctx); +/** + * teedev_ctx_get() - Increment the reference count of a context + * @ctx: Pointer to the context + * + * This function increases the refcount of the context, which is tied to + * resources shared by the same tee_device. During the unregistration process, + * the context may remain valid even after tee_device_unregister() has returned. + * + * Users should ensure that the context's refcount is properly decreased before + * calling tee_device_put(), typically within the context's release() function. 
+ * Alternatively, users can call tee_device_get() and teedev_ctx_get() together + * and release them simultaneously (see shm_alloc_helper()). + */ +void teedev_ctx_get(struct tee_context *ctx); + +/** + * teedev_ctx_put() - Decrease reference count on a context + * @ctx: pointer to the context + */ +void teedev_ctx_put(struct tee_context *ctx); + #endif /*__TEE_CORE_H*/ -- cgit v1.2.3 From 54a53e95a908a4cc770f0530c49f04c89e7b18dc Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:44 -0700 Subject: tee: add TEE_IOCTL_PARAM_ATTR_TYPE_UBUF For drivers that can transfer data to the TEE without using shared memory from client, it is necessary to receive the user address directly, bypassing any processing by the TEE subsystem. Introduce TEE_IOCTL_PARAM_ATTR_TYPE_UBUF_INPUT/OUTPUT/INOUT to represent userspace buffers. Reviewed-by: Sumit Garg Tested-by: Neil Armstrong Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 6 ++++++ include/uapi/linux/tee.h | 22 ++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 824f1251de60..7915e8869cbd 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -82,6 +82,11 @@ struct tee_param_memref { struct tee_shm *shm; }; +struct tee_param_ubuf { + void __user *uaddr; + size_t size; +}; + struct tee_param_value { u64 a; u64 b; @@ -92,6 +97,7 @@ struct tee_param { u64 attr; union { struct tee_param_memref memref; + struct tee_param_ubuf ubuf; struct tee_param_value value; } u; }; diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index d843cf980d98..0e3b735dcfca 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -151,6 +151,13 @@ struct tee_ioctl_buf_data { #define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT 6 #define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT 7 /* input and output */ +/* + * These 
defines userspace buffer parameters. + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_UBUF_INPUT 8 +#define TEE_IOCTL_PARAM_ATTR_TYPE_UBUF_OUTPUT 9 +#define TEE_IOCTL_PARAM_ATTR_TYPE_UBUF_INOUT 10 /* input and output */ + /* * Mask for the type part of the attribute, leaves room for more types */ @@ -186,14 +193,17 @@ struct tee_ioctl_buf_data { /** * struct tee_ioctl_param - parameter * @attr: attributes - * @a: if a memref, offset into the shared memory object, else a value parameter - * @b: if a memref, size of the buffer, else a value parameter + * @a: if a memref, offset into the shared memory object, + * else if a ubuf, address of the user buffer, + * else a value parameter + * @b: if a memref or ubuf, size of the buffer, else a value parameter * @c: if a memref, shared memory identifier, else a value parameter * - * @attr & TEE_PARAM_ATTR_TYPE_MASK indicates if memref or value is used in - * the union. TEE_PARAM_ATTR_TYPE_VALUE_* indicates value and - * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref. TEE_PARAM_ATTR_TYPE_NONE - * indicates that none of the members are used. + * @attr & TEE_PARAM_ATTR_TYPE_MASK indicates if memref, ubuf, or value is + * used in the union. TEE_PARAM_ATTR_TYPE_VALUE_* indicates value, + * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref, and TEE_PARAM_ATTR_TYPE_UBUF_* + * indicates ubuf. TEE_PARAM_ATTR_TYPE_NONE indicates that none of the members + * are used. * * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an * identifier representing the shared memory object. A memref can reference -- cgit v1.2.3 From d5b8b0fa1775d8b59c3fc9e4aa2baa715d08f3ee Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:45 -0700 Subject: tee: add TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF The TEE subsystem allows session-based access to trusted services, requiring a session to be established to receive a service. This is not suitable for an environment that represents services as objects. 
An object supports various operations that a client can invoke, potentially generating a result or a new object that can be invoked independently of the original object. Add TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF_INPUT/OUTPUT/INOUT to represent an object. Objects may reside in either TEE or userspace. To invoke an object in TEE, introduce a new ioctl. Use the existing SUPPL_RECV and SUPPL_SEND to invoke an object in userspace. Reviewed-by: Sumit Garg Tested-by: Neil Armstrong Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Signed-off-by: Jens Wiklander --- include/linux/tee_core.h | 4 ++++ include/linux/tee_drv.h | 6 ++++++ include/uapi/linux/tee.h | 41 +++++++++++++++++++++++++++++++++++------ 3 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index 456a940d4710..1f3e5dad6d0d 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -83,6 +83,7 @@ struct tee_device { * @close_session: close a session * @system_session: declare session as a system session * @invoke_func: invoke a trusted function + * @object_invoke_func: invoke a TEE object * @cancel_req: request cancel of an ongoing invoke or open * @supp_recv: called for supplicant to get a command * @supp_send: called for supplicant to send a response @@ -108,6 +109,9 @@ struct tee_driver_ops { int (*invoke_func)(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg, struct tee_param *param); + int (*object_invoke_func)(struct tee_context *ctx, + struct tee_ioctl_object_invoke_arg *arg, + struct tee_param *param); int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, struct tee_param *param); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 7915e8869cbd..88a6f9697c89 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -87,6 +87,11 @@ struct tee_param_ubuf { size_t size; }; +struct 
tee_param_objref { + u64 id; + u64 flags; +}; + struct tee_param_value { u64 a; u64 b; @@ -97,6 +102,7 @@ struct tee_param { u64 attr; union { struct tee_param_memref memref; + struct tee_param_objref objref; struct tee_param_ubuf ubuf; struct tee_param_value value; } u; diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 0e3b735dcfca..9abb0f299549 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -48,8 +48,10 @@ #define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */ #define TEE_GEN_CAP_REG_MEM (1 << 2)/* Supports registering shared memory */ #define TEE_GEN_CAP_MEMREF_NULL (1 << 3)/* NULL MemRef support */ +#define TEE_GEN_CAP_OBJREF (1 << 4)/* Supports generic object reference */ -#define TEE_MEMREF_NULL (__u64)(-1) /* NULL MemRef Buffer */ +#define TEE_MEMREF_NULL ((__u64)(-1)) /* NULL MemRef Buffer */ +#define TEE_OBJREF_NULL ((__u64)(-1)) /* NULL ObjRef Object */ /* * TEE Implementation ID @@ -158,6 +160,13 @@ struct tee_ioctl_buf_data { #define TEE_IOCTL_PARAM_ATTR_TYPE_UBUF_OUTPUT 9 #define TEE_IOCTL_PARAM_ATTR_TYPE_UBUF_INOUT 10 /* input and output */ +/* + * These defines object reference parameters. 
+ */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF_INPUT 11 +#define TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF_OUTPUT 12 +#define TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF_INOUT 13 + /* * Mask for the type part of the attribute, leaves room for more types */ @@ -195,15 +204,16 @@ struct tee_ioctl_buf_data { * @attr: attributes * @a: if a memref, offset into the shared memory object, * else if a ubuf, address of the user buffer, - * else a value parameter - * @b: if a memref or ubuf, size of the buffer, else a value parameter + * else if an objref, object identifier, else a value parameter + * @b: if a memref or ubuf, size of the buffer, + * else if objref, flags for the object, else a value parameter * @c: if a memref, shared memory identifier, else a value parameter * * @attr & TEE_PARAM_ATTR_TYPE_MASK indicates if memref, ubuf, or value is * used in the union. TEE_PARAM_ATTR_TYPE_VALUE_* indicates value, - * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref, and TEE_PARAM_ATTR_TYPE_UBUF_* - * indicates ubuf. TEE_PARAM_ATTR_TYPE_NONE indicates that none of the members - * are used. + * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref, TEE_PARAM_ATTR_TYPE_UBUF_* + * indicates ubuf, and TEE_PARAM_ATTR_TYPE_OBJREF_* indicates objref. + * TEE_PARAM_ATTR_TYPE_NONE indicates that none of the members are used. * * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an * identifier representing the shared memory object. 
A memref can reference @@ -442,4 +452,23 @@ struct tee_ioctl_shm_register_fd_data { * munmap(): unmaps previously shared memory */ +/** + * struct tee_ioctl_object_invoke_arg - Invokes an object in a Trusted Application + * @id: [in] Object id + * @op: [in] Object operation, specific to the object + * @ret: [out] return value + * @num_params: [in] number of parameters following this struct + */ +struct tee_ioctl_object_invoke_arg { + __u64 id; + __u32 op; + __u32 ret; + __u32 num_params; + /* num_params tells the actual number of elements in params */ + struct tee_ioctl_param params[]; +}; + +#define TEE_IOC_OBJECT_INVOKE _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 10, \ + struct tee_ioctl_buf_data) + #endif /*__TEE_H*/ -- cgit v1.2.3 From bd5139306886a9626a7d794940376806eccb9547 Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:46 -0700 Subject: tee: increase TEE_MAX_ARG_SIZE to 4096 Increase TEE_MAX_ARG_SIZE to accommodate worst-case scenarios where additional buffer space is required to pass all arguments to TEE. This change is necessary for upcoming support for Qualcomm TEE, which requires a larger buffer for argument marshaling. 
Reviewed-by: Sumit Garg Tested-by: Harshal Dev Signed-off-by: Amirreza Zarrabi Signed-off-by: Jens Wiklander --- include/uapi/linux/tee.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 9abb0f299549..a5466b503bfe 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -42,7 +42,7 @@ #define TEE_IOC_MAGIC 0xa4 #define TEE_IOC_BASE 0 -#define TEE_MAX_ARG_SIZE 1024 +#define TEE_MAX_ARG_SIZE 4096 #define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ #define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */ -- cgit v1.2.3 From d6e290837e50f73f88f31f19bd8a7213d92e6e46 Mon Sep 17 00:00:00 2001 From: Amirreza Zarrabi Date: Thu, 11 Sep 2025 21:07:47 -0700 Subject: tee: add Qualcomm TEE driver Introduce qcomtee_object, which represents an object in both QTEE and the kernel. QTEE clients can invoke an instance of qcomtee_object to access QTEE services. If this invocation produces a new object in QTEE, an instance of qcomtee_object will be returned. Similarly, QTEE can request services from by issuing a callback request, which invokes an instance of qcomtee_object. Implement initial support for exporting qcomtee_object to userspace and QTEE, enabling the invocation of objects hosted in QTEE and userspace through the TEE subsystem. 
Tested-by: Neil Armstrong Tested-by: Harshal Dev Acked-by: Sumit Garg Signed-off-by: Amirreza Zarrabi Signed-off-by: Jens Wiklander --- include/uapi/linux/tee.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index a5466b503bfe..386ad36f1a0a 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -59,6 +59,7 @@ #define TEE_IMPL_ID_OPTEE 1 #define TEE_IMPL_ID_AMDTEE 2 #define TEE_IMPL_ID_TSTEE 3 +#define TEE_IMPL_ID_QTEE 4 /* * OP-TEE specific capabilities -- cgit v1.2.3 From 2c895133950646f45e5cf3900b168c952c8dbee8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 15 Sep 2025 03:26:17 +0000 Subject: bpf: Do not limit bpf_cgroup_from_id to current's namespace The bpf_cgroup_from_id kfunc relies on cgroup_get_from_id to obtain the cgroup corresponding to a given cgroup ID. This helper can be called in a lot of contexts where the current thread can be random. A recent example was its use in sched_ext's ops.tick(), to obtain the root cgroup pointer. Since the current task can be whatever random user space task preempted by the timer tick, this makes the behavior of the helper unreliable. Refactor out __cgroup_get_from_id as the non-namespace aware version of cgroup_get_from_id, and change bpf_cgroup_from_id to make use of it. There is no compatibility breakage here, since changing the namespace against which the lookup is being done to the root cgroup namespace only permits a wider set of lookups to succeed now. The cgroup IDs across namespaces are globally unique, and thus don't need to be retranslated. 
Reported-by: Dan Schatzberg Signed-off-by: Kumar Kartikeya Dwivedi Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250915032618.1551762-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..b08c8e62881c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -650,6 +650,7 @@ static inline void cgroup_kthread_ready(void) } void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); +struct cgroup *__cgroup_get_from_id(u64 id); struct cgroup *cgroup_get_from_id(u64 id); #else /* !CONFIG_CGROUPS */ -- cgit v1.2.3 From a9273da04fa033667a4d0ccfe46c4ba55721d7d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 5 Sep 2025 14:45:39 +0200 Subject: drm/amdgpu: add AMDGPU_IDS_FLAGS_GANG_SUBMIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a UAPI flag indicating if gang submit is supported or not. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 85b3ca14f81e..cd7402e36b6d 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1088,10 +1088,11 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { * Query h/w info: Flag that this is integrated (a.h.a. 
fusion) GPU * */ -#define AMDGPU_IDS_FLAGS_FUSION 0x1 -#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 -#define AMDGPU_IDS_FLAGS_TMZ 0x4 -#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 +#define AMDGPU_IDS_FLAGS_FUSION 0x01 +#define AMDGPU_IDS_FLAGS_PREEMPTION 0x02 +#define AMDGPU_IDS_FLAGS_TMZ 0x04 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x08 +#define AMDGPU_IDS_FLAGS_GANG_SUBMIT 0x10 /* * Query h/w info: Flag identifying VF/PF/PT mode -- cgit v1.2.3 From 61b2f7baa9779b12a7bf1b9800a3f2a2549a1315 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 11 Sep 2025 13:06:30 +0200 Subject: tcp: fast path functions later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following patch will use tcp_ecn_mode_accecn(), TCP_ACCECN_CEP_INIT_OFFSET, TCP_ACCECN_CEP_ACE_MASK in __tcp_fast_path_on() to make new flag for AccECN. No functional changes. Signed-off-by: Ilpo Järvinen Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250911110642.87529-3-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 277914c4d067..e25340459ce4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -821,33 +821,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } -static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) -{ - /* mptcp hooks are only on the slow path */ - if (sk_is_mptcp((struct sock *)tp)) - return; - - tp->pred_flags = htonl((tp->tcp_header_len << 26) | - ntohl(TCP_FLAG_ACK) | - snd_wnd); -} - -static inline void tcp_fast_path_on(struct tcp_sock *tp) -{ - __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); -} - -static inline void 
tcp_fast_path_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && - tp->rcv_wnd && - atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && - !tp->urg_data) - tcp_fast_path_on(tp); -} - u32 tcp_delack_max(const struct sock *sk); /* Compute the actual rto_min value */ @@ -1807,6 +1780,33 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, return true; } +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +{ + /* mptcp hooks are only on the slow path */ + if (sk_is_mptcp((struct sock *)tp)) + return; + + tp->pred_flags = htonl((tp->tcp_header_len << 26) | + ntohl(TCP_FLAG_ACK) | + snd_wnd); +} + +static inline void tcp_fast_path_on(struct tcp_sock *tp) +{ + __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); +} + +static inline void tcp_fast_path_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && + tp->rcv_wnd && + atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + !tp->urg_data) + tcp_fast_path_on(tp); +} + bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, int mib_idx, u32 *last_oow_ack_time); -- cgit v1.2.3 From c3426ba2ed6942fe33c75bf17fc7513ba2c6ac64 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Thu, 11 Sep 2025 13:06:31 +0200 Subject: tcp: reorganize tcp_sock_write_txrx group for variables later Use the first 3-byte hole at the beginning of the tcp_sock_write_txrx group for 'nonagle'/'rate_app_limited' to fill in the existing hole in later patches. Therefore, the group size of tcp_sock_write_txrx is reduced from 92 + 4 to 91 + 4. In addition, the group size of tcp_sock_write_rx is changed to 96 to fit in the pahole outcome. Below are the trimmed pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_sock { [...] __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ /* XXX 3 bytes hole, try to pack */ [...] 
struct tcp_options_received rx_opt; /* 2588 24 */ u8 nonagle:4; /* 2612: 0 1 */ u8 rate_app_limited:1; /* 2612: 4 1 */ /* XXX 3 bits hole, try to pack */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2613 0 */ /* XXX 3 bytes hole, try to pack */ __cacheline_group_begin__tcp_sock_write_rx[0] __attribute__((__aligned__(8))); /* 2616 0 */ [...] __cacheline_group_end__tcp_sock_write_rx[0]; /* 2712 0 */ [...] /* size: 3200, cachelines: 50, members: 161 */ } [AFTER THIS PATCH] struct tcp_sock { [...] __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ u8 nonagle:4; /* 2521: 0 1 */ u8 rate_app_limited:1; /* 2521: 4 1 */ /* XXX 3 bits hole, try to pack */ /* XXX 2 bytes hole, try to pack */ [...] struct tcp_options_received rx_opt; /* 2588 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2612 0 */ /* XXX 4 bytes hole, try to pack */ __cacheline_group_begin__tcp_sock_write_rx[0] __attribute__((__aligned__(8))); /* 2616 0 */ [...] __cacheline_group_end__tcp_sock_write_rx[0]; /* 2712 0 */ [...] /* size: 3200, cachelines: 50, members: 161 */ } Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250911110642.87529-4-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 57e478bfaef2..d103cc0e7a35 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -285,6 +285,8 @@ struct tcp_sock { * Header prediction flags * 0x5?10 << 16 + snd_wnd in net byte order */ + u8 nonagle : 4,/* Disable Nagle algorithm? */ + rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ @@ -303,8 +305,6 @@ struct tcp_sock { * Options received (usually on last packet, some only on SYN packets). 
*/ struct tcp_options_received rx_opt; - u8 nonagle : 4,/* Disable Nagle algorithm? */ - rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ __cacheline_group_end(tcp_sock_write_txrx); /* RX read-write hotpath cache lines */ -- cgit v1.2.3 From 30f5ca00624397d81c99515bdd43286ade93d7c8 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Thu, 11 Sep 2025 13:06:32 +0200 Subject: tcp: ecn functions in separated include file The following patches will modify ECN helpers and add AccECN helpers, and this patch moves the existing ones into a separate include file. No functional changes. Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250911110642.87529-5-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Jakub Kicinski --- include/net/tcp_ecn.h | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 include/net/tcp_ecn.h (limited to 'include') diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h new file mode 100644 index 000000000000..b3430557676b --- /dev/null +++ b/include/net/tcp_ecn.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _TCP_ECN_H +#define _TCP_ECN_H + +#include +#include + +#include +#include +#include +#include + +static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) +{ + if (tcp_ecn_mode_rfc3168(tp)) + tp->ecn_flags |= TCP_ECN_QUEUE_CWR; +} + +static inline void tcp_ecn_accept_cwr(struct sock *sk, + const struct sk_buff *skb) +{ + if (tcp_hdr(skb)->cwr) { + tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + + /* If the sender is telling us it has entered CWR, then its + * cwnd may be very low (even just 1 packet), so we should ACK + * immediately.
+ */ + if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } +} + +static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) +{ + tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; +} + +static inline void tcp_ecn_rcv_synack(struct tcp_sock *tp, + const struct tcphdr *th) +{ + if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); +} + +static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, + const struct tcphdr *th) +{ + if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) + tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); +} + +static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, + const struct tcphdr *th) +{ + if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp)) + return true; + return false; +} + +/* Packet ECN state for a SYN-ACK */ +static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; + if (tcp_ecn_disabled(tp)) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; + else if (tcp_ca_needs_ecn(sk) || + tcp_bpf_ca_needs_ecn(sk)) + INET_ECN_xmit(sk); +} + +/* Packet ECN state for a SYN. 
*/ +static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); + bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || + tcp_ca_needs_ecn(sk) || bpf_needs_ecn; + + if (!use_ecn) { + const struct dst_entry *dst = __sk_dst_get(sk); + + if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) + use_ecn = true; + } + + tp->ecn_flags = 0; + + if (use_ecn) { + if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) + INET_ECN_xmit(sk); + + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); + } +} + +static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) +{ + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) + /* tp->ecn_flags are cleared at a later point in time when + * SYN ACK is ultimatively being received. + */ + TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR); +} + +static inline void +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) +{ + if (inet_rsk(req)->ecn_ok) + th->ece = 1; +} + +#endif /* _LINUX_TCP_ECN_H */ -- cgit v1.2.3 From 07c446e35b89bc8774792f8036e595cffdf5b162 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 16 Sep 2025 08:47:43 +0900 Subject: firewire: core: maintain phy packet receivers locally in cdev layer The list of receivers for phy packet is used only by cdev layer, while it is maintained as a member of fw_card structure. This commit maintains the list locally in cdev layer. 
Link: https://lore.kernel.org/r/20250915234747.915922-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index d38c6e538e5c..f3260aacf730 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -115,8 +115,6 @@ struct fw_card { int index; struct list_head link; - struct list_head phy_receiver_list; - struct delayed_work br_work; /* bus reset job */ bool br_short; -- cgit v1.2.3 From 7d138cb269dbd2fa9b0da89a9c10503d1cf269d5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 16 Sep 2025 08:47:44 +0900 Subject: firewire: core: use spin lock specific to topology map At present, the operation for read transaction to topology map register is not protected by any kind of lock primitives. This causes a potential problem to result in the mixed content of topology map. This commit adds and uses spin lock specific to topology map. 
Link: https://lore.kernel.org/r/20250915234747.915922-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index f3260aacf730..aeb71c39e57e 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -129,7 +129,11 @@ struct fw_card { bool broadcast_channel_allocated; u32 broadcast_channel; - __be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; + + struct { + __be32 buffer[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; + spinlock_t lock; + } topology_map; __be32 maint_utility_register; -- cgit v1.2.3 From 420bd7068cbfaea0a857472dd631dc48311e2a8f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 16 Sep 2025 08:47:45 +0900 Subject: firewire: core: use spin lock specific to transaction The list of instance for asynchronous transaction to wait for response subaction is maintained as a member of fw_card structure. The card-wide spinlock is used at present for any operation over the list, however it is not necessarily suited for the purpose. This commit adds and uses the spin lock specific to maintain the list. 
Link: https://lore.kernel.org/r/20250915234747.915922-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index aeb71c39e57e..8d6801cf2fca 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -88,11 +88,15 @@ struct fw_card { int node_id; int generation; - int current_tlabel; - u64 tlabel_mask; - struct list_head transaction_list; u64 reset_jiffies; + struct { + int current_tlabel; + u64 tlabel_mask; + struct list_head list; + spinlock_t lock; + } transactions; + u32 split_timeout_hi; u32 split_timeout_lo; unsigned int split_timeout_cycles; -- cgit v1.2.3 From b5725cfa4120a4d234ab112aad151d731531d093 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 16 Sep 2025 08:47:46 +0900 Subject: firewire: core: use spin lock specific to timer for split transaction At present the parameters to compute timeout time for split transaction is protected by card-wide spin lock, while it is not necessarily convenient in a point to narrower critical section. This commit adds and uses another spin lock specific for the purpose. 
Link: https://lore.kernel.org/r/20250915234747.915922-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 8d6801cf2fca..6d208769d456 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -97,18 +97,21 @@ struct fw_card { spinlock_t lock; } transactions; - u32 split_timeout_hi; - u32 split_timeout_lo; - unsigned int split_timeout_cycles; - unsigned int split_timeout_jiffies; + struct { + u32 hi; + u32 lo; + unsigned int cycles; + unsigned int jiffies; + spinlock_t lock; + } split_timeout; unsigned long long guid; unsigned max_receive; int link_speed; int config_rom_generation; - spinlock_t lock; /* Take this lock when handling the lists in - * this struct. */ + spinlock_t lock; + struct fw_node *local_node; struct fw_node *root_node; struct fw_node *irm_node; -- cgit v1.2.3 From 8c01cc2382bc351672c8eb946c16f567cfffef60 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 10 Sep 2025 21:41:22 +0100 Subject: dt-bindings: net: pcs: renesas,rzn1-miic: Add RZ/T2H and RZ/N2H support Add device tree binding support for RZ/T2H and RZ/N2H SoCs to the existing RZ/N1 MIIC converter binding. These SoCs share similar MIIC functionality but have architectural differences that require schema updates. Add new compatible strings "renesas,r9a09g077-miic" for RZ/T2H and "renesas,r9a09g087-miic" for RZ/N2H, with the latter falling back to the RZ/T2H variant. The new SoCs require reset support with two reset lines for converter register reset and converter reset, which are not present on RZ/N1. Update port configurations to accommodate the different architectures. RZ/N1 supports 5 ports numbered 1-5 with complex input mappings covering indices 0-13, while RZ/T2H and RZ/N2H support 4 ports numbered 0-3 with simplified input mappings covering indices 0-8. 
Extend the switch port configuration property to support value 0 for the new SoCs. Add a new dt-bindings header file with media interface connection matrix constants that map GMAC, ESC, and ETHSW ports to numeric identifiers for use with RZ/T2H and RZ/N2H device trees. Update DT schema validation to ensure proper port numbering and input mappings per SoC variant. Signed-off-by: Lad Prabhakar Tested-by: Wolfram Sang Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250910204132.319975-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jakub Kicinski --- .../dt-bindings/net/renesas,r9a09g077-pcs-miic.h | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/dt-bindings/net/renesas,r9a09g077-pcs-miic.h (limited to 'include') diff --git a/include/dt-bindings/net/renesas,r9a09g077-pcs-miic.h b/include/dt-bindings/net/renesas,r9a09g077-pcs-miic.h new file mode 100644 index 000000000000..43a2b5743a63 --- /dev/null +++ b/include/dt-bindings/net/renesas,r9a09g077-pcs-miic.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2025 Renesas Electronics Corporation. 
+ */ + +#ifndef _DT_BINDINGS_RENASAS_R9A09G077_PCS_MIIC_H +#define _DT_BINDINGS_RENASAS_R9A09G077_PCS_MIIC_H + +/* + * Media Interface Connection Matrix + * =========================================================== + * + * Selects the function of the Media interface of the MAC to be used + * + * SW_MODE[2:0] | Port 0 | Port 1 | Port 2 | Port 3 + * -------------|-------------|-------------|-------------|------------- + * 000b | ETHSW Port0 | ETHSW Port1 | ETHSW Port2 | GMAC1 + * 001b | ESC Port0 | ESC Port1 | GMAC2 | GMAC1 + * 010b | ESC Port0 | ESC Port1 | ETHSW Port2 | GMAC1 + * 011b | ESC Port0 | ESC Port1 | ESC Port2 | GMAC1 + * 100b | ETHSW Port0 | ESC Port1 | ESC Port2 | GMAC1 + * 101b | ETHSW Port0 | ESC Port1 | ETHSW Port2 | GMAC1 + * 110b | ETHSW Port0 | ETHSW Port1 | GMAC2 | GMAC1 + * 111b | GMAC0 | GMAC1 | GMAC2 | - + */ +#define ETHSS_GMAC0_PORT 0 +#define ETHSS_GMAC1_PORT 1 +#define ETHSS_GMAC2_PORT 2 +#define ETHSS_ESC_PORT0 3 +#define ETHSS_ESC_PORT1 4 +#define ETHSS_ESC_PORT2 5 +#define ETHSS_ETHSW_PORT0 6 +#define ETHSS_ETHSW_PORT1 7 +#define ETHSS_ETHSW_PORT2 8 + +#endif -- cgit v1.2.3 From f9fadf23c7f1a0df72ef50a873e1bd3bd4631ec1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Feb 2024 21:25:18 -0500 Subject: security_dentry_init_security(): constify qstr argument Nothing outside of fs/dcache.c has any business modifying dentry names; passing &dentry->d_name as an argument should have that argument declared as a const pointer. 
Acked-by: Casey Schaufler # smack part Acked-by: Paul Moore Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index fd11fffdd3c3..aa4d6ec9c98b 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -85,7 +85,7 @@ LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, struct lsm_context *cp) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, - struct qstr *name, const struct cred *old, struct cred *new) + const struct qstr *name, const struct cred *old, struct cred *new) #ifdef CONFIG_SECURITY_PATH LSM_HOOK(int, 0, path_unlink, const struct path *dir, struct dentry *dentry) diff --git a/include/linux/security.h b/include/linux/security.h index 521bcb5b9717..3f694d3ebd70 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -391,7 +391,7 @@ int security_dentry_init_security(struct dentry *dentry, int mode, const char **xattr_name, struct lsm_context *lsmcxt); int security_dentry_create_files_as(struct dentry *dentry, int mode, - struct qstr *name, + const struct qstr *name, const struct cred *old, struct cred *new); int security_path_notify(const struct path *path, u64 mask, @@ -871,7 +871,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, } static inline int security_dentry_create_files_as(struct dentry *dentry, - int mode, struct qstr *name, + int mode, const struct qstr *name, const struct cred *old, struct cred *new) { -- cgit v1.2.3 From ca97d6c60b1d1dff519a7e3dd86708304e657365 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 11 Jul 2025 05:45:01 -0400 Subject: generic_ci_validate_strict_name(): constify name argument Reviewed-by: Christian Brauner Signed-off-by: Al Viro 
--- include/linux/fs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..6dcfc1c399ca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3719,7 +3719,8 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, * happens when a directory is casefolded and the filesystem is strict * about its encoding. */ -static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +static inline bool generic_ci_validate_strict_name(struct inode *dir, + const struct qstr *name) { if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb)) return true; @@ -3734,7 +3735,8 @@ static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qst return !utf8_validate(dir->i_sb->s_encoding, name); } #else -static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +static inline bool generic_ci_validate_strict_name(struct inode *dir, + const struct qstr *name) { return true; } -- cgit v1.2.3 From 180a9cc3fd6a020746fbd7f97b9b62295a325fd2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Feb 2024 14:57:43 -0500 Subject: make it easier to catch those who try to modify ->d_name Turn d_name into an anon union of const struct qstr d_name with struct qstr __d_name. Very few places need to modify it (all in fs/dcache.c); those are switched to use of ->__d_name. Note that ->d_name can actually change under you unless you have the right locking environment; this const just prohibits accidentally doing stores without being easily spotted. 
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/dcache.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index cc3e1c1a3454..c83e02b94389 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -95,7 +95,10 @@ struct dentry { seqcount_spinlock_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ - struct qstr d_name; + union { + struct qstr __d_name; /* for use ONLY in fs/dcache.c */ + const struct qstr d_name; + }; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ union shortname_store d_shortname; -- cgit v1.2.3 From 2293c57484ae64c9a3c847c8807db8c26a3a4d41 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 12 Sep 2025 14:52:21 +0200 Subject: mptcp: pm: nl: announce deny-join-id0 flag During the connection establishment, a peer can tell the other one that it cannot establish new subflows to the initial IP address and port by setting the 'C' flag [1]. Doing so makes sense when the sender is behind a strict NAT, operating behind a legacy Layer 4 load balancer, or using anycast IP address for example. When this 'C' flag is set, the path-managers must then not try to establish new subflows to the other peer's initial IP address and port. The in-kernel PM has access to this info, but the userspace PM didn't. The RFC8684 [1] is strict about that: (...) therefore the receiver MUST NOT try to open any additional subflows toward this address and port. So it is important to tell the userspace about that as it is responsible for the respect of this flag. When a new connection is created and established, the Netlink events now contain the existing but not currently used 'flags' attribute. 
When MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 is set, it means no other subflows to the initial IP address and port -- info that are also part of the event -- can be established. Link: https://datatracker.ietf.org/doc/html/rfc8684#section-3.1-20.6 [1] Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Reported-by: Marek Majkowski Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/532 Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250912-net-mptcp-pm-uspace-deny_join_id0-v1-2-40171884ade8@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 2 ++ include/uapi/linux/mptcp_pm.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 67d015df8893..5fd5b4cf75ca 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -31,6 +31,8 @@ #define MPTCP_INFO_FLAG_FALLBACK _BITUL(0) #define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1) +#define MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 _BITUL(0) + #define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index 6ac84b2f636c..7359d34da446 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -16,10 +16,10 @@ * good time to allocate memory and send ADD_ADDR if needed. Depending on the * traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED * is sent. Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, server-side. + * sport, dport, server-side, [flags]. * @MPTCP_EVENT_ESTABLISHED: A MPTCP connection is established (can start new * subflows). Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, server-side. + * sport, dport, server-side, [flags]. 
* @MPTCP_EVENT_CLOSED: A MPTCP connection has stopped. Attribute: token. * @MPTCP_EVENT_ANNOUNCED: A new address has been announced by the peer. * Attributes: token, rem_id, family, daddr4 | daddr6 [, dport]. -- cgit v1.2.3 From dae575e669811b201114702d96f6854d5c8324b5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Sep 2025 23:16:35 -0400 Subject: backing_file_user_path(): constify struct path * Callers never use the resulting pointer to modify the struct path it points to (nor should they). Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..3bcc878817be 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2879,7 +2879,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct path *backing_file_user_path(const struct file *f); +const struct path *backing_file_user_path(const struct file *f); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file -- cgit v1.2.3 From 63dbfb077cdad21b356e17d4ce76650e67b83159 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jul 2025 21:58:05 -0400 Subject: done_path_create(): constify path argument Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 5d085428e471..75c0b665fbd4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -60,7 +60,7 @@ extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user 
*, struct path *, unsigned int); -extern void done_path_create(struct path *, struct dentry *); +extern void done_path_create(const struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); -- cgit v1.2.3 From 2930afe2c9cb9aec329269e40c851bf56cdcc09c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jul 2025 21:32:41 -0400 Subject: export_operations->open(): constify path argument for the method and its sole instance... Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/exportfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index cfb0dd1ea49c..f43c83e0b8c5 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -270,7 +270,7 @@ struct export_operations { int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); - struct file * (*open)(struct path *path, unsigned int oflags); + struct file * (*open)(const struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ -- cgit v1.2.3 From 1f6df5847454dee8608f78ee0df7352472cb2447 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Jul 2025 18:45:02 -0400 Subject: drop_collected_paths(): constify arguments ... 
and use that to constify the pointers in callers Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 5f9c053b0897..c09032463b36 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -105,7 +105,7 @@ extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); extern struct path *collect_paths(const struct path *, struct path *, unsigned); -extern void drop_collected_paths(struct path *, struct path *); +extern void drop_collected_paths(const struct path *, struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); -- cgit v1.2.3 From b42ffcd5069d5cfb777b8982a1c55c7e2f1d3998 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Aug 2025 19:34:37 -0400 Subject: collect_paths(): constify the return value callers have no business modifying the paths they get Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/mount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index c09032463b36..18e4b97f8a98 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -104,8 +104,8 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct path *collect_paths(const struct path *, struct path *, unsigned); -extern void drop_collected_paths(const struct path *, struct path *); +extern const struct path *collect_paths(const struct path *, struct path *, unsigned); +extern void drop_collected_paths(const struct path *, const struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); 
extern int cifs_root_data(char **dev, char **opts); -- cgit v1.2.3 From 880fcc329e2473ba02ffbc446fcd403972ab1fca Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Tue, 9 Sep 2025 00:03:24 -0700 Subject: drivers/perf: riscv: Export PMU event info function The event mapping function can be used in event info function to find out the corresponding SBI PMU event encoding during the get_event_info function as well. Refactor and export it so that it can be invoked from kvm and internal driver. Signed-off-by: Atish Patra Reviewed-by: Anup Patel Acked-by: Paul Walmsley Link: https://lore.kernel.org/r/20250909-pmu_event_info-v6-5-d8f80cacb884@rivosinc.com Signed-off-by: Anup Patel --- include/linux/perf/riscv_pmu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 701974639ff2..f82a28040594 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -89,6 +89,7 @@ static inline void riscv_pmu_legacy_skip_init(void) {}; struct riscv_pmu *riscv_pmu_alloc(void); #ifdef CONFIG_RISCV_PMU_SBI int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); +int riscv_pmu_get_event_info(u32 type, u64 config, u64 *econfig); #endif #endif /* CONFIG_RISCV_PMU */ -- cgit v1.2.3 From 94deac977fbd0246c971b4f1d17a6385f5e0b1a4 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Mon, 1 Sep 2025 10:52:04 +0000 Subject: fs: add an enum for number of life time hints Add WRITE_LIFE_HINT_NR into the rw_hint enum to define the number of values write life time hints can be set to. This is useful for e.g. file systems which may want to map these values to allocation groups. 
Signed-off-by: Hans Holmberg Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- include/linux/rw_hint.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h index 309ca72f2dfb..adcc43042c90 100644 --- a/include/linux/rw_hint.h +++ b/include/linux/rw_hint.h @@ -14,6 +14,7 @@ enum rw_hint { WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, + WRITE_LIFE_HINT_NR, } __packed; /* Sparse ignores __packed annotations on enums, hence the #ifndef below. */ -- cgit v1.2.3 From 51dad33ede63618a6b425c650f3042d85e646dac Mon Sep 17 00:00:00 2001 From: Ming Yu Date: Fri, 12 Sep 2025 17:19:46 +0800 Subject: mfd: Add core driver for Nuvoton NCT6694 The Nuvoton NCT6694 provides a USB interface to the host to access its features. Sub-devices can use the USB functions nct6694_read_msg() and nct6694_write_msg() to issue a command. They can also request an interrupt that will be called when the USB device receives its interrupt pipe. Signed-off-by: Ming Yu Link: https://lore.kernel.org/r/20250912091952.1169369-2-a0282524688@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/nct6694.h | 102 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 include/linux/mfd/nct6694.h (limited to 'include') diff --git a/include/linux/mfd/nct6694.h b/include/linux/mfd/nct6694.h new file mode 100644 index 000000000000..6eb9be2cd4a0 --- /dev/null +++ b/include/linux/mfd/nct6694.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Nuvoton Technology Corp. + * + * Nuvoton NCT6694 USB transaction and data structure.
+ */ + +#ifndef __MFD_NCT6694_H +#define __MFD_NCT6694_H + +#define NCT6694_VENDOR_ID 0x0416 +#define NCT6694_PRODUCT_ID 0x200B +#define NCT6694_INT_IN_EP 0x81 +#define NCT6694_BULK_IN_EP 0x02 +#define NCT6694_BULK_OUT_EP 0x03 + +#define NCT6694_HCTRL_SET 0x40 +#define NCT6694_HCTRL_GET 0x80 + +#define NCT6694_URB_TIMEOUT 1000 + +enum nct6694_irq_id { + NCT6694_IRQ_GPIO0 = 0, + NCT6694_IRQ_GPIO1, + NCT6694_IRQ_GPIO2, + NCT6694_IRQ_GPIO3, + NCT6694_IRQ_GPIO4, + NCT6694_IRQ_GPIO5, + NCT6694_IRQ_GPIO6, + NCT6694_IRQ_GPIO7, + NCT6694_IRQ_GPIO8, + NCT6694_IRQ_GPIO9, + NCT6694_IRQ_GPIOA, + NCT6694_IRQ_GPIOB, + NCT6694_IRQ_GPIOC, + NCT6694_IRQ_GPIOD, + NCT6694_IRQ_GPIOE, + NCT6694_IRQ_GPIOF, + NCT6694_IRQ_CAN0, + NCT6694_IRQ_CAN1, + NCT6694_IRQ_RTC, + NCT6694_NR_IRQS, +}; + +enum nct6694_response_err_status { + NCT6694_NO_ERROR = 0, + NCT6694_FORMAT_ERROR, + NCT6694_RESERVED1, + NCT6694_RESERVED2, + NCT6694_NOT_SUPPORT_ERROR, + NCT6694_NO_RESPONSE_ERROR, + NCT6694_TIMEOUT_ERROR, + NCT6694_PENDING, +}; + +struct __packed nct6694_cmd_header { + u8 rsv1; + u8 mod; + union __packed { + __le16 offset; + struct __packed { + u8 cmd; + u8 sel; + }; + }; + u8 hctrl; + u8 rsv2; + __le16 len; +}; + +struct __packed nct6694_response_header { + u8 sequence_id; + u8 sts; + u8 reserved[4]; + __le16 len; +}; + +union __packed nct6694_usb_msg { + struct nct6694_cmd_header cmd_header; + struct nct6694_response_header response_header; +}; + +struct nct6694 { + struct device *dev; + struct ida gpio_ida; + struct ida i2c_ida; + struct ida canfd_ida; + struct ida wdt_ida; + struct irq_domain *domain; + struct mutex access_lock; + spinlock_t irq_lock; + struct urb *int_in_urb; + struct usb_device *udev; + union nct6694_usb_msg *usb_msg; + __le32 *int_buffer; + unsigned int irq_enable; +}; + +int nct6694_read_msg(struct nct6694 *nct6694, const struct nct6694_cmd_header *cmd_hd, void *buf); +int nct6694_write_msg(struct nct6694 *nct6694, const struct nct6694_cmd_header *cmd_hd, void *buf); + 
+#endif -- cgit v1.2.3 From 747436750bc0ef73be32391bd5d0d7dcd185da7f Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Wed, 10 Sep 2025 09:20:38 -0700 Subject: ARM: at91: pm: Remove 2.5V regulator Remove 2.5V regulator since enabling and disabling this regulator is no longer supported. Signed-off-by: Ryan Wanner Link: https://lore.kernel.org/r/a6785a40648b315a07152bca261a42bbf0f356af.1757519351.git.Ryan.Wanner@microchip.com Signed-off-by: Nicolas Ferre --- include/soc/at91/sama7-sfrbu.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/soc/at91/sama7-sfrbu.h b/include/soc/at91/sama7-sfrbu.h index 76b740810d34..8cee48d1ae2c 100644 --- a/include/soc/at91/sama7-sfrbu.h +++ b/include/soc/at91/sama7-sfrbu.h @@ -18,13 +18,6 @@ #define AT91_SFRBU_PSWBU_SOFTSWITCH (1 << 1) /* Power switch BU source selection */ #define AT91_SFRBU_PSWBU_CTRL (1 << 0) /* Power switch BU control */ -#define AT91_SFRBU_25LDOCR (0x0C) /* SFRBU 2.5V LDO Control Register */ -#define AT91_SFRBU_25LDOCR_LDOANAKEY (0x3B6E18 << 8) /* Specific value mandatory to allow writing of other register bits. */ -#define AT91_SFRBU_25LDOCR_STATE (1 << 3) /* LDOANA Switch On/Off Control */ -#define AT91_SFRBU_25LDOCR_LP (1 << 2) /* LDOANA Low-Power Mode Control */ -#define AT91_SFRBU_PD_VALUE_MSK (0x3) -#define AT91_SFRBU_25LDOCR_PD_VALUE(v) ((v) & AT91_SFRBU_PD_VALUE_MSK) /* LDOANA Pull-down value */ - #define AT91_FRBU_DDRPWR (0x10) /* SFRBU DDR Power Control Register */ #define AT91_FRBU_DDRPWR_STATE (1 << 0) /* DDR Power Mode State */ -- cgit v1.2.3 From f8d9e56aeb87ce82ce8636cd176cc59b69aa0e41 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 22 Aug 2025 13:56:27 +0300 Subject: i3c: master: Add helpers for DMA mapping and bounce buffer handling Some I3C controllers such as MIPI I3C HCI may pad the last DWORD (32-bit) with stale data from the RX FIFO in DMA transfers if the receive length is not DWORD aligned and when the device DMA is IOMMU mapped. 
In such a case, a properly sized bounce buffer is required in order to avoid possible data corruption. In a review discussion, the proposal was to have common helpers in the I3C core for DMA mapping and bounce buffer handling. Drivers may use the helper i3c_master_dma_map_single() to map a buffer for a DMA transfer. It internally allocates a bounce buffer if the buffer is not DMA'able or when the driver requires it for a transfer. Helper i3c_master_dma_unmap_single() does the needed cleanups and data copying from the bounce buffer. Signed-off-by: Jarkko Nikula Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250822105630.2820009-2-jarkko.nikula@linux.intel.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 043f5c7ff398..c52a82dd79a6 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -558,6 +558,26 @@ struct i3c_master_controller { #define i3c_bus_for_each_i3cdev(bus, dev) \ list_for_each_entry(dev, &(bus)->devs.i3c, common.node) +/** + * struct i3c_dma - DMA transfer and mapping descriptor + * @dev: device object of a device doing DMA + * @buf: destination/source buffer for DMA + * @len: length of transfer + * @map_len: length of DMA mapping + * @addr: mapped DMA address for a Host Controller Driver + * @dir: DMA direction + * @bounce_buf: an allocated bounce buffer if transfer needs it or NULL + */ +struct i3c_dma { + struct device *dev; + void *buf; + size_t len; + size_t map_len; + dma_addr_t addr; + enum dma_data_direction dir; + void *bounce_buf; +}; + int i3c_master_do_i2c_xfers(struct i3c_master_controller *master, const struct i2c_msg *xfers, int nxfers); @@ -575,6 +595,12 @@ int i3c_master_get_free_addr(struct i3c_master_controller *master, int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master, u8 addr); int i3c_master_do_daa(struct 
i3c_master_controller *master); +struct i3c_dma *i3c_master_dma_map_single(struct device *dev, void *ptr, + size_t len, bool force_bounce, + enum dma_data_direction dir); +void i3c_master_dma_unmap_single(struct i3c_dma *dma_xfer); +DEFINE_FREE(i3c_master_dma_unmap_single, void *, + if (_T) i3c_master_dma_unmap_single(_T)) int i3c_master_set_info(struct i3c_master_controller *master, const struct i3c_device_info *info); -- cgit v1.2.3 From 3baeae36039afc233d4a42d6ff4aa7019892619f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 29 Aug 2025 16:10:57 +0300 Subject: PCI: Use pci_release_resource() instead of release_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few places in setup-bus.c call release_resource() directly and end up duplicating functionality from pci_release_resource() such as parent check, logging, and clearing the resource. Worse yet, the way the resource is cleared is inconsistent between different sites. Convert release_resource() calls into pci_release_resource() to remove code duplication. This will also make the resource start, end, and flags behavior consistent, i.e., start address is cleared, and only IORESOURCE_UNSET is asserted for the resource. While at it, eliminate the unnecessary initialization of idx variable in pci_bridge_release_resources(). 
Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250829131113.36754-9-ilpo.jarvinen@linux.intel.com --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 59876de13860..275df4058767 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1417,7 +1417,7 @@ void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); -void pci_release_resource(struct pci_dev *dev, int resno); +int pci_release_resource(struct pci_dev *dev, int resno); static inline int pci_rebar_bytes_to_size(u64 bytes) { bytes = roundup_pow_of_two(bytes); -- cgit v1.2.3 From 4292a1e45fd464551efac7b2b52fd3606e956c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 29 Aug 2025 16:11:09 +0300 Subject: PCI: Refactor distributing available memory to use loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci_bus_distribute_available_resources() and pci_bridge_distribute_available_resources() retain bridge window resources and related data needed for distributing the available window in independent variables for io, memory, and prefetchable memory windows. The code is essentially the same for all of them and therefore repeated three times with different variable names. Refactor pci_bus_distribute_available_resources() to take an array. This is complicated slightly by the function taking advantage of passing the struct as value, which cannot be done for arrays in C. Therefore, copy the data into a local array in the stack in the first loop. Variable names are (hopefully) improved slightly as well. 
Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250829131113.36754-21-ilpo.jarvinen@linux.intel.com --- include/linux/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 275df4058767..723e9cede69d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -119,7 +119,8 @@ enum { #define PCI_CB_BRIDGE_MEM_1_WINDOW (PCI_BRIDGE_RESOURCES + 3) /* Total number of bridge resources for P2P and CardBus */ -#define PCI_BRIDGE_RESOURCE_NUM 4 +#define PCI_P2P_BRIDGE_RESOURCE_NUM 3 +#define PCI_BRIDGE_RESOURCE_NUM 4 /* Resources assigned to buses behind the bridge */ PCI_BRIDGE_RESOURCES, -- cgit v1.2.3 From 705d2ac7b2044f1ca05ba6033183151a04dbff4d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 16 Sep 2025 15:28:02 +0100 Subject: io_uring/zcrx: allow synchronous buffer return Returning buffers via a ring is performant and convenient, but it becomes a problem when/if the user misconfigured the ring size and it becomes full. Add a synchronous way to return buffers back to the page pool via a new register opcode. It's supposed to be a reliable slow path for refilling. 
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1ce17c535944..a0cc1cc0dd01 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -689,6 +689,9 @@ enum io_uring_register_op { /* query various aspects of io_uring, see linux/io_uring/query.h */ IORING_REGISTER_QUERY = 35, + /* return zcrx buffers back into circulation */ + IORING_REGISTER_ZCRX_REFILL = 36, + /* this goes last */ IORING_REGISTER_LAST, @@ -1070,6 +1073,15 @@ struct io_uring_zcrx_ifq_reg { __u64 __resv[3]; }; +struct io_uring_zcrx_sync_refill { + __u32 zcrx_id; + /* the number of entries to return */ + __u32 nr_entries; + /* pointer to an array of struct io_uring_zcrx_rqe */ + __u64 rqes; + __u64 __resv[2]; +}; + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 5020d05b3476f3561377a4ab076d42fda00e3607 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Thu, 11 Sep 2025 19:24:06 +0800 Subject: ACPI: processor: Remove unused empty stubs of some functions Empty stubs are defined in processor.h for some functions provided by the ACPI processor idle driver, but those functions are only used in the main ACPI processor driver which requires the ACPI processor idle driver to be present (selecting CONFIG_ACPI_PROCESSOR causes CONFIG_ACPI_PROCESSOR_IDLE to be selected too automatically). This means that the empty stubs in question are not really necessary and if both CONFIG_ACPI_PROCESSOR and CONFIG_ACPI_PROCESSOR_IDLE are unset, the compiler complains that they are defined, but not used. Drop them to get rid of the compiler warning. Signed-off-by: Huisong Li Link: https://patch.msgid.link/20250911112408.1668431-2-lihuisong@huawei.com [ rjw: Subject and changelog rewrite ] Signed-off-by: Rafael J. 
Wysocki --- include/acpi/processor.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index ff864c1cee3a..2976a6d0c54f 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -425,26 +425,6 @@ int acpi_processor_power_state_has_changed(struct acpi_processor *pr); int acpi_processor_hotplug(struct acpi_processor *pr); void acpi_processor_register_idle_driver(void); void acpi_processor_unregister_idle_driver(void); -#else -static inline int acpi_processor_power_init(struct acpi_processor *pr) -{ - return -ENODEV; -} - -static inline int acpi_processor_power_exit(struct acpi_processor *pr) -{ - return -ENODEV; -} - -static inline int acpi_processor_power_state_has_changed(struct acpi_processor *pr) -{ - return -ENODEV; -} - -static inline int acpi_processor_hotplug(struct acpi_processor *pr) -{ - return -ENODEV; -} #endif /* CONFIG_ACPI_PROCESSOR_IDLE */ /* in processor_thermal.c */ -- cgit v1.2.3 From dadb3ebcf395ebee3626d88ac7e5e234f15bae2c Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Sun, 14 Sep 2025 15:44:26 +0200 Subject: workqueue: WQ_PERCPU added to alloc_workqueue users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if a user enqueues a work item using schedule_delayed_work() the used wq is "system_wq" (per-cpu wq) while queue_delayed_work() uses WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to schedule_work() that is using system_wq and queue_work(), that makes use again of WORK_CPU_UNBOUND. This lack of consistency cannot be addressed without refactoring the API. alloc_workqueue() treats all queues as per-CPU by default, while unbound workqueues must opt-in via WQ_UNBOUND. This default is suboptimal: most workloads benefit from unbound queues, allowing the scheduler to place worker threads where they’re needed and reducing noise when CPUs are isolated. 
This patch adds a new WQ_PERCPU flag to explicitly request the use of the per-CPU behavior. Both flags coexist for one release cycle to allow callers to transition their calls. Once migration is complete, WQ_UNBOUND can be removed and unbound will become the implicit default. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. All existing users have been updated accordingly. Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b6834b7aee4b..71a9900c03c7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -410,7 +410,7 @@ enum wq_flags { __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ /* BH wq only allows the following flags */ - __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, + __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI | WQ_PERCPU, }; enum wq_consts { @@ -570,7 +570,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ - alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) + alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name)) #define create_freezable_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ WQ_MEM_RECLAIM, 1, (name)) -- cgit v1.2.3 From 6b4be64fd9fec16418f365c2d8e47a7566e9eba5 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 15 Sep 2025 15:24:32 +0300 Subject: net/mlx5e: Harden uplink netdev access against device unbind The function mlx5_uplink_netdev_get() gets the uplink netdevice pointer from mdev->mlx5e_res.uplink_netdev. 
However, the netdevice can be removed and its pointer cleared when unbound from the mlx5_core.eth driver. This results in a NULL pointer, causing a kernel panic. BUG: unable to handle page fault for address: 0000000000001300 at RIP: 0010:mlx5e_vport_rep_load+0x22a/0x270 [mlx5_core] Call Trace: mlx5_esw_offloads_rep_load+0x68/0xe0 [mlx5_core] esw_offloads_enable+0x593/0x910 [mlx5_core] mlx5_eswitch_enable_locked+0x341/0x420 [mlx5_core] mlx5_devlink_eswitch_mode_set+0x17e/0x3a0 [mlx5_core] devlink_nl_eswitch_set_doit+0x60/0xd0 genl_family_rcv_msg_doit+0xe0/0x130 genl_rcv_msg+0x183/0x290 netlink_rcv_skb+0x4b/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x255/0x380 netlink_sendmsg+0x1f3/0x420 __sock_sendmsg+0x38/0x60 __sys_sendto+0x119/0x180 do_syscall_64+0x53/0x1d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Ensure the pointer is valid before use by checking it for NULL. If it is valid, immediately call netdev_hold() to take a reference, preventing the netdevice from being freed while it is in use. 
Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Reviewed-by: Jiri Pirko Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1757939074-617281-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8c5fbfb85749..10fe492e1fed 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -663,6 +663,7 @@ struct mlx5e_resources { bool tisn_valid; } hw_objs; struct net_device *uplink_netdev; + netdevice_tracker tracker; struct mutex uplink_netdev_lock; struct mlx5_crypto_dek_priv *dek_priv; }; -- cgit v1.2.3 From fb1f4568346153d2f80fdb4ffcfa0cf4fb257d3c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 9 Sep 2025 12:06:07 -0700 Subject: scsi: ufs: core: Disable timestamp functionality if not supported Some Kioxia UFS 4 devices do not support the qTimestamp attribute. Set the UFS_DEVICE_QUIRK_NO_TIMESTAMP_SUPPORT for these devices such that no error messages appear in the kernel log about failures to set the qTimestamp attribute. Signed-off-by: Bart Van Assche Reviewed-by: Avri Altman Tested-by: Nitin Rawat # on SM8650-QRD Reviewed-by: Nitin Rawat Reviewed-by: Peter Wang Reviewed-by: Manivannan Sadhasivam Message-ID: <20250909190614.3531435-1-bvanassche@acm.org> Signed-off-by: Martin K. 
Petersen --- include/ufs/ufs_quirks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/ufs/ufs_quirks.h b/include/ufs/ufs_quirks.h index f52de5ed1b3b..83563247c36c 100644 --- a/include/ufs/ufs_quirks.h +++ b/include/ufs/ufs_quirks.h @@ -113,4 +113,7 @@ struct ufs_dev_quirk { */ #define UFS_DEVICE_QUIRK_PA_HIBER8TIME (1 << 12) +/* Some UFS 4 devices do not support the qTimestamp attribute */ +#define UFS_DEVICE_QUIRK_NO_TIMESTAMP_SUPPORT (1 << 13) + #endif /* UFS_QUIRKS_H_ */ -- cgit v1.2.3 From ea6bb47fd6a4c5a332f9349c39bf7462e3e7a35b Mon Sep 17 00:00:00 2001 From: Alan Borzeszkowski Date: Wed, 27 Aug 2025 13:56:46 +0200 Subject: thunderbolt: Update thunderbolt.h header file Make Thunderbolt header file compliant with current kernel-doc standards. No functional changes. Signed-off-by: Alan Borzeszkowski Signed-off-by: Mika Westerberg --- include/linux/thunderbolt.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 75247486616b..0ba112175bb3 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -213,7 +213,7 @@ enum tb_link_width { * queried first * @service_ids: Used to generate IDs for the services * @in_hopids: Input HopIDs for DMA tunneling - * @out_hopids; Output HopIDs for DMA tunneling + * @out_hopids: Output HopIDs for DMA tunneling * @local_property_block: Local block of properties * @local_property_block_gen: Generation of @local_property_block * @local_property_block_len: Length of the @local_property_block in dwords @@ -356,7 +356,7 @@ int tb_xdomain_request(struct tb_xdomain *xd, const void *request, unsigned int timeout_msec); /** - * tb_protocol_handler - Protocol specific handler + * struct tb_protocol_handler - Protocol specific handler * @uuid: XDomain messages with this UUID are dispatched to this handler * @callback: Callback called with the XDomain message. 
Returning %1 * here tells the XDomain core that the message was handled @@ -437,7 +437,7 @@ static inline struct tb_service *tb_to_service(struct device *dev) } /** - * tb_service_driver - Thunderbolt service driver + * struct tb_service_driver - Thunderbolt service driver * @driver: Driver structure * @probe: Called when the driver is probed * @remove: Called when the driver is removed (optional) @@ -519,6 +519,7 @@ struct tb_nhi { * @head: Head of the ring (write next descriptor here) * @tail: Tail of the ring (complete next descriptor here) * @descriptors: Allocated descriptors for this ring + * @descriptors_dma: DMA address of descriptors for this ring * @queue: Queue holding frames to be transferred over this ring * @in_flight: Queue holding frames that are currently in flight * @work: Interrupt work structure @@ -571,12 +572,12 @@ typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); /** * enum ring_desc_flags - Flags for DMA ring descriptor - * %RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) - * %RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) - * %RING_DESC_COMPLETED: Descriptor completed (set by NHI) - * %RING_DESC_POSTED: Always set this - * %RING_DESC_BUFFER_OVERRUN: RX buffer overrun - * %RING_DESC_INTERRUPT: Request an interrupt on completion + * @RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) + * @RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) + * @RING_DESC_COMPLETED: Descriptor completed (set by NHI) + * @RING_DESC_POSTED: Always set this + * @RING_DESC_BUFFER_OVERRUN: RX buffer overrun + * @RING_DESC_INTERRUPT: Request an interrupt on completion */ enum ring_desc_flags { RING_DESC_ISOCH = 0x1, @@ -636,7 +637,7 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); * If ring_stop() is called after the packet has been enqueued * @frame->callback will be called with canceled set to true. 
 * - * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + * Return: %-ESHUTDOWN if ring_stop() has been called, %0 otherwise. */ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) { @@ -657,7 +658,7 @@ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) * If ring_stop() is called after the packet has been enqueued @frame->callback * will be called with canceled set to true. * - * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + * Return: %-ESHUTDOWN if ring_stop has been called, %0 otherwise. */ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) { @@ -675,6 +676,8 @@ void tb_ring_poll_complete(struct tb_ring *ring); * * Use this function when you are mapping DMA for buffers that are * passed to the ring for sending/receiving. + * + * Return: Pointer to device used for DMA mapping. */ static inline struct device *tb_ring_dma_device(struct tb_ring *ring) { -- cgit v1.2.3 From 29589343488e116ac31f6f3cfa83e43949a2207a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Sep 2025 23:32:30 +0200 Subject: asm-generic: Provide generic TIF infrastructure Common TIF bits do not have to be defined by every architecture. They can be defined in a generic header. That allows adding generic TIF bits without chasing a gazillion of architecture headers, which is again an unjustified burden on anyone who works on generic infrastructure as it always needs a boatload of work to keep existing architecture code working when adding new stuff. While it is not as horrible as the ignorance of the generic entry infrastructure, it is a welcome mechanism to make architecture people rethink their approach of just leaching generic improvements into architecture code and thereby making it accumulatingly harder to maintain and improve generic code. It's about time that this changes. 
Provide the infrastructure and split the TIF space in half, 16 generic and 16 architecture specific bits. This could probably be extended by TIF_SINGLESTEP and BLOCKSTEP, but those are only used in architecture specific code. So leave them alone for now. Signed-off-by: Thomas Gleixner Reviewed-by: Mathieu Desnoyers Acked-by: Arnd Bergmann --- include/asm-generic/thread_info_tif.h | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/asm-generic/thread_info_tif.h (limited to 'include') diff --git a/include/asm-generic/thread_info_tif.h b/include/asm-generic/thread_info_tif.h new file mode 100644 index 000000000000..ee3793e9b1a4 --- /dev/null +++ b/include/asm-generic/thread_info_tif.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_THREAD_INFO_TIF_H_ +#define _ASM_GENERIC_THREAD_INFO_TIF_H_ + +#include + +/* Bits 16-31 are reserved for architecture specific purposes */ + +#define TIF_NOTIFY_RESUME 0 // callback before returning to user +#define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) + +#define TIF_SIGPENDING 1 // signal pending +#define _TIF_SIGPENDING BIT(TIF_SIGPENDING) + +#define TIF_NOTIFY_SIGNAL 2 // signal notifications exist +#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) + +#define TIF_MEMDIE 3 // is terminating due to OOM killer +#define _TIF_MEMDIE BIT(TIF_MEMDIE) + +#define TIF_NEED_RESCHED 4 // rescheduling necessary +#define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) + +#ifdef HAVE_TIF_NEED_RESCHED_LAZY +# define TIF_NEED_RESCHED_LAZY 5 // Lazy rescheduling needed +# define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) +#endif + +#ifdef HAVE_TIF_POLLING_NRFLAG +# define TIF_POLLING_NRFLAG 6 // idle is polling for TIF_NEED_RESCHED +# define _TIF_POLLING_NRFLAG BIT(TIF_POLLING_NRFLAG) +#endif + +#define TIF_USER_RETURN_NOTIFY 7 // notify kernel of userspace return +#define _TIF_USER_RETURN_NOTIFY BIT(TIF_USER_RETURN_NOTIFY) + +#define TIF_UPROBE 8 // breakpointed or 
singlestepping +#define _TIF_UPROBE BIT(TIF_UPROBE) + +#define TIF_PATCH_PENDING 9 // pending live patching update +#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) + +#ifdef HAVE_TIF_RESTORE_SIGMASK +# define TIF_RESTORE_SIGMASK 10 // Restore signal mask in do_signal() */ +# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK) +#endif + +#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */ -- cgit v1.2.3 From de2be98541dbe0de58d2dccf7fa19dfc9d9a8260 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Sep 2025 10:10:17 +0300 Subject: net/mlx5: Remove VLAN insertion fields from WQE Ether segment Now that the driver no longer uses VLAN TX insertion via the WQE Ethernet segment, the related fields and flags can be removed. Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1757574619-604874-2-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fc7eeff99a8a..5546c7bd2c83 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -237,13 +237,11 @@ enum { }; enum { - MLX5_ETH_WQE_SVLAN = 1 << 0, MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC = 1 << 26, MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC = 1 << 27, MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC = 3 << 26, MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC = 1 << 28, MLX5_ETH_WQE_INSERT_TRAILER = 1 << 30, - MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, }; enum { @@ -275,10 +273,6 @@ struct mlx5_wqe_eth_seg { DECLARE_FLEX_ARRAY(u8, data); }; } inline_hdr; - struct { - __be16 type; - __be16 vlan_tci; - } insert; __be32 trailer; }; }; -- cgit v1.2.3 From cce65f32443b61db2370a67d2e92d16b773fe8a4 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Sep 2025 10:10:18 +0300 Subject: net/mlx5: Refactor MACsec WQE metadata shifts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Introduce MLX5_ETH_WQE_FT_META_SHIFT as a shared base offset for features that use the lower 8 bits of the WQE flow_table_metadata field, currently used for timestamping, IPsec, and MACsec. Define MLX5_ETH_WQE_FT_META_MACSEC_FS_ID_MASK so that fs_id occupies bits 2–5, making it clear that fs_id occupies bits in the metadata. Set MLX5_ETH_WQE_FT_META_MACSEC_MASK as the OR of the MACsec flag and MLX5_ETH_WQE_FT_META_MACSEC_FS_ID_MASK, corresponding to the original 0x3E mask. Update the fs_id macro to right-shift the MACsec flag by MLX5_ETH_WQE_FT_META_SHIFT and update the RoCE modify-header action to use it. Introduce the helper macro MLX5_MACSEC_TX_METADATA(fs_id) to compose the full shifted MACsec metadata value. These changes make it explicit exactly which metadata bits carry MACsec information, simplifying future feature exclusions when multiple features share the WQE flowtable metadata. In addition, drop the incorrect “RX flow steering” comment, since this applies to TX flow steering. 
Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1757574619-604874-3-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 5546c7bd2c83..b21be7630575 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -251,9 +251,14 @@ enum { MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5, }; +/* Base shift for metadata bits used by timestamping, IPsec, and MACsec */ +#define MLX5_ETH_WQE_FT_META_SHIFT 0 + enum { - MLX5_ETH_WQE_FT_META_IPSEC = BIT(0), - MLX5_ETH_WQE_FT_META_MACSEC = BIT(1), + MLX5_ETH_WQE_FT_META_IPSEC = BIT(0) << MLX5_ETH_WQE_FT_META_SHIFT, + MLX5_ETH_WQE_FT_META_MACSEC = BIT(1) << MLX5_ETH_WQE_FT_META_SHIFT, + MLX5_ETH_WQE_FT_META_MACSEC_FS_ID_MASK = + GENMASK(5, 2) << MLX5_ETH_WQE_FT_META_SHIFT, }; struct mlx5_wqe_eth_seg { -- cgit v1.2.3 From 2ac207381c37eebc49559634ce5642119784bc7c Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 11 Sep 2025 10:10:19 +0300 Subject: net/mlx5e: Prevent WQE metadata conflicts between timestamping and offloads Update the WQE metadata assignment to avoid overriding existing metadata when setting the sysport timestamp ID. Since timestamp IDs are limited to 256 values, they use only the lower 8 bits of the metadata field. To avoid conflicts, move IPsec and MACsec metadata ID to bits 8 and 9, and shift the MACsec fs_id accordingly. This ensures safe coexistence of timestamping and offload features that use the same metadata field. 
Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Reviewed-by: Patrisious Haddad Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1757574619-604874-4-git-send-email-tariqt@nvidia.com Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index b21be7630575..d67aedc6ea68 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -251,8 +251,9 @@ enum { MLX5_ETH_WQE_SWP_OUTER_L4_UDP = 1 << 5, }; -/* Base shift for metadata bits used by timestamping, IPsec, and MACsec */ -#define MLX5_ETH_WQE_FT_META_SHIFT 0 +/* Metadata bits 0-7 are used by timestamping */ +/* Base shift for metadata bits used by IPsec and MACsec */ +#define MLX5_ETH_WQE_FT_META_SHIFT 8 enum { MLX5_ETH_WQE_FT_META_IPSEC = BIT(0) << MLX5_ETH_WQE_FT_META_SHIFT, -- cgit v1.2.3 From 0b1619c38600fc06c73b1f59c64af0b7df08fc2c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 15 Sep 2025 11:10:07 +0200 Subject: gpio: nomadik: fix the debugfs helper stub Commit ddeb66d2cb10 ("gpio: nomadik: don't print out global GPIO numbers in debugfs callbacks") failed to also update the stub of the debugfs helper for !CONFIG_DEBUG_FS. Fix the resulting build failure. 
Fixes: ddeb66d2cb10 ("gpio: nomadik: don't print out global GPIO numbers in debugfs callbacks") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509132232.12viPUPB-lkp@intel.com/ Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250915091007.28438-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/gpio-nomadik.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h index 7ba53b499e16..592a774a53cd 100644 --- a/include/linux/gpio/gpio-nomadik.h +++ b/include/linux/gpio/gpio-nomadik.h @@ -268,8 +268,7 @@ void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, static inline void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, struct gpio_chip *chip, - unsigned int offset, - unsigned int gpio) + unsigned int offset) { } -- cgit v1.2.3 From 75d5546f60b36900051d75ee623fceccbeb6750c Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 12 Sep 2025 18:58:51 +0200 Subject: HID: hidraw: tighten ioctl command parsing The handling for variable-length ioctl commands in hidraw_ioctl() is rather complex and the check for the data direction is incomplete. Simplify this code by factoring out the various ioctls grouped by dir and size, and using a switch() statement with the size masked out, to ensure the rest of the command is correctly matched. 
Fixes: 9188e79ec3fd ("HID: add phys and name ioctls to hidraw") Reported-by: Arnd Bergmann Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/uapi/linux/hidraw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/hidraw.h b/include/uapi/linux/hidraw.h index d5ee269864e0..ebd701b3c18d 100644 --- a/include/uapi/linux/hidraw.h +++ b/include/uapi/linux/hidraw.h @@ -48,6 +48,8 @@ struct hidraw_devinfo { #define HIDIOCGOUTPUT(len) _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x0C, len) #define HIDIOCREVOKE _IOW('H', 0x0D, int) /* Revoke device access */ +#define HIDIOCTL_LAST _IOC_NR(HIDIOCREVOKE) + #define HIDRAW_FIRST_MINOR 0 #define HIDRAW_MAX_DEVICES 64 /* number of reports to buffer */ -- cgit v1.2.3 From d1dd75c6500c74b91c5286fd3277710371d3e3ca Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sat, 13 Sep 2025 16:12:54 +0000 Subject: HID: core: Change hid_driver to use a const char* for name name is never mutated by the core HID stack. Making name a const char* simplifies passing the string from Rust to C. Otherwise, it becomes difficult to pass a 'static lifetime CStr from Rust to a char*, rather than a const char*, due to lack of guarantee that the underlying data of the CStr will not be mutated by the C code. Signed-off-by: Rahul Rameshbabu Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 2cc4f1e4ea96..426b22ed42b4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -816,7 +816,7 @@ struct hid_usage_id { * zero from them. 
*/ struct hid_driver { - char *name; + const char *name; const struct hid_device_id *id_table; struct list_head dyn_list; -- cgit v1.2.3 From 648dbccc03a000cd64c2a9d86012d98053545e64 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Tue, 16 Sep 2025 21:29:49 +0000 Subject: crypto: ccp - Add AMD Seamless Firmware Servicing (SFS) driver AMD Seamless Firmware Servicing (SFS) is a secure method to allow non-persistent updates to running firmware and settings without requiring BIOS reflash and/or system reset. SFS does not address anything that runs on the x86 processors and it can be used to update ASP firmware, modules, register settings and update firmware for other microprocessors like TMPM, etc. SFS driver support adds ioctl support to communicate the SFS commands to the ASP/PSP by using the TEE mailbox interface. The Seamless Firmware Servicing (SFS) driver is added as a PSP sub-device. For detailed information, please look at the SFS specifications: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/58604.pdf Signed-off-by: Ashish Kalra Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Acked-by: Herbert Xu Link: https://lore.kernel.org/cover.1758057691.git.ashish.kalra@amd.com --- include/linux/psp-platform-access.h | 2 + include/uapi/linux/psp-sfs.h | 87 +++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 include/uapi/linux/psp-sfs.h (limited to 'include') diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index 1504fb012c05..540abf7de048 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -7,6 +7,8 @@ enum psp_platform_access_msg { PSP_CMD_NONE = 0x0, + PSP_SFS_GET_FW_VERSIONS, + PSP_SFS_UPDATE, PSP_CMD_HSTI_QUERY = 0x14, PSP_I2C_REQ_BUS_CMD = 0x64, PSP_DYNAMIC_BOOST_GET_NONCE, diff --git a/include/uapi/linux/psp-sfs.h b/include/uapi/linux/psp-sfs.h new file mode 100644 index 
000000000000..94e51670383c --- /dev/null +++ b/include/uapi/linux/psp-sfs.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Userspace interface for AMD Seamless Firmware Servicing (SFS) + * + * Copyright (C) 2025 Advanced Micro Devices, Inc. + * + * Author: Ashish Kalra + */ + +#ifndef __PSP_SFS_USER_H__ +#define __PSP_SFS_USER_H__ + +#include + +/** + * SFS: AMD Seamless Firmware Support (SFS) interface + */ + +#define PAYLOAD_NAME_SIZE 64 +#define TEE_EXT_CMD_BUFFER_SIZE 4096 + +/** + * struct sfs_user_get_fw_versions - get current level of base firmware (output). + * @blob: current level of base firmware for ASP and patch levels (input/output). + * @sfs_status: 32-bit SFS status value (output). + * @sfs_extended_status: 32-bit SFS extended status value (output). + */ +struct sfs_user_get_fw_versions { + __u8 blob[TEE_EXT_CMD_BUFFER_SIZE]; + __u32 sfs_status; + __u32 sfs_extended_status; +} __packed; + +/** + * struct sfs_user_update_package - update SFS package (input). + * @payload_name: name of SFS package to load, verify and execute (input). + * @sfs_status: 32-bit SFS status value (output). + * @sfs_extended_status: 32-bit SFS extended status value (output). 
+ */ +struct sfs_user_update_package { + char payload_name[PAYLOAD_NAME_SIZE]; + __u32 sfs_status; + __u32 sfs_extended_status; +} __packed; + +/** + * Seamless Firmware Support (SFS) IOC + * + * possible return codes for all SFS IOCTLs: + * 0: success + * -EINVAL: invalid input + * -E2BIG: excess data passed + * -EFAULT: failed to copy to/from userspace + * -EBUSY: mailbox in recovery or in use + * -ENODEV: driver not bound with PSP device + * -EACCES: request isn't authorized + * -EINVAL: invalid parameter + * -ETIMEDOUT: request timed out + * -EAGAIN: invalid request for state machine + * -ENOENT: not implemented + * -ENFILE: overflow + * -EPERM: invalid signature + * -EIO: PSP I/O error + */ +#define SFS_IOC_TYPE 'S' + +/** + * SFSIOCFWVERS - returns blob containing FW versions + * ASP provides the current level of Base Firmware for the ASP + * and the other microprocessors as well as current patch + * level(s). + */ +#define SFSIOCFWVERS _IOWR(SFS_IOC_TYPE, 0x1, struct sfs_user_get_fw_versions) + +/** + * SFSIOCUPDATEPKG - updates package/payload + * ASP loads, verifies and executes the SFS package. + * By default, the SFS package/payload is loaded from + * /lib/firmware/amd, but alternative firmware loading + * path can be specified using kernel parameter + * firmware_class.path or the firmware loading path + * can be customized using sysfs file: + * /sys/module/firmware_class/parameters/path. + */ +#define SFSIOCUPDATEPKG _IOWR(SFS_IOC_TYPE, 0x2, struct sfs_user_update_package) + +#endif /* __PSP_SFS_USER_H__ */ -- cgit v1.2.3 From 45fe729be9a6be326a1ca25af82d34de32ba2ce8 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 16 Sep 2025 10:16:20 +0800 Subject: usb: typec: Stub out typec_switch APIs when CONFIG_TYPEC=n Ease driver development by adding stubs for the typec_switch APIs when CONFIG_TYPEC=n. Copy the same method used for the typec_mux APIs to be consistent. 
Acked-by: Greg Kroah-Hartman Reviewed-by: Heikki Krogerus Signed-off-by: Stephen Boyd Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20250916021620.1303995-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_mux.h | 46 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h index 2489a7857d8e..aa9ebb7e2fe0 100644 --- a/include/linux/usb/typec_mux.h +++ b/include/linux/usb/typec_mux.h @@ -3,6 +3,7 @@ #ifndef __USB_TYPEC_MUX #define __USB_TYPEC_MUX +#include #include #include @@ -24,16 +25,13 @@ struct typec_switch_desc { void *drvdata; }; +#if IS_ENABLED(CONFIG_TYPEC) + struct typec_switch *fwnode_typec_switch_get(struct fwnode_handle *fwnode); void typec_switch_put(struct typec_switch *sw); int typec_switch_set(struct typec_switch *sw, enum typec_orientation orientation); -static inline struct typec_switch *typec_switch_get(struct device *dev) -{ - return fwnode_typec_switch_get(dev_fwnode(dev)); -} - struct typec_switch_dev * typec_switch_register(struct device *parent, const struct typec_switch_desc *desc); @@ -42,6 +40,44 @@ void typec_switch_unregister(struct typec_switch_dev *sw); void typec_switch_set_drvdata(struct typec_switch_dev *sw, void *data); void *typec_switch_get_drvdata(struct typec_switch_dev *sw); +#else + +static inline struct typec_switch * +fwnode_typec_switch_get(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline void typec_switch_put(struct typec_switch *sw) {} + +static inline int typec_switch_set(struct typec_switch *sw, + enum typec_orientation orientation) +{ + return 0; +} + +static inline struct typec_switch_dev * +typec_switch_register(struct device *parent, + const struct typec_switch_desc *desc) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void typec_switch_unregister(struct typec_switch_dev *sw) {} + +static inline void 
typec_switch_set_drvdata(struct typec_switch_dev *sw, void *data) {} +static inline void *typec_switch_get_drvdata(struct typec_switch_dev *sw) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +#endif /* CONFIG_TYPEC */ + +static inline struct typec_switch *typec_switch_get(struct device *dev) +{ + return fwnode_typec_switch_get(dev_fwnode(dev)); +} + struct typec_mux_state { struct typec_altmode *alt; unsigned long mode; -- cgit v1.2.3 From bfb1d99d969fe3b892db30848aeebfa19d21f57f Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Tue, 16 Sep 2025 16:21:32 +0800 Subject: usb: gadget: Store endpoint pointer in usb_request Gadget function drivers often have goto-based error handling in their bind paths, which can be bug-prone. Refactoring these paths to use __free() scope-based cleanup is desirable, but currently blocked. The blocker is that usb_ep_free_request(ep, req) requires two parameters, while the __free() mechanism can only pass a pointer to the request itself. Store an endpoint pointer in the struct usb_request. The pointer is populated centrally in usb_ep_alloc_request() on every successful allocation, making the request object self-contained. Signed-off-by: Kuen-Han Tsai Link: https://lore.kernel.org/r/20250916-ready-v1-1-4997bf277548@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250916-ready-v1-1-4997bf277548@google.com --- include/linux/usb/gadget.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 0f28c5512fcb..0f2079476088 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -32,6 +32,7 @@ struct usb_ep; /** * struct usb_request - describes one i/o request + * @ep: The associated endpoint set by usb_ep_alloc_request(). * @buf: Buffer used for data. Always provide this; some controllers * only use PIO, or don't use DMA for some endpoints. * @dma: DMA address corresponding to 'buf'. 
If you don't set this @@ -98,6 +99,7 @@ struct usb_ep; */ struct usb_request { + struct usb_ep *ep; void *buf; unsigned length; dma_addr_t dma; -- cgit v1.2.3 From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Tue, 16 Sep 2025 16:21:33 +0800 Subject: usb: gadget: Introduce free_usb_request helper Introduce the free_usb_request() function that frees both the request's buffer and the request itself. This function serves as the cleanup callback for DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request pointers. Signed-off-by: Kuen-Han Tsai Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com --- include/linux/usb/gadget.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 0f2079476088..3aaf19e77558 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -15,6 +15,7 @@ #ifndef __LINUX_USB_GADGET_H #define __LINUX_USB_GADGET_H +#include #include #include #include @@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep) /*-------------------------------------------------------------------------*/ +/** + * free_usb_request - frees a usb_request object and its buffer + * @req: the request being freed + * + * This helper function frees both the request's buffer and the request object + * itself by calling usb_ep_free_request(). Its signature is designed to be used + * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request + * pointers. 
+ */ +static inline void free_usb_request(struct usb_request *req) +{ + if (!req) + return; + + kfree(req->buf); + usb_ep_free_request(req->ep, req); +} + +DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T)) + +/*-------------------------------------------------------------------------*/ + struct usb_dcd_config_params { __u8 bU1devExitLat; /* U1 Device exit Latency */ #define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */ -- cgit v1.2.3 From 5bcf9e1d0a5da750ff180e7385b0bd4041686516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duje=20Mihanovi=C4=87?= Date: Sat, 13 Sep 2025 23:12:48 +0200 Subject: dt-bindings: clock: marvell,pxa1908: Add syscon compatible to apmu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add required syscon compatible and #power-domain-cells to the APMU controller. This is required for the SoC's power domain controller as the registers are shared. Device tree bindings for said power domains are also added. 
Reviewed-by: Rob Herring (Arm) Signed-off-by: Duje Mihanović Signed-off-by: Ulf Hansson --- include/dt-bindings/power/marvell,pxa1908-power.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/dt-bindings/power/marvell,pxa1908-power.h (limited to 'include') diff --git a/include/dt-bindings/power/marvell,pxa1908-power.h b/include/dt-bindings/power/marvell,pxa1908-power.h new file mode 100644 index 000000000000..19b088351af1 --- /dev/null +++ b/include/dt-bindings/power/marvell,pxa1908-power.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Marvell PXA1908 power domains + * + * Copyright 2025, Duje Mihanović + */ + +#ifndef __DTS_MARVELL_PXA1908_POWER_H +#define __DTS_MARVELL_PXA1908_POWER_H + +#define PXA1908_POWER_DOMAIN_VPU 0 +#define PXA1908_POWER_DOMAIN_GPU 1 +#define PXA1908_POWER_DOMAIN_GPU2D 2 +#define PXA1908_POWER_DOMAIN_DSI 3 +#define PXA1908_POWER_DOMAIN_ISP 4 + +#endif -- cgit v1.2.3 From 413187f79062634575098653c40f95d439d3b157 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Sep 2025 15:26:49 +0200 Subject: stddef: Remove token-pasting in TRAILING_OVERLAP() Currently, TRAILING_OVERLAP() token-pastes the FAM parameter into the name of internal padding member `__offset_to_##FAM`. This forces FAM to be a single identifier, which prevents callers from using a FAM when it's a nested member. For instance, see the following scenario: | struct flex { | size_t count; | int data[]; | }; | struct foo { | int hdr_foo; | struct flex f; | }; | struct composite { | struct foo hdr; | int data[100]; | }; In this case, it'd be useful if TRAILING_OVERLAP() could be used in the following way: | struct composite { | TRAILING_OVERLAP(struct foo, hdr, f.data, | int data[100]; | ); | }; However, this is not currently possible due to the token concatenation in `__offset_to_##FAM`, which fails when FAM contains a dot.
So, remove token-pasting and use the fixed internal name `__offset_to_FAM` and, with this, expand the capabilities of TRAILING_OVERLAP(). :) Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/13b3e0a69aad837b4e32ca8269b9d91bf1fbe9ef.1758115257.git.gustavoars@kernel.org Signed-off-by: Kees Cook --- include/linux/stddef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index dab49e2ec8c0..701099c67c24 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -108,7 +108,7 @@ enum { union { \ TYPE NAME; \ struct { \ - unsigned char __offset_to_##FAM[offsetof(TYPE, FAM)]; \ + unsigned char __offset_to_FAM[offsetof(TYPE, FAM)]; \ MEMBERS \ }; \ } -- cgit v1.2.3 From 2bbdcf02c3f3df6c85abd0a2fee0d7f0f4113e96 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Sep 2025 15:28:07 +0200 Subject: stddef: Introduce __TRAILING_OVERLAP() Introduce underlying __TRAILING_OVERLAP() macro to let callers apply attributes to trailing overlapping members. For instance, the code below: | struct flex { | size_t count; | int data[]; | }; | struct { | struct flex f; | struct foo a; | struct boo b; | } __packed instance; can now be changed to the following, and preserve the __packed attribute: | __TRAILING_OVERLAP(struct flex, f, data, __packed, | struct foo a; | struct boo b; | ) instance; Signed-off-by: Gustavo A. R.
Silva Link: https://lore.kernel.org/r/f80c529b239ce11f0a51f714fe00ddf839e05f5e.1758115257.git.gustavoars@kernel.org Signed-off-by: Kees Cook --- include/linux/stddef.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 701099c67c24..80b6bfb944f0 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -94,7 +94,8 @@ enum { __DECLARE_FLEX_ARRAY(TYPE, NAME) /** - * TRAILING_OVERLAP() - Overlap a flexible-array member with trailing members. + * __TRAILING_OVERLAP() - Overlap a flexible-array member with trailing + * members. * * Creates a union between a flexible-array member (FAM) in a struct and a set * of additional members that would otherwise follow it. @@ -102,15 +103,30 @@ enum { * @TYPE: Flexible structure type name, including "struct" keyword. * @NAME: Name for a variable to define. * @FAM: The flexible-array member within @TYPE + * @ATTRS: Any struct attributes (usually empty) * @MEMBERS: Trailing overlapping members. */ -#define TRAILING_OVERLAP(TYPE, NAME, FAM, MEMBERS) \ +#define __TRAILING_OVERLAP(TYPE, NAME, FAM, ATTRS, MEMBERS) \ union { \ TYPE NAME; \ struct { \ unsigned char __offset_to_FAM[offsetof(TYPE, FAM)]; \ MEMBERS \ - }; \ + } ATTRS; \ } +/** + * TRAILING_OVERLAP() - Overlap a flexible-array member with trailing members. + * + * Creates a union between a flexible-array member (FAM) in a struct and a set + * of additional members that would otherwise follow it. + * + * @TYPE: Flexible structure type name, including "struct" keyword. + * @NAME: Name for a variable to define. + * @FAM: The flexible-array member within @TYPE + * @MEMBERS: Trailing overlapping members. 
+ */ +#define TRAILING_OVERLAP(TYPE, NAME, FAM, MEMBERS) \ + __TRAILING_OVERLAP(TYPE, NAME, FAM, /* no attrs */, MEMBERS) + #endif -- cgit v1.2.3 From 9664d5810e9bc919a9a661594e01eabc80befe8a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Sep 2025 10:11:28 +0100 Subject: KVM: arm64: Don't access ICC_SRE_EL2 if GICv3 doesn't support v2 compatibility We currently access ICC_SRE_EL2 at each load/put on VHE, and on each entry/exit on nVHE. Both are quite onerous on NV, as this register always traps. We do this to make sure the EL1 guest doesn't flip between v2 and v3 behind our back. But all modern implementations have dropped v2, and this is just overhead. At the same time, the GICv5 spec has been fixed to allow access to ICC_SRE_EL2 in legacy mode. Use this opportunity to replace the GICv5 checks for v2 compat checks, with an ad-hoc static key. Co-developed-by: Sascha Bischoff Signed-off-by: Sascha Bischoff Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 404883c7af6e..9a6340d9c91e 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -375,6 +375,7 @@ struct vgic_cpu { extern struct static_key_false vgic_v2_cpuif_trap; extern struct static_key_false vgic_v3_cpuif_trap; +extern struct static_key_false vgic_v3_has_v2_compat; int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr); void kvm_vgic_early_init(struct kvm *kvm); -- cgit v1.2.3 From 5c5db9efe323dd0b0d7917dbe5b9c0999c95e79e Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Thu, 28 Aug 2025 10:59:43 +0000 Subject: irqchip/gic-v5: Drop has_gcie_v3_compat from gic_kvm_info The presence of FEAT_GCIE_LEGACY is now handled as a CPU feature. Therefore, drop the check and flag from the GIC driver and gic_kvm_info as it is no longer required or used by KVM. 
Signed-off-by: Sascha Bischoff Acked-by: Thomas Gleixner Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-vgic-info.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index ca1713fac6e3..a470a73a805a 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -36,8 +36,6 @@ struct gic_kvm_info { bool has_v4_1; /* Deactivation impared, subpar stuff */ bool no_hw_deactivation; - /* v3 compat support (GICv5 hosts, only) */ - bool has_gcie_v3_compat; }; #ifdef CONFIG_KVM -- cgit v1.2.3 From 652b08afba69d5d26fe91098eb832b1bcc0f91c2 Mon Sep 17 00:00:00 2001 From: Cristian Birsan Date: Thu, 21 Nov 2024 20:16:38 +0200 Subject: ARM: at91: remove default values for PMC_PLL_ACR Remove default values for PMC PLL Analog Control Register(ACR) as the values are specific for each SoC and PLL and load them from PLL characteristics structure Co-developed-by: Andrei Simion Signed-off-by: Andrei Simion Signed-off-by: Cristian Birsan [nicolas.ferre@microchip.com: fix pll acr write sequence, preserve val] Signed-off-by: Nicolas Ferre --- include/linux/clk/at91_pmc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 7af499bdbecb..d60ce9708ea2 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -47,8 +47,6 @@ #define AT91_PMC_PCSR 0x18 /* Peripheral Clock Status Register */ #define AT91_PMC_PLL_ACR 0x18 /* PLL Analog Control Register [for SAM9X60] */ -#define AT91_PMC_PLL_ACR_DEFAULT_UPLL UL(0x12020010) /* Default PLL ACR value for UPLL */ -#define AT91_PMC_PLL_ACR_DEFAULT_PLLA UL(0x00020010) /* Default PLL ACR value for PLLA */ #define AT91_PMC_PLL_ACR_UTMIVR (1 << 12) /* UPLL Voltage regulator Control */ #define AT91_PMC_PLL_ACR_UTMIBG (1 << 13) /* UPLL Bandgap Control */ -- cgit v1.2.3 From 
4ca24d6abbca5df76c4b189dd94fb055613de297 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 15 Sep 2025 11:08:14 -0500 Subject: lib/crypto: sha256: Add support for 2-way interleaved hashing Many arm64 and x86_64 CPUs can compute two SHA-256 hashes in nearly the same speed as one, if the instructions are interleaved. This is because SHA-256 is serialized block-by-block, and two interleaved hashes take much better advantage of the CPU's instruction-level parallelism. Meanwhile, a very common use case for SHA-256 hashing in the Linux kernel is dm-verity and fs-verity. Both use a Merkle tree that has a fixed block size, usually 4096 bytes with an empty or 32-byte salt prepended. Usually, many blocks need to be hashed at a time. This is an ideal scenario for 2-way interleaved hashing. To enable this optimization, add a new function sha256_finup_2x() to the SHA-256 library API. It computes the hash of two equal-length messages, starting from a common initial context. For now it always falls back to sequential processing. Later patches will wire up arm64 and x86_64 optimized implementations. Note that the interleaving factor could in principle be higher than 2x. However, that runs into many practical difficulties and CPU throughput limitations. Thus, both the implementations I'm adding are 2x. In the interest of using the simplest solution, the API matches that. 
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250915160819.140019-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/crypto/sha2.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h index 15e461e568cc..e5dafb935cc8 100644 --- a/include/crypto/sha2.h +++ b/include/crypto/sha2.h @@ -375,6 +375,34 @@ void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]); */ void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); +/** + * sha256_finup_2x() - Compute two SHA-256 digests from a common initial + * context. On some CPUs, this is faster than sequentially + * computing each digest. + * @ctx: an optional initial context, which may have already processed data. If + * NULL, a default initial context is used (equivalent to sha256_init()). + * @data1: data for the first message + * @data2: data for the second message + * @len: the length of each of @data1 and @data2, in bytes + * @out1: (output) the first SHA-256 message digest + * @out2: (output) the second SHA-256 message digest + * + * Context: Any context. + */ +void sha256_finup_2x(const struct sha256_ctx *ctx, const u8 *data1, + const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE], + u8 out2[SHA256_DIGEST_SIZE]); + +/** + * sha256_finup_2x_is_optimized() - Check if sha256_finup_2x() is using a real + * interleaved implementation, as opposed to a + * sequential fallback + * @return: true if optimized + * + * Context: Any context. 
+ */ +bool sha256_finup_2x_is_optimized(void); + /** * struct hmac_sha256_key - Prepared key for HMAC-SHA256 * @key: private -- cgit v1.2.3 From d4bf06592ad68ac4353a81c73e8e662cf88aa2cc Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Thu, 11 Sep 2025 10:02:53 +0530 Subject: soc: qcom: geni-se: Add support to load QUP SE Firmware via Linux subsystem In Qualcomm SoCs, firmware loading for Serial Engines (SE) within the QUP hardware has traditionally been managed by TrustZone (TZ). This restriction poses a significant challenge for developers, as it limits their ability to enable various protocols on any of the SEs from the Linux side, reducing flexibility. Load the firmware to QUP SE based on the 'firmware-name' property specified in devicetree at bootup time. Co-developed-by: Mukesh Kumar Savaliya Signed-off-by: Mukesh Kumar Savaliya Signed-off-by: Viken Dadhaniya Link: https://lore.kernel.org/r/20250911043256.3523057-4-viken.dadhaniya@oss.qualcomm.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/geni-se.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 2996a3c28ef3..0a984e2579fe 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
*/ #ifndef _LINUX_QCOM_GENI_SE @@ -36,6 +37,7 @@ enum geni_se_protocol_type { GENI_SE_I2C, GENI_SE_I3C, GENI_SE_SPI_SLAVE, + GENI_SE_INVALID_PROTO = 255, }; struct geni_wrapper; @@ -531,5 +533,7 @@ void geni_icc_set_tag(struct geni_se *se, u32 tag); int geni_icc_enable(struct geni_se *se); int geni_icc_disable(struct geni_se *se); + +int geni_load_se_firmware(struct geni_se *se, enum geni_se_protocol_type protocol); #endif #endif -- cgit v1.2.3 From bdf780fbcef5df4d365404a178e7f845a317b4e9 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Thu, 11 Sep 2025 19:24:07 +0800 Subject: ACPI: processor: idle: Rearrange declarations in header file Group all of the declarations of functions that belong to the ACPI processor idle driver together in one place in processor.h. While at it, drop the unnecessary extern modifier from the declarations of two functions. Signed-off-by: Huisong Li Link: https://patch.msgid.link/20250911112408.1668431-3-lihuisong@huawei.com [ rjw: Subject and changelog rewrite ] Signed-off-by: Rafael J.
Wysocki --- include/acpi/processor.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 2976a6d0c54f..6ee4a69412de 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -425,6 +425,8 @@ int acpi_processor_power_state_has_changed(struct acpi_processor *pr); int acpi_processor_hotplug(struct acpi_processor *pr); void acpi_processor_register_idle_driver(void); void acpi_processor_unregister_idle_driver(void); +int acpi_processor_ffh_lpi_probe(unsigned int cpu); +int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi); #endif /* CONFIG_ACPI_PROCESSOR_IDLE */ /* in processor_thermal.c */ @@ -447,11 +449,6 @@ static inline void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) } #endif /* CONFIG_CPU_FREQ */ -#ifdef CONFIG_ACPI_PROCESSOR_IDLE -extern int acpi_processor_ffh_lpi_probe(unsigned int cpu); -extern int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi); -#endif - void acpi_processor_init_invariance_cppc(void); #endif -- cgit v1.2.3 From 09a1b33c080f6ac700fadc67c8471e67bf75fda4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Aug 2025 12:33:11 -0400 Subject: preparations to taking MNT_WRITE_HOLD out of ->mnt_flags We have an unpleasant wart in accessibility rules for struct mount. There are per-superblock lists of mounts, used by sb_prepare_remount_readonly() to check if any of those is currently claimed for write access and to block further attempts to get write access on those until we are done. As soon as it is attached to a filesystem, mount becomes reachable via that list. Only sb_prepare_remount_readonly() traverses it and it only accesses a few members of struct mount. Unfortunately, ->mnt_flags is one of those and it is modified - MNT_WRITE_HOLD set and then cleared. It is done under mount_lock, so from the locking rules POV everything's fine. 
However, it has easily overlooked implications - once mount has been attached to a filesystem, it has to be treated as globally visible. In particular, initializing ->mnt_flags *must* be done either prior to that point or under mount_lock. All other members are still private at that point. Life gets simpler if we move that bit (and that's *all* that can get touched by access via this list) out of ->mnt_flags. It's not even hard to do - currently the list is implemented as list_head one, anchored in super_block->s_mounts and linked via mount->mnt_instance. As the first step, switch it to hlist-like open-coded structure - address of the first mount in the set is stored in ->s_mounts and ->mnt_instance replaced with ->mnt_next_for_sb and ->mnt_pprev_for_sb - the former either NULL or pointing to the next mount in set, the latter - address of either ->s_mounts or ->mnt_next_for_sb in the previous element of the set. In the next commit we'll steal the LSB of ->mnt_pprev_for_sb as replacement for MNT_WRITE_HOLD. 
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..0e9c7f1460dc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1324,6 +1324,8 @@ struct sb_writers { struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; +struct mount; + struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ @@ -1358,7 +1360,7 @@ struct super_block { __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ - struct list_head s_mounts; /* list of mounts; _not_ for fs use */ + struct mount *s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ struct file *s_bdev_file; struct backing_dev_info *s_bdi; -- cgit v1.2.3 From 3371fa2f27134fc4ec7d40b2ae7b9e92c3b2527e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Aug 2025 13:37:12 -0400 Subject: struct mount: relocate MNT_WRITE_HOLD bit ... from ->mnt_flags to LSB of ->mnt_pprev_for_sb. This is safe - we always set and clear it within the same mount_lock scope, so we won't interfere with list operations - traversals are always forward, so they don't even look at ->mnt_pprev_for_sb and both insertions and removals are in mount_lock scopes of their own, so that bit will be clear in *all* mount instances during those.
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/mount.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 18e4b97f8a98..85e97b9340ff 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -33,7 +33,6 @@ enum mount_flags { MNT_NOSYMFOLLOW = 0x80, MNT_SHRINKABLE = 0x100, - MNT_WRITE_HOLD = 0x200, MNT_INTERNAL = 0x4000, @@ -52,7 +51,7 @@ enum mount_flags { | MNT_READONLY | MNT_NOSYMFOLLOW, MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, - MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | + MNT_INTERNAL_FLAGS = MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_LOCKED }; -- cgit v1.2.3 From a79765248649de77771c24f7be08ff4c96f16f7a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Aug 2025 02:48:13 -0400 Subject: constify {__,}mnt_is_readonly() Signed-off-by: Al Viro --- include/linux/mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mount.h b/include/linux/mount.h index 85e97b9340ff..acfe7ef86a1b 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -76,7 +76,7 @@ extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); extern void mnt_make_shortterm(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(const struct path *path); -extern bool __mnt_is_readonly(struct vfsmount *mnt); +extern bool __mnt_is_readonly(const struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path); -- cgit v1.2.3 From b67a8631a4a8f26a18fac236aaf61aa2412c7a0d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 13 Sep 2025 23:08:17 +0200 Subject: net: phy: remove mdio_board_info support from phylib After having removed mdio_board_info usage from dsa_loop, there's no user left. So let's drop support for it from phylib. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/01542a2e-05f5-4f13-acef-72632b33b5be@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6f3b25cb7f4e..7da9e19471c9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2129,16 +2129,6 @@ int __phy_hwtstamp_set(struct phy_device *phydev, extern const struct bus_type mdio_bus_type; extern const struct class mdio_bus_class; -struct mdio_board_info { - const char *bus_id; - char modalias[MDIO_NAME_SIZE]; - int mdio_addr; - const void *platform_data; -}; - -int mdiobus_register_board_info(const struct mdio_board_info *info, - unsigned int n); - /** * phy_module_driver() - Helper macro for registering PHY drivers * @__phy_drivers: array of PHY drivers to register -- cgit v1.2.3 From d69ae81efbc95c94a2760fc82d27cdab4c26fe76 Mon Sep 17 00:00:00 2001 From: Fenglin Wu Date: Wed, 17 Sep 2025 18:15:14 +0800 Subject: power: supply: core: Add resistance power supply property Some battery drivers provide the ability to export internal resistance as a parameter. Add internal_resistance power supply property for that purpose. 
Signed-off-by: Fenglin Wu Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f21f806bfb38..f38da7c039d2 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -176,6 +176,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_MANUFACTURE_YEAR, POWER_SUPPLY_PROP_MANUFACTURE_MONTH, POWER_SUPPLY_PROP_MANUFACTURE_DAY, + POWER_SUPPLY_PROP_INTERNAL_RESISTANCE, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, -- cgit v1.2.3 From cd93fbdce5981c947f22015ded3ac6bd1939b0ad Mon Sep 17 00:00:00 2001 From: Fenglin Wu Date: Wed, 17 Sep 2025 18:15:15 +0800 Subject: power: supply: core: Add state_of_health power supply property Add state_of_health power supply property to represent battery health percentage. Signed-off-by: Fenglin Wu Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f38da7c039d2..360ffdf272da 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -177,6 +177,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_MANUFACTURE_MONTH, POWER_SUPPLY_PROP_MANUFACTURE_DAY, POWER_SUPPLY_PROP_INTERNAL_RESISTANCE, + POWER_SUPPLY_PROP_STATE_OF_HEALTH, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, -- cgit v1.2.3 From 05dfe654b5932322e297aba11dd6f3f26eea6ecb Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:36 +0300 Subject: net/mlx5: Remove unused 'offset' field from mlx5_sq_bfreg The 'offset' field was introduced in the original commit [1] and never used until commit [2], which added an unnecessary use. Remove the field and refactor the write-combining test to use a local variable instead. 
[1] commit a6d51b68611e ("net/mlx5: Introduce blue flame register allocator") [2] commit d98995b4bf98 ("net/mlx5: Reimplement write combining test") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fcfc18bfeba9..5a85b6d91ba3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -434,7 +434,6 @@ struct mlx5_sq_bfreg { struct mlx5_uars_page *up; bool wc; u32 index; - unsigned int offset; }; struct mlx5_core_health { -- cgit v1.2.3 From aa4595d0ada65d5d44fa924a42a87c175d9d88e3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:38 +0300 Subject: net/mlx5: Store the global doorbell in mlx5_priv The global doorbell is used for more than just Ethernet resources, so move it out of mlx5e_hw_objs into a common place (mlx5_priv), to avoid non-Ethernet modules (e.g. HWS, ASO) depending on Ethernet structs. Use this opportunity to consolidate it with the 'uar' pointer already there, which was used as an RX doorbell. Underneath the 'uar' pointer is identical to 'bfreg->up', so store a single resource and use that instead. For CQ doorbells, care is taken to always use bfreg->up->index instead of bfreg->index, which may refer to a subsequent UAR page from the same ALLOC_UAR batch on some NICs. This paves the way for cleanly supporting multiple doorbells in the Ethernet driver. 
Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5a85b6d91ba3..15c434fedff7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -612,7 +612,7 @@ struct mlx5_priv { struct mlx5_ft_pool *ft_pool; struct mlx5_bfreg_data bfregs; - struct mlx5_uars_page *uar; + struct mlx5_sq_bfreg bfreg; #ifdef CONFIG_MLX5_SF struct mlx5_vhca_state_notifier *vhca_state_notifier; struct mlx5_sf_dev_table *sf_dev_table; @@ -658,7 +658,6 @@ struct mlx5e_resources { u32 pdn; struct mlx5_td td; u32 mkey; - struct mlx5_sq_bfreg bfreg; #define MLX5_MAX_NUM_TC 8 u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; bool tisn_valid; -- cgit v1.2.3 From a315b723e87ba4e4573e1e5c759d512f38bdc0b3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:40 +0300 Subject: net/mlx5e: Prepare for using different CQ doorbells Completion queues (CQs) in mlx5 use the same global doorbell, which may become contended when accessed concurrently from many cores. This patch prepares the CQ management code for supporting different doorbells per CQ. This will be used in downstream patches to allow separate doorbells to be used by channels CQs. The main change is moving the 'uar' pointer from struct mlx5_core_cq to struct mlx5e_cq, as the uar page to be used is better off stored directly there. Other users of mlx5_core_cq also store the UAR to be used separately and therefore the pointer being removed is dead weight for them. As evidence, in this patch there are two users which set the mcq.uar pointer but didn't use it, Software Steering and old Innova CQ creation code. Instead, they rang the doorbell directly from another pointer. 
The 'uar' pointer added to struct mlx5e_cq remains in a hot cacheline (as before), because it may get accessed for each packet. Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/mlx5/cq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 991526039ccb..7ef2c7c7d803 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -41,7 +41,6 @@ struct mlx5_core_cq { int cqe_sz; __be32 *set_ci_db; __be32 *arm_db; - struct mlx5_uars_page *uar; refcount_t refcount; struct completion free; unsigned vector; -- cgit v1.2.3 From 71fb4832d50b01f0af2d257360c239879ce93a8e Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:41 +0300 Subject: net/mlx5e: Use multiple TX doorbells First, allocate more doorbells in mlx5e_create_mdev_resources: - one doorbell remains 'global' and will be used by all non-channel associated SQs (e.g. ASO, HWS, PTP, ...). - allocate additional 'num_doorbells' doorbells. This defaults to minimum between 8 and max number of channels. mlx5e_channel_pick_doorbell() now spreads out channel SQs across available doorbells. 
Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 15c434fedff7..99b34e4809ae 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -658,6 +658,8 @@ struct mlx5e_resources { u32 pdn; struct mlx5_td td; u32 mkey; + struct mlx5_sq_bfreg *bfregs; + unsigned int num_bfregs; #define MLX5_MAX_NUM_TC 8 u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; bool tisn_valid; @@ -801,6 +803,8 @@ struct mlx5_db { int index; }; +#define MLX5_DEFAULT_NUM_DOORBELLS 8 + enum { MLX5_COMP_EQ_SIZE = 1024, }; -- cgit v1.2.3 From 6bdcb735fec6cb866b0d40634d4f23effba81074 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 16 Sep 2025 17:11:43 +0300 Subject: devlink: Add a 'num_doorbells' driverinit param This parameter can be used by drivers to configure a different number of doorbells. 
Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Reviewed-by: Jiri Pirko Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 8d4362f010e4..9e824f61e40f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -531,6 +531,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS, + DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -598,6 +599,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_TOTAL_VFS_NAME "total_vfs" #define DEVLINK_PARAM_GENERIC_TOTAL_VFS_TYPE DEVLINK_PARAM_TYPE_U32 +#define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_NAME "num_doorbells" +#define DEVLINK_PARAM_GENERIC_NUM_DOORBELLS_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ -- cgit v1.2.3 From 542a495cbaa6dc57a310da62b501fdf318657cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:25 +0200 Subject: tcp: AccECN core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change implements Accurate ECN without negotiation and AccECN Option (that will be added by later changes). Based on AccECN specifications: https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt Accurate ECN allows feeding back the number of CE (congestion experienced) marks accurately to the sender in contrast to RFC3168 ECN that can only signal one marks-seen-yes/no per RTT. Congestion control algorithms can take advantage of the accurate ECN information to fine-tune their congestion response to avoid drastic rate reduction when only mild congestion is encountered. 
With Accurate ECN, tp->received_ce (r.cep in AccECN spec) keeps track of how many segments have arrived with a CE mark. Accurate ECN uses ACE field (ECE, CWR, AE) to communicate the value back to the sender which updates tp->delivered_ce (s.cep) based on the feedback. This signalling channel is lossy when ACE field overflow occurs. Conservative strategy is selected here to deal with the ACE overflow, however, some strategies using the AccECN option later in the overall patchset mitigate against false overflows detected. The ACE field values on the wire are offset by TCP_ACCECN_CEP_INIT_OFFSET. Delivered_ce/received_ce count the real CE marks rather than forcing all downstream users to adapt to the wire offset. This patch uses the first 1-byte hole and the last 4-byte hole of the tcp_sock_write_txrx for 'received_ce_pending' and 'received_ce'. Also, the group size of tcp_sock_write_txrx is increased from 91 + 4 to 95 + 4 due to the new u32 received_ce member. Below are the trimmed pahole outcomes before and after this patch. [BEFORE THIS PATCH] struct tcp_sock { [...] __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ u8 nonagle:4; /* 2521: 0 1 */ u8 rate_app_limited:1; /* 2521: 4 1 */ /* XXX 3 bits hole, try to pack */ /* XXX 2 bytes hole, try to pack */ [...] u32 delivered_ce; /* 2576 4 */ u32 app_limited; /* 2580 4 */ u32 rcv_wnd; /* 2684 4 */ struct tcp_options_received rx_opt; /* 2688 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2612 0 */ /* XXX 4 bytes hole, try to pack */ [...] /* size: 3200, cachelines: 50, members: 161 */ } [AFTER THIS PATCH] struct tcp_sock { [...] __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ u8 nonagle:4; /* 2521: 0 1 */ u8 rate_app_limited:1; /* 2521: 4 1 */ /* XXX 3 bits hole, try to pack */ /* Force alignment to the next boundary: */ u8 :0; u8 received_ce_pending:4;/* 2522: 0 1 */ u8 unused2:4; /* 2522: 4 1 */ /* XXX 1 byte hole, try to pack */ [...] 
u32 delivered_ce; /* 2576 4 */ u32 received_ce; /* 2580 4 */ u32 app_limited; /* 2584 4 */ u32 rcv_wnd; /* 2588 4 */ struct tcp_options_received rx_opt; /* 2592 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2616 0 */ [...] /* size: 3200, cachelines: 50, members: 164 */ } Signed-off-by: Ilpo Järvinen Co-developed-by: Olivier Tilmans Signed-off-by: Olivier Tilmans Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-2-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 3 +++ include/net/tcp.h | 15 +++++++++++++++ include/net/tcp_ecn.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 69 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d103cc0e7a35..90cee6e53527 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -287,6 +287,8 @@ struct tcp_sock { */ u8 nonagle : 4,/* Disable Nagle algorithm? */ rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ + u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */ + unused2:4; __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ @@ -299,6 +301,7 @@ struct tcp_sock { u32 snd_up; /* Urgent pointer */ u32 delivered; /* Total data packets delivered incl. 
rexmits */ u32 delivered_ce; /* Like the above but only ECE marked packets */ + u32 received_ce; /* Like the above but for rcvd CE marked pkts */ u32 app_limited; /* limited until "delivered" reaches this val */ u32 rcv_wnd; /* Current receiver window */ /* diff --git a/include/net/tcp.h b/include/net/tcp.h index e25340459ce4..bc5159fe842e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -973,6 +973,14 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) #define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE) #define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) +#define TCP_ACCECN_CEP_ACE_MASK 0x7 +#define TCP_ACCECN_ACE_MAX_DELTA 6 + +/* To avoid/detect middlebox interference, not all counters start at 0. + * See draft-ietf-tcpm-accurate-ecn for the latest values. + */ +#define TCP_ACCECN_CEP_INIT_OFFSET 5 + /* State flags for sacked in struct tcp_skb_cb */ enum tcp_skb_cb_sacked_flags { TCPCB_SACKED_ACKED = (1 << 0), /* SKB ACK'd by a SACK block */ @@ -1782,11 +1790,18 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { + u32 ace; + /* mptcp hooks are only on the slow path */ if (sk_is_mptcp((struct sock *)tp)) return; + ace = tcp_ecn_mode_accecn(tp) ? + ((tp->delivered_ce + TCP_ACCECN_CEP_INIT_OFFSET) & + TCP_ACCECN_CEP_ACE_MASK) : 0; + tp->pred_flags = htonl((tp->tcp_header_len << 26) | + (ace << 22) | ntohl(TCP_FLAG_ACK) | snd_wnd); } diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index b3430557676b..b0ed89dbad41 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -12,6 +12,7 @@ static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) { + /* Do not set CWR if in AccECN mode! 
*/ if (tcp_ecn_mode_rfc3168(tp)) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } @@ -19,8 +20,10 @@ static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) static inline void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb) { - if (tcp_hdr(skb)->cwr) { - tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + struct tcp_sock *tp = tcp_sk(sk); + + if (tcp_ecn_mode_rfc3168(tp) && tcp_hdr(skb)->cwr) { + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; /* If the sender is telling us it has entered CWR, then its * cwnd may be very low (even just 1 packet), so we should ACK @@ -36,6 +39,52 @@ static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } +static inline u8 tcp_accecn_ace(const struct tcphdr *th) +{ + return (th->ae << 2) | (th->cwr << 1) | th->ece; +} + +static inline void tcp_accecn_init_counters(struct tcp_sock *tp) +{ + tp->received_ce = 0; + tp->received_ce_pending = 0; +} + +/* Updates Accurate ECN received counters from the received IP ECN field */ +static inline void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb) +{ + u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; + u8 is_ce = INET_ECN_is_ce(ecnfield); + struct tcp_sock *tp = tcp_sk(sk); + + if (!INET_ECN_is_not_ect(ecnfield)) { + u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs); + + /* As for accurate ECN, the TCP_ECN_SEEN flag is set by + * tcp_ecn_received_counters() when the ECN codepoint of + * received TCP data or ACK contains ECT(0), ECT(1), or CE. 
+ */ + if (!tcp_ecn_mode_rfc3168(tp)) + tp->ecn_flags |= TCP_ECN_SEEN; + + /* ACE counter tracks *all* segments including pure ACKs */ + tp->received_ce += pcount; + tp->received_ce_pending = min(tp->received_ce_pending + pcount, + 0xfU); + } +} + +static inline void tcp_accecn_set_ace(struct tcphdr *th, struct tcp_sock *tp) +{ + u32 wire_ace; + + wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET; + th->ece = !!(wire_ace & 0x1); + th->cwr = !!(wire_ace & 0x2); + th->ae = !!(wire_ace & 0x4); + tp->received_ce_pending = 0; +} + static inline void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) { -- cgit v1.2.3 From 3cae34274c79e0c60ccd1c10516973af1aed2a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:26 +0200 Subject: tcp: accecn: AccECN negotiation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accurate ECN negotiation parts based on the specification: https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt Accurate ECN is negotiated using ECE, CWR and AE flags in the TCP header. TCP falls back into using RFC3168 ECN if one of the ends supports only RFC3168-style ECN. The AccECN negotiation includes reflecting IP ECN field value seen in SYN and SYNACK back using the same bits as negotiation to allow responding to SYN CE marks and to detect ECN field mangling. CE marks should not occur currently because SYN=1 segments are sent with Non-ECT in IP ECN field (but proposal exists to remove this restriction). Reflecting SYN IP ECN field in SYNACK is relatively simple. Reflecting SYNACK IP ECN field in the final/third ACK of the handshake is more challenging. Linux TCP code is not well prepared for using the final/third ACK as a signalling channel which makes things somewhat complicated here. 
tcp_ecn sysctl can be used to select the highest ECN variant (Accurate ECN, ECN, No ECN) that is attempted to be negotiated and requested for incoming connection and outgoing connection: TCP_ECN_IN_NOECN_OUT_NOECN, TCP_ECN_IN_ECN_OUT_ECN, TCP_ECN_IN_ECN_OUT_NOECN, TCP_ECN_IN_ACCECN_OUT_ACCECN, TCP_ECN_IN_ACCECN_OUT_ECN, and TCP_ECN_IN_ACCECN_OUT_NOECN. After this patch, the size of tcp_request_sock remains unchanged and no new holes are added. Below are the pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_request_sock { [...] u32 rcv_nxt; /* 352 4 */ u8 syn_tos; /* 356 1 */ /* size: 360, cachelines: 6, members: 16 */ } [AFTER THIS PATCH] struct tcp_request_sock { [...] u32 rcv_nxt; /* 352 4 */ u8 syn_tos; /* 356 1 */ bool accecn_ok; /* 357 1 */ u8 syn_ect_snt:2; /* 358: 0 1 */ u8 syn_ect_rcv:2; /* 358: 2 1 */ u8 accecn_fail_mode:4; /* 358: 4 1 */ /* size: 360, cachelines: 6, members: 20 */ } After this patch, the size of tcp_sock remains unchanged and no new holes are added. Also, 4 bits of the existing 2-byte hole are exploited. Below are the pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_sock { [...] u8 dup_ack_counter:2; /* 2761: 0 1 */ u8 tlp_retrans:1; /* 2761: 2 1 */ u8 unused:5; /* 2761: 3 1 */ u8 thin_lto:1; /* 2762: 0 1 */ u8 fastopen_connect:1; /* 2762: 1 1 */ u8 fastopen_no_cookie:1; /* 2762: 2 1 */ u8 fastopen_client_fail:2; /* 2762: 3 1 */ u8 frto:1; /* 2762: 5 1 */ /* XXX 2 bits hole, try to pack */ [...] u8 keepalive_probes; /* 2765 1 */ /* XXX 2 bytes hole, try to pack */ [...] /* size: 3200, cachelines: 50, members: 164 */ } [AFTER THIS PATCH] struct tcp_sock { [...] 
u8 dup_ack_counter:2; /* 2761: 0 1 */ u8 tlp_retrans:1; /* 2761: 2 1 */ u8 syn_ect_snt:2; /* 2761: 3 1 */ u8 syn_ect_rcv:2; /* 2761: 5 1 */ u8 thin_lto:1; /* 2761: 7 1 */ u8 fastopen_connect:1; /* 2762: 0 1 */ u8 fastopen_no_cookie:1; /* 2762: 1 1 */ u8 fastopen_client_fail:2; /* 2762: 2 1 */ u8 frto:1; /* 2762: 4 1 */ /* XXX 3 bits hole, try to pack */ [...] u8 keepalive_probes; /* 2765 1 */ u8 accecn_fail_mode:4; /* 2766: 0 1 */ /* XXX 4 bits hole, try to pack */ /* XXX 1 byte hole, try to pack */ [...] /* size: 3200, cachelines: 50, members: 166 */ } Signed-off-by: Ilpo Järvinen Co-developed-by: Olivier Tilmans Signed-off-by: Olivier Tilmans Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-3-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 8 +- include/net/tcp.h | 1 + include/net/tcp_ecn.h | 310 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 296 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 90cee6e53527..b8432bed546d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -168,6 +168,10 @@ struct tcp_request_sock { * after data-in-SYN. */ u8 syn_tos; + bool accecn_ok; + u8 syn_ect_snt: 2, + syn_ect_rcv: 2, + accecn_fail_mode:4; #ifdef CONFIG_TCP_AO u8 ao_keyid; u8 ao_rcv_next; @@ -375,7 +379,8 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - unused:5; + syn_ect_snt:2, /* AccECN ECT memory, only */ + syn_ect_rcv:2; /* ... 
needed during 3WHS + first seqno */ u8 thin_lto : 1,/* Use linear timeouts for thin streams */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ @@ -391,6 +396,7 @@ struct tcp_sock { syn_fastopen_child:1; /* created TFO passive child socket */ u8 keepalive_probes; /* num of allowed keep alive probes */ + u8 accecn_fail_mode:4; /* AccECN failure handling */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ /* RTT measurement */ diff --git a/include/net/tcp.h b/include/net/tcp.h index bc5159fe842e..da8c6640ead3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -972,6 +972,7 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) #define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE) #define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) +#define TCPHDR_SYNACK_ACCECN (TCPHDR_SYN | TCPHDR_ACK | TCPHDR_CWR) #define TCP_ACCECN_CEP_ACE_MASK 0x7 #define TCP_ACCECN_ACE_MAX_DELTA 6 diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index b0ed89dbad41..da0b355418bd 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -4,12 +4,26 @@ #include #include +#include #include #include #include #include +/* The highest ECN variant (Accurate ECN, ECN, or no ECN) that is + * attempted to be negotiated and requested for incoming connection + * and outgoing connection, respectively. + */ +enum tcp_ecn_mode { + TCP_ECN_IN_NOECN_OUT_NOECN = 0, + TCP_ECN_IN_ECN_OUT_ECN = 1, + TCP_ECN_IN_ECN_OUT_NOECN = 2, + TCP_ECN_IN_ACCECN_OUT_ACCECN = 3, + TCP_ECN_IN_ACCECN_OUT_ECN = 4, + TCP_ECN_IN_ACCECN_OUT_NOECN = 5, +}; + static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) { /* Do not set CWR if in AccECN mode! 
*/ @@ -39,19 +53,125 @@ static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } +/* tp->accecn_fail_mode */ +#define TCP_ACCECN_ACE_FAIL_SEND BIT(0) +#define TCP_ACCECN_ACE_FAIL_RECV BIT(1) +#define TCP_ACCECN_OPT_FAIL_SEND BIT(2) +#define TCP_ACCECN_OPT_FAIL_RECV BIT(3) + +static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp) +{ + return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND; +} + +static inline bool tcp_accecn_ace_fail_recv(const struct tcp_sock *tp) +{ + return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV; +} + +static inline bool tcp_accecn_opt_fail_send(const struct tcp_sock *tp) +{ + return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_SEND; +} + +static inline bool tcp_accecn_opt_fail_recv(const struct tcp_sock *tp) +{ + return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_RECV; +} + +static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode) +{ + tp->accecn_fail_mode |= mode; +} + static inline u8 tcp_accecn_ace(const struct tcphdr *th) { return (th->ae << 2) | (th->cwr << 1) | th->ece; } -static inline void tcp_accecn_init_counters(struct tcp_sock *tp) +/* Infer the ECT value our SYN arrived with from the echoed ACE field */ +static inline int tcp_accecn_extract_syn_ect(u8 ace) { - tp->received_ce = 0; - tp->received_ce_pending = 0; + /* Below is an excerpt from the 1st block of Table 2 of AccECN spec */ + static const int ace_to_ecn[8] = { + INET_ECN_ECT_0, /* 0b000 (Undefined) */ + INET_ECN_ECT_1, /* 0b001 (Undefined) */ + INET_ECN_NOT_ECT, /* 0b010 (Not-ECT is received) */ + INET_ECN_ECT_1, /* 0b011 (ECT-1 is received) */ + INET_ECN_ECT_0, /* 0b100 (ECT-0 is received) */ + INET_ECN_ECT_1, /* 0b101 (Reserved) */ + INET_ECN_CE, /* 0b110 (CE is received) */ + INET_ECN_ECT_1 /* 0b111 (Undefined) */ + }; + + return ace_to_ecn[ace & 0x7]; +} + +/* Check ECN field transition to detect invalid transitions */ +static inline bool tcp_ect_transition_valid(u8 snt, u8 rcv) +{ + if 
(rcv == snt) + return true; + + /* Non-ECT altered to something or something became non-ECT */ + if (snt == INET_ECN_NOT_ECT || rcv == INET_ECN_NOT_ECT) + return false; + /* CE -> ECT(0/1)? */ + if (snt == INET_ECN_CE) + return false; + return true; +} + +static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace, + u8 sent_ect) +{ + u8 ect = tcp_accecn_extract_syn_ect(ace); + struct tcp_sock *tp = tcp_sk(sk); + + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) + return true; + + if (!tcp_ect_transition_valid(sent_ect, ect)) { + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); + return false; + } + + return true; +} + +/* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */ +static inline void tcp_accecn_third_ack(struct sock *sk, + const struct sk_buff *skb, u8 sent_ect) +{ + u8 ace = tcp_accecn_ace(tcp_hdr(skb)); + struct tcp_sock *tp = tcp_sk(sk); + + switch (ace) { + case 0x0: + /* Invalid value */ + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); + break; + case 0x7: + case 0x5: + case 0x1: + /* Unused but legal values */ + break; + default: + /* Validation only applies to first non-data packet */ + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq && + !TCP_SKB_CB(skb)->sacked && + tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) { + if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) && + !tp->delivered_ce) + tp->delivered_ce++; + } + break; + } } /* Updates Accurate ECN received counters from the received IP ECN field */ -static inline void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb) +static inline void tcp_ecn_received_counters(struct sock *sk, + const struct sk_buff *skb) { u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; u8 is_ce = INET_ECN_is_ce(ecnfield); @@ -74,27 +194,152 @@ static inline void tcp_ecn_received_counters(struct sock *sk, const struct sk_bu } } -static inline void tcp_accecn_set_ace(struct tcphdr *th, struct tcp_sock *tp) +/* 
AccECN specification, 5.1: [...] a server can determine that it + * negotiated AccECN as [...] if the ACK contains an ACE field with + * the value 0b010 to 0b111 (decimal 2 to 7). + */ +static inline bool cookie_accecn_ok(const struct tcphdr *th) { - u32 wire_ace; + return tcp_accecn_ace(th) > 0x1; +} + +/* Used to form the ACE flags for SYN/ACK */ +static inline u16 tcp_accecn_reflector_flags(u8 ect) +{ + /* TCP ACE flags of SYN/ACK are set based on IP-ECN received from SYN. + * Below is an excerpt from the 1st block of Table 2 of AccECN spec, + * in which TCP ACE flags are encoded as: (AE << 2) | (CWR << 1) | ECE + */ + static const u8 ecn_to_ace_flags[4] = { + 0b010, /* Not-ECT is received */ + 0b011, /* ECT(1) is received */ + 0b100, /* ECT(0) is received */ + 0b110 /* CE is received */ + }; + + return FIELD_PREP(TCPHDR_ACE, ecn_to_ace_flags[ect & 0x3]); +} - wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET; - th->ece = !!(wire_ace & 0x1); - th->cwr = !!(wire_ace & 0x2); - th->ae = !!(wire_ace & 0x4); +/* AccECN specification, 3.1.2: If a TCP server that implements AccECN + * receives a SYN with the three TCP header flags (AE, CWR and ECE) set + * to any combination other than 000, 011 or 111, it MUST negotiate the + * use of AccECN as if they had been set to 111. + */ +static inline bool tcp_accecn_syn_requested(const struct tcphdr *th) +{ + u8 ace = tcp_accecn_ace(th); + + return ace && ace != 0x3; +} + +static inline void tcp_accecn_init_counters(struct tcp_sock *tp) +{ + tp->received_ce = 0; tp->received_ce_pending = 0; } -static inline void tcp_ecn_rcv_synack(struct tcp_sock *tp, - const struct tcphdr *th) +/* Used for make_synack to form the ACE flags */ +static inline void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect) { - if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr)) + /* TCP ACE flags of SYN/ACK are set based on IP-ECN codepoint received + * from SYN. 
Below is an excerpt from Table 2 of the AccECN spec: + * +====================+====================================+ + * | IP-ECN codepoint | Respective ACE flags on SYN/ACK | + * | received on SYN | AE CWR ECE | + * +====================+====================================+ + * | Not-ECT | 0 1 0 | + * | ECT(1) | 0 1 1 | + * | ECT(0) | 1 0 0 | + * | CE | 1 1 0 | + * +====================+====================================+ + */ + th->ae = !!(ect & INET_ECN_ECT_0); + th->cwr = ect != INET_ECN_ECT_0; + th->ece = ect == INET_ECN_ECT_1; +} + +static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb, + struct tcphdr *th) +{ + u32 wire_ace; + + /* The final packet of the 3WHS or anything like it must reflect + * the SYN/ACK ECT instead of putting CEP into ACE field, such + * case show up in tcp_flags. + */ + if (likely(!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACE))) { + wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET; + th->ece = !!(wire_ace & 0x1); + th->cwr = !!(wire_ace & 0x2); + th->ae = !!(wire_ace & 0x4); + tp->received_ce_pending = 0; + } +} + +/* See Table 2 of the AccECN draft */ +static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct tcphdr *th, + u8 ip_dsfield) +{ + struct tcp_sock *tp = tcp_sk(sk); + u8 ace = tcp_accecn_ace(th); + + switch (ace) { + case 0x0: + case 0x7: + /* +========+========+============+=============+ + * | A | B | SYN/ACK | Feedback | + * | | | B->A | Mode of A | + * | | | AE CWR ECE | | + * +========+========+============+=============+ + * | AccECN | No ECN | 0 0 0 | Not ECN | + * | AccECN | Broken | 1 1 1 | Not ECN | + * +========+========+============+=============+ + */ tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); + break; + case 0x1: + case 0x5: + /* +========+========+============+=============+ + * | A | B | SYN/ACK | Feedback | + * | | | B->A | Mode of A | + * | | | AE CWR ECE | | + * +========+========+============+=============+ + * | AccECN | Nonce | 1 0 1 | (Reserved) | + * | AccECN | 
ECN | 0 0 1 | Classic ECN | + * | Nonce | AccECN | 0 0 1 | Classic ECN | + * | ECN | AccECN | 0 0 1 | Classic ECN | + * +========+========+============+=============+ + */ + if (tcp_ecn_mode_pending(tp)) + /* Downgrade from AccECN, or requested initially */ + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); + break; + default: + tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); + tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK; + if (INET_ECN_is_ce(ip_dsfield) && + tcp_accecn_validate_syn_feedback(sk, ace, + tp->syn_ect_snt)) { + tp->received_ce++; + tp->received_ce_pending++; + } + break; + } } -static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, - const struct tcphdr *th) +static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, + const struct sk_buff *skb) { + if (tcp_ecn_mode_pending(tp)) { + if (!tcp_accecn_syn_requested(th)) { + /* Downgrade to classic ECN feedback */ + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); + } else { + tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & + INET_ECN_MASK; + tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); + } + } if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); } @@ -110,7 +355,7 @@ static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, /* Packet ECN state for a SYN-ACK */ static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) { - const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (tcp_ecn_disabled(tp)) @@ -118,6 +363,13 @@ static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) else if (tcp_ca_needs_ecn(sk) || tcp_bpf_ca_needs_ecn(sk)) INET_ECN_xmit(sk); + + if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) { + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; + TCP_SKB_CB(skb)->tcp_flags |= + tcp_accecn_reflector_flags(tp->syn_ect_rcv); + tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK; + } } /* Packet ECN state for a SYN. 
*/ @@ -125,8 +377,13 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || - tcp_ca_needs_ecn(sk) || bpf_needs_ecn; + bool use_ecn, use_accecn; + u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); + + use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN; + use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN || + tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN || + tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn; if (!use_ecn) { const struct dst_entry *dst = __sk_dst_get(sk); @@ -142,23 +399,32 @@ static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) INET_ECN_xmit(sk); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; - tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); + if (use_accecn) { + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_AE; + tcp_ecn_mode_set(tp, TCP_ECN_MODE_PENDING); + tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK; + } else { + tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); + } } } static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) { /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. 
*/ - TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR); + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; + } } static inline void tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) { - if (inet_rsk(req)->ecn_ok) + if (tcp_rsk(req)->accecn_ok) + tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); + else if (inet_rsk(req)->ecn_ok) th->ece = 1; } -- cgit v1.2.3 From 9a011277445583bab002fbf5043fab0ea03dc5dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:27 +0200 Subject: tcp: accecn: add AccECN rx byte counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These three byte counters track IP ECN field payload byte sums for all arriving (acceptable) packets for ECT0, ECT1, and CE. The AccECN option (added by a later patch in the series) echoes these counters back to sender side; therefore, it is placed within the group of tcp_sock_write_txrx. Below are the pahole outcomes before and after this patch, in which the group size of tcp_sock_write_txrx is increased from 95 + 4 to 107 + 4 and an extra 4-byte hole is created but will be exploited in later patches: [BEFORE THIS PATCH] struct tcp_sock { [...] u32 delivered_ce; /* 2576 4 */ u32 received_ce; /* 2580 4 */ u32 app_limited; /* 2584 4 */ u32 rcv_wnd; /* 2588 4 */ struct tcp_options_received rx_opt; /* 2592 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2616 0 */ [...] /* size: 3200, cachelines: 50, members: 166 */ } [AFTER THIS PATCH] struct tcp_sock { [...] u32 delivered_ce; /* 2576 4 */ u32 received_ce; /* 2580 4 */ u32 received_ecn_bytes[3];/* 2584 12 */ u32 app_limited; /* 2596 4 */ u32 rcv_wnd; /* 2600 4 */ struct tcp_options_received rx_opt; /* 2604 24 */ __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2628 0 */ /* XXX 4 bytes hole, try to pack */ [...] 
/* size: 3200, cachelines: 50, members: 167 */ } Signed-off-by: Ilpo Järvinen Signed-off-by: Neal Cardwell Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-4-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 4 ++++ include/net/tcp_ecn.h | 29 ++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b8432bed546d..012d01347b3c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -306,6 +306,10 @@ struct tcp_sock { u32 delivered; /* Total data packets delivered incl. rexmits */ u32 delivered_ce; /* Like the above but only ECE marked packets */ u32 received_ce; /* Like the above but for rcvd CE marked pkts */ + u32 received_ecn_bytes[3]; /* received byte counters for three ECN + * types: INET_ECN_ECT_1, INET_ECN_ECT_0, + * and INET_ECN_CE + */ u32 app_limited; /* limited until "delivered" reaches this val */ u32 rcv_wnd; /* Current receiver window */ /* diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index da0b355418bd..1a41a459aa07 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -171,7 +171,7 @@ static inline void tcp_accecn_third_ack(struct sock *sk, /* Updates Accurate ECN received counters from the received IP ECN field */ static inline void tcp_ecn_received_counters(struct sock *sk, - const struct sk_buff *skb) + const struct sk_buff *skb, u32 len) { u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; u8 is_ce = INET_ECN_is_ce(ecnfield); @@ -191,9 +191,24 @@ static inline void tcp_ecn_received_counters(struct sock *sk, tp->received_ce += pcount; tp->received_ce_pending = min(tp->received_ce_pending + pcount, 0xfU); + + if (len > 0) + tp->received_ecn_bytes[ecnfield - 1] += len; } } +/* AccECN specification, 2.2: [...] 
A Data Receiver maintains four counters + * initialized at the start of the half-connection. [...] These byte counters + * reflect only the TCP payload length, excluding TCP header and TCP options. + */ +static inline void tcp_ecn_received_counters_payload(struct sock *sk, + const struct sk_buff *skb) +{ + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4); +} + /* AccECN specification, 5.1: [...] a server can determine that it * negotiated AccECN as [...] if the ACK contains an ACE field with * the value 0b010 to 0b111 (decimal 2 to 7). @@ -232,10 +247,22 @@ static inline bool tcp_accecn_syn_requested(const struct tcphdr *th) return ace && ace != 0x3; } +static inline void __tcp_accecn_init_bytes_counters(int *counter_array) +{ + BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1); + BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2); + BUILD_BUG_ON(INET_ECN_CE != 0x3); + + counter_array[INET_ECN_ECT_1 - 1] = 0; + counter_array[INET_ECN_ECT_0 - 1] = 0; + counter_array[INET_ECN_CE - 1] = 0; +} + static inline void tcp_accecn_init_counters(struct tcp_sock *tp) { tp->received_ce = 0; tp->received_ce_pending = 0; + __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); } /* Used for make_synack to form the ACE flags */ -- cgit v1.2.3 From b5e74132dfbe60329b3ff0e5c485039f2e31605c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:30 +0200 Subject: tcp: accecn: AccECN option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Accurate ECN allows echoing back the sum of bytes for each IP ECN field value in the received packets using AccECN option. This change implements AccECN option tx & rx side processing without option send control related features that are added by a later change. 
Based on specification: https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt (Some features of the spec will be added in the later changes rather than in this one). A full-length AccECN option is always attempted but if it does not fit, the minimum length is selected based on the counters that have changed since the last update. The AccECN option (with 24-bit fields) often ends in odd sizes so the option write code tries to take advantage of some nop used to pad the other TCP options. The delivered_ecn_bytes pairs with received_ecn_bytes similar to how delivered_ce pairs with received_ce. In contrast to ACE field, however, the option is not always available to update delivered_ecn_bytes. For ACK w/o AccECN option, the delivered bytes calculated based on the cumulative ACK+SACK information are assigned to one of the counters using an estimation heuristic to select the most likely ECN byte counter. Any estimation error is corrected when the next AccECN option arrives. It may occur that the heuristic gets too confused when there are enough different byte counter deltas between ACKs with the AccECN option in which case the heuristic just gives up on updating the counters for a while. tcp_ecn_option sysctl can be used to select option sending mode for AccECN: TCP_ACCECN_OPTION_DISABLED, TCP_ACCECN_OPTION_MINIMUM, and TCP_ACCECN_OPTION_FULL. This patch increases the size of tcp_info struct, as there are no existing holes for new u32 variables. Below are the pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_info { [...] __u32 tcpi_total_rto_time; /* 244 4 */ /* size: 248, cachelines: 4, members: 61 */ } [AFTER THIS PATCH] struct tcp_info { [...] 
__u32 tcpi_total_rto_time; /* 244 4 */ __u32 tcpi_received_ce; /* 248 4 */ __u32 tcpi_delivered_e1_bytes; /* 252 4 */ __u32 tcpi_delivered_e0_bytes; /* 256 4 */ __u32 tcpi_delivered_ce_bytes; /* 260 4 */ __u32 tcpi_received_e1_bytes; /* 264 4 */ __u32 tcpi_received_e0_bytes; /* 268 4 */ __u32 tcpi_received_ce_bytes; /* 272 4 */ /* size: 280, cachelines: 5, members: 68 */ } This patch uses the existing 1-byte holes in the tcp_sock_write_txrx group for new u8 members, but adds a 4-byte hole in tcp_sock_write_rx group after the new u32 delivered_ecn_bytes[3] member. Therefore, the group size of tcp_sock_write_rx is increased from 96 to 112. Below are the pahole outcomes before and after this patch: [BEFORE THIS PATCH] struct tcp_sock { [...] u8 received_ce_pending:4; /* 2522: 0 1 */ u8 unused2:4; /* 2522: 4 1 */ /* XXX 1 byte hole, try to pack */ [...] u32 rcv_rtt_last_tsecr; /* 2668 4 */ [...] __cacheline_group_end__tcp_sock_write_rx[0]; /* 2728 0 */ [...] /* size: 3200, cachelines: 50, members: 167 */ } [AFTER THIS PATCH] struct tcp_sock { [...] u8 received_ce_pending:4;/* 2522: 0 1 */ u8 unused2:4; /* 2522: 4 1 */ u8 accecn_minlen:2; /* 2523: 0 1 */ u8 est_ecnfield:2; /* 2523: 2 1 */ u8 unused3:4; /* 2523: 4 1 */ [...] u32 rcv_rtt_last_tsecr; /* 2668 4 */ u32 delivered_ecn_bytes[3];/* 2672 12 */ /* XXX 4 bytes hole, try to pack */ [...] __cacheline_group_end__tcp_sock_write_rx[0]; /* 2744 0 */ [...] 
/* size: 3200, cachelines: 50, members: 171 */ } Signed-off-by: Ilpo Järvinen Signed-off-by: Neal Cardwell Co-developed-by: Chia-Yu Chang Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-7-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 9 +++-- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 13 +++++++ include/net/tcp_ecn.h | 89 +++++++++++++++++++++++++++++++++++++++++++++++- include/uapi/linux/tcp.h | 7 ++++ 5 files changed, 116 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 012d01347b3c..73557656cb2d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -122,8 +122,9 @@ struct tcp_options_received { smc_ok : 1, /* SMC seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ - u8 saw_unknown:1, /* Received unknown option */ - unused:7; + u8 accecn:6, /* AccECN index in header, 0=no options */ + saw_unknown:1, /* Received unknown option */ + unused:1; u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ @@ -293,6 +294,9 @@ struct tcp_sock { rate_app_limited:1; /* rate_{delivered,interval_us} limited? 
*/ u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */ unused2:4; + u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ + est_ecnfield:2,/* ECN field for AccECN delivered estimates */ + unused3:4; __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ @@ -337,6 +341,7 @@ struct tcp_sock { u32 rate_delivered; /* saved rate sample: packets delivered */ u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_rtt_last_tsecr; + u32 delivered_ecn_bytes[3]; u64 first_tx_mstamp; /* start of window send phase */ u64 delivered_mstamp; /* time we reached "delivered" */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 54a7d187f62a..acbb7dd497e1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -148,6 +148,7 @@ struct netns_ipv4 { struct local_ports ip_local_ports; u8 sysctl_tcp_ecn; + u8 sysctl_tcp_ecn_option; u8 sysctl_tcp_ecn_fallback; u8 sysctl_ip_default_ttl; diff --git a/include/net/tcp.h b/include/net/tcp.h index da8c6640ead3..6be29129465e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -213,6 +213,8 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCPOPT_AO 29 /* Authentication Option (RFC5925) */ #define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ +#define TCPOPT_ACCECN0 172 /* 0xAC: Accurate ECN Order 0 */ +#define TCPOPT_ACCECN1 174 /* 0xAE: Accurate ECN Order 1 */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. 
See draft-ietf-tcpm-experimental-options-00.txt @@ -230,6 +232,7 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_MD5SIG 18 #define TCPOLEN_FASTOPEN_BASE 2 +#define TCPOLEN_ACCECN_BASE 2 #define TCPOLEN_EXP_FASTOPEN_BASE 4 #define TCPOLEN_EXP_SMC_BASE 6 @@ -243,6 +246,13 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCPOLEN_MD5SIG_ALIGNED 20 #define TCPOLEN_MSS_ALIGNED 4 #define TCPOLEN_EXP_SMC_BASE_ALIGNED 8 +#define TCPOLEN_ACCECN_PERFIELD 3 + +/* Maximum number of byte counters in AccECN option + size */ +#define TCP_ACCECN_NUMFIELDS 3 +#define TCP_ACCECN_MAXSIZE (TCPOLEN_ACCECN_BASE + \ + TCPOLEN_ACCECN_PERFIELD * \ + TCP_ACCECN_NUMFIELDS) /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -981,6 +991,9 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq) * See draft-ietf-tcpm-accurate-ecn for the latest values. */ #define TCP_ACCECN_CEP_INIT_OFFSET 5 +#define TCP_ACCECN_E1B_INIT_OFFSET 1 +#define TCP_ACCECN_E0B_INIT_OFFSET 1 +#define TCP_ACCECN_CEB_INIT_OFFSET 0 /* State flags for sacked in struct tcp_skb_cb */ enum tcp_skb_cb_sacked_flags { diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 1a41a459aa07..08c7f4757e4e 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -24,6 +24,13 @@ enum tcp_ecn_mode { TCP_ECN_IN_ACCECN_OUT_NOECN = 5, }; +/* AccECN option sending when AccECN has been successfully negotiated */ +enum tcp_accecn_option { + TCP_ACCECN_OPTION_DISABLED = 0, + TCP_ACCECN_OPTION_MINIMUM = 1, + TCP_ACCECN_OPTION_FULL = 2, +}; + static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) { /* Do not set CWR if in AccECN mode! */ @@ -169,6 +176,79 @@ static inline void tcp_accecn_third_ack(struct sock *sk, } } +/* Maps IP ECN field ECT/CE code point to AccECN option field number, given + * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0). 
+ */ +static inline u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield) +{ + switch (ecnfield & INET_ECN_MASK) { + case INET_ECN_NOT_ECT: + return 0; /* AccECN does not send counts of NOT_ECT */ + case INET_ECN_ECT_1: + return 1; + case INET_ECN_CE: + return 2; + case INET_ECN_ECT_0: + return 3; + } + return 0; +} + +/* Maps IP ECN field ECT/CE code point to AccECN option field value offset. + * Some fields do not start from zero, to detect zeroing by middleboxes. + */ +static inline u32 tcp_accecn_field_init_offset(u8 ecnfield) +{ + switch (ecnfield & INET_ECN_MASK) { + case INET_ECN_NOT_ECT: + return 0; /* AccECN does not send counts of NOT_ECT */ + case INET_ECN_ECT_1: + return TCP_ACCECN_E1B_INIT_OFFSET; + case INET_ECN_CE: + return TCP_ACCECN_CEB_INIT_OFFSET; + case INET_ECN_ECT_0: + return TCP_ACCECN_E0B_INIT_OFFSET; + } + return 0; +} + +/* Maps AccECN option field #nr to IP ECN field ECT/CE bits */ +static inline unsigned int tcp_accecn_optfield_to_ecnfield(unsigned int option, + bool order) +{ + /* Based on Table 5 of the AccECN spec to map (option, order) to + * the corresponding ECN counters (ECT-1, ECT-0, or CE). + */ + static const u8 optfield_lookup[2][3] = { + /* order = 0: 1st field ECT-0, 2nd field CE, 3rd field ECT-1 */ + { INET_ECN_ECT_0, INET_ECN_CE, INET_ECN_ECT_1 }, + /* order = 1: 1st field ECT-1, 2nd field CE, 3rd field ECT-0 */ + { INET_ECN_ECT_1, INET_ECN_CE, INET_ECN_ECT_0 } + }; + + return optfield_lookup[order][option % 3]; +} + +/* Handles AccECN option ECT and CE 24-bit byte counters update into + * the u32 value in tcp_sock. As we're processing TCP options, it is + * safe to access from - 1. 
+ */ +static inline s32 tcp_update_ecn_bytes(u32 *cnt, const char *from, + u32 init_offset) +{ + u32 truncated = (get_unaligned_be32(from - 1) - init_offset) & + 0xFFFFFFU; + u32 delta = (truncated - *cnt) & 0xFFFFFFU; + + /* If delta has the highest bit set (24th bit) indicating + * negative, sign extend to correct an estimation using + * sign_extend32(delta, 24 - 1) + */ + delta = sign_extend32(delta, 23); + *cnt += delta; + return (s32)delta; +} + /* Updates Accurate ECN received counters from the received IP ECN field */ static inline void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb, u32 len) @@ -192,8 +272,12 @@ static inline void tcp_ecn_received_counters(struct sock *sk, tp->received_ce_pending = min(tp->received_ce_pending + pcount, 0xfU); - if (len > 0) + if (len > 0) { + u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield); tp->received_ecn_bytes[ecnfield - 1] += len; + tp->accecn_minlen = max_t(u8, tp->accecn_minlen, + minlen); + } } } @@ -263,6 +347,9 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp) tp->received_ce = 0; tp->received_ce_pending = 0; __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); + __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); + tp->accecn_minlen = 0; + tp->est_ecnfield = 0; } /* Used for make_synack to form the ACE flags */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index bdac8c42fa82..53e0e85b52be 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -316,6 +316,13 @@ struct tcp_info { * in milliseconds, including any * unfinished recovery. 
*/ + __u32 tcpi_received_ce; /* # of CE marks received */ + __u32 tcpi_delivered_e1_bytes; /* Accurate ECN byte counters */ + __u32 tcpi_delivered_e0_bytes; + __u32 tcpi_delivered_ce_bytes; + __u32 tcpi_received_e1_bytes; + __u32 tcpi_received_e0_bytes; + __u32 tcpi_received_ce_bytes; }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ -- cgit v1.2.3 From aa55a7dde7ec506bb23448a5005ae3f4f809d022 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Tue, 16 Sep 2025 10:24:31 +0200 Subject: tcp: accecn: AccECN option send control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of sending the option in every ACK, limit sending to those ACKs where the option is necessary: - Handshake - "Change-triggered ACK" + the ACK following it. The 2nd ACK is necessary to unambiguously indicate which of the ECN byte counters is increasing. The first ACK has two counters increasing due to the ecnfield edge. - ACKs with CE to allow CEP delta validations to take advantage of the option. - Force the option to be sent at least once per 2^22 bytes. The check is done using the bit edges of the byte counters (avoids need for extra variables). - AccECN option beacon to send a few times per RTT even if nothing in the ECN state requires that. The default is 3 times per RTT, and its period can be set via sysctl_tcp_ecn_option_beacon. Below are the pahole outcomes before and after this patch, in which the group size of tcp_sock_write_tx is increased from 89 to 97 due to the new u64 accecn_opt_tstamp member: [BEFORE THIS PATCH] struct tcp_sock { [...] u64 tcp_wstamp_ns; /* 2488 8 */ struct list_head tsorted_sent_queue; /* 2496 16 */ [...] 
__cacheline_group_end__tcp_sock_write_tx[0]; /* 2521 0 */ __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2521 0 */ u8 nonagle:4; /* 2521: 0 1 */ u8 rate_app_limited:1; /* 2521: 4 1 */ /* XXX 3 bits hole, try to pack */ /* Force alignment to the next boundary: */ u8 :0; u8 received_ce_pending:4;/* 2522: 0 1 */ u8 unused2:4; /* 2522: 4 1 */ u8 accecn_minlen:2; /* 2523: 0 1 */ u8 est_ecnfield:2; /* 2523: 2 1 */ u8 unused3:4; /* 2523: 4 1 */ [...] __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2628 0 */ [...] /* size: 3200, cachelines: 50, members: 171 */ } [AFTER THIS PATCH] struct tcp_sock { [...] u64 tcp_wstamp_ns; /* 2488 8 */ u64 accecn_opt_tstamp; /* 2496 8 */ struct list_head tsorted_sent_queue; /* 2504 16 */ [...] __cacheline_group_end__tcp_sock_write_tx[0]; /* 2529 0 */ __cacheline_group_begin__tcp_sock_write_txrx[0]; /* 2529 0 */ u8 nonagle:4; /* 2529: 0 1 */ u8 rate_app_limited:1; /* 2529: 4 1 */ /* XXX 3 bits hole, try to pack */ /* Force alignment to the next boundary: */ u8 :0; u8 received_ce_pending:4;/* 2530: 0 1 */ u8 unused2:4; /* 2530: 4 1 */ u8 accecn_minlen:2; /* 2531: 0 1 */ u8 est_ecnfield:2; /* 2531: 2 1 */ u8 accecn_opt_demand:2; /* 2531: 4 1 */ u8 prev_ecnfield:2; /* 2531: 6 1 */ [...] __cacheline_group_end__tcp_sock_write_txrx[0]; /* 2636 0 */ [...] 
/* size: 3200, cachelines: 50, members: 173 */ } Signed-off-by: Chia-Yu Chang Co-developed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-8-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 4 +++- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 3 +++ include/net/tcp_ecn.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 73557656cb2d..f637b659b35a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -275,6 +275,7 @@ struct tcp_sock { u32 mdev_us; /* medium deviation */ u32 rtt_seq; /* sequence number to update rttvar */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 accecn_opt_tstamp; /* Last AccECN option sent timestamp */ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ struct sk_buff *highest_sack; /* skb just after the highest * skb with SACKed bit set @@ -296,7 +297,8 @@ struct tcp_sock { unused2:4; u8 accecn_minlen:2,/* Minimum length of AccECN option sent */ est_ecnfield:2,/* ECN field for AccECN delivered estimates */ - unused3:4; + accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */ + prev_ecnfield:2; /* ECN bits from the previous segment */ __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index acbb7dd497e1..34eb3aecb3f2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -149,6 +149,7 @@ struct netns_ipv4 { u8 sysctl_tcp_ecn; u8 sysctl_tcp_ecn_option; + u8 sysctl_tcp_ecn_option_beacon; u8 sysctl_tcp_ecn_fallback; u8 sysctl_ip_default_ttl; diff --git a/include/net/tcp.h b/include/net/tcp.h index 6be29129465e..78dd7b8a4145 100644 --- 
a/include/net/tcp.h +++ b/include/net/tcp.h @@ -100,6 +100,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* Maximal number of window scale according to RFC1323 */ #define TCP_MAX_WSCALE 14U +/* Default sending frequency of accurate ECN option per RTT */ +#define TCP_ACCECN_OPTION_BEACON 3 + /* urg_data states */ #define TCP_URG_VALID 0x0100 #define TCP_URG_NOTYET 0x0200 diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 08c7f4757e4e..133fb6b79500 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -176,6 +176,17 @@ static inline void tcp_accecn_third_ack(struct sock *sk, } } +/* Demand the minimum # to send AccECN option */ +static inline void tcp_accecn_opt_demand_min(struct sock *sk, + u8 opt_demand_min) +{ + struct tcp_sock *tp = tcp_sk(sk); + u8 opt_demand; + + opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand); + tp->accecn_opt_demand = opt_demand; +} + /* Maps IP ECN field ECT/CE code point to AccECN option field number, given * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0). */ @@ -256,6 +267,7 @@ static inline void tcp_ecn_received_counters(struct sock *sk, u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; u8 is_ce = INET_ECN_is_ce(ecnfield); struct tcp_sock *tp = tcp_sk(sk); + bool ecn_edge; if (!INET_ECN_is_not_ect(ecnfield)) { u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs); @@ -274,9 +286,34 @@ static inline void tcp_ecn_received_counters(struct sock *sk, if (len > 0) { u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield); + u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1]; + u32 bytes_mask = GENMASK_U32(31, 22); + tp->received_ecn_bytes[ecnfield - 1] += len; tp->accecn_minlen = max_t(u8, tp->accecn_minlen, minlen); + + /* Send AccECN option at least once per 2^22-byte + * increase in any ECN byte counter. 
+ */ + if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) & + bytes_mask) { + tcp_accecn_opt_demand_min(sk, 1); + } + } + } + + ecn_edge = tp->prev_ecnfield != ecnfield; + if (ecn_edge || is_ce) { + tp->prev_ecnfield = ecnfield; + /* Demand Accurate ECN change-triggered ACKs. Two ACK are + * demanded to indicate unambiguously the ecnfield value + * in the latter ACK. + */ + if (tcp_ecn_mode_accecn(tp)) { + if (ecn_edge) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + tp->accecn_opt_demand = 2; } } } @@ -349,6 +386,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp) __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); tp->accecn_minlen = 0; + tp->accecn_opt_demand = 0; tp->est_ecnfield = 0; } @@ -431,6 +469,7 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct tcphdr *th, default: tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK; + tp->accecn_opt_demand = 2; if (INET_ECN_is_ce(ip_dsfield) && tcp_accecn_validate_syn_feedback(sk, ace, tp->syn_ect_snt)) { @@ -451,6 +490,7 @@ static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, } else { tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; + tp->prev_ecnfield = tp->syn_ect_rcv; tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); } } @@ -542,4 +582,16 @@ tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) th->ece = 1; } +static inline bool tcp_accecn_option_beacon_check(const struct sock *sk) +{ + u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon); + const struct tcp_sock *tp = tcp_sk(sk); + + if (!ecn_beacon) + return false; + + return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * ecn_beacon >= + (tp->srtt_us >> 3); +} + #endif /* _LINUX_TCP_ECN_H */ -- cgit v1.2.3 From b40671b5ee588c8a61b2d0eacbad32ffc57e9a8f Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Tue, 16 Sep 2025 10:24:32 +0200 
Subject: tcp: accecn: AccECN option failure handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AccECN option may fail in various ways, handle these: - Attempt to negotiate the use of AccECN on the 1st retransmitted SYN - From the 2nd retransmitted SYN, stop AccECN negotiation - Remove option from SYN/ACK rexmits to handle blackholes - If no option arrives in SYN/ACK, assume Option is not usable - If an option arrives later, re-enable it - If option is zeroed, disable AccECN option processing This patch uses existing padding bits in tcp_request_sock and holes in tcp_sock without increasing the size. Signed-off-by: Ilpo Järvinen Signed-off-by: Chia-Yu Chang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-9-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/linux/tcp.h | 4 +++- include/net/tcp_ecn.h | 51 +++++++++++++++++++++++++++++++++++++++++++++--- include/uapi/linux/tcp.h | 2 ++ 3 files changed, 53 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f637b659b35a..3ca5ed02de6d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -173,6 +173,7 @@ struct tcp_request_sock { u8 syn_ect_snt: 2, syn_ect_rcv: 2, accecn_fail_mode:4; + u8 saw_accecn_opt :2; #ifdef CONFIG_TCP_AO u8 ao_keyid; u8 ao_rcv_next; @@ -407,7 +408,8 @@ struct tcp_sock { syn_fastopen_child:1; /* created TFO passive child socket */ u8 keepalive_probes; /* num of allowed keep alive probes */ - u8 accecn_fail_mode:4; /* AccECN failure handling */ + u8 accecn_fail_mode:4, /* AccECN failure handling */ + saw_accecn_opt:2; /* An AccECN option was seen */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ /* RTT measurement */ diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 133fb6b79500..f13e5cd2b1ac 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -91,6 +91,11 @@ static inline void 
tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode) tp->accecn_fail_mode |= mode; } +#define TCP_ACCECN_OPT_NOT_SEEN 0x0 +#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1 +#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2 +#define TCP_ACCECN_OPT_FAIL_SEEN 0x3 + static inline u8 tcp_accecn_ace(const struct tcphdr *th) { return (th->ae << 2) | (th->cwr << 1) | th->ece; @@ -146,6 +151,14 @@ static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace, return true; } +static inline void tcp_accecn_saw_opt_fail_recv(struct tcp_sock *tp, + u8 saw_opt) +{ + tp->saw_accecn_opt = saw_opt; + if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN) + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV); +} + /* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */ static inline void tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb, u8 sent_ect) @@ -428,9 +441,35 @@ static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb, } } +static inline u8 tcp_accecn_option_init(const struct sk_buff *skb, + u8 opt_offset) +{ + u8 *ptr = skb_transport_header(skb) + opt_offset; + unsigned int optlen = ptr[1] - 2; + + if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1)) + return TCP_ACCECN_OPT_FAIL_SEEN; + ptr += 2; + + /* Detect option zeroing: an AccECN connection "MAY check that the + * initial value of the EE0B field or the EE1B field is non-zero" + */ + if (optlen < TCPOLEN_ACCECN_PERFIELD) + return TCP_ACCECN_OPT_EMPTY_SEEN; + if (get_unaligned_be24(ptr) == 0) + return TCP_ACCECN_OPT_FAIL_SEEN; + if (optlen < TCPOLEN_ACCECN_PERFIELD * 3) + return TCP_ACCECN_OPT_COUNTER_SEEN; + ptr += TCPOLEN_ACCECN_PERFIELD * 2; + if (get_unaligned_be24(ptr) == 0) + return TCP_ACCECN_OPT_FAIL_SEEN; + + return TCP_ACCECN_OPT_COUNTER_SEEN; +} + /* See Table 2 of the AccECN draft */ -static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct tcphdr *th, - u8 ip_dsfield) +static inline void tcp_ecn_rcv_synack(struct sock *sk, 
const struct sk_buff *skb, + const struct tcphdr *th, u8 ip_dsfield) { struct tcp_sock *tp = tcp_sk(sk); u8 ace = tcp_accecn_ace(th); @@ -469,7 +508,13 @@ static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct tcphdr *th, default: tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK; - tp->accecn_opt_demand = 2; + if (tp->rx_opt.accecn && + tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { + u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); + + tcp_accecn_saw_opt_fail_recv(tp, saw_opt); + tp->accecn_opt_demand = 2; + } if (INET_ECN_is_ce(ip_dsfield) && tcp_accecn_validate_syn_feedback(sk, ace, tp->syn_ect_snt)) { diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 53e0e85b52be..dce3113787a7 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -323,6 +323,8 @@ struct tcp_info { __u32 tcpi_received_e1_bytes; __u32 tcpi_received_e0_bytes; __u32 tcpi_received_ce_bytes; + __u16 tcpi_accecn_fail_mode; + __u16 tcpi_accecn_opt_seen; }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ -- cgit v1.2.3 From fe2cddc648f0d7cdf7377e1cb5a8c7dc5547e290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 16 Sep 2025 10:24:33 +0200 Subject: tcp: accecn: AccECN option ceb/cep and ACE field multi-wrap heuristics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AccECN option ceb/cep heuristic algorithm is from AccECN spec Appendix A.2.2 to mitigate against false ACE field overflows. Armed with ceb delta from option, delivered bytes, and delivered packets it is possible to estimate how many times ACE field wrapped. This calculation is necessary only if more than one wrap is possible. Without SACK, delivered bytes and packets are not always trustworthy in which case TCP falls back to the simpler no-or-all wraps ceb algorithm. 
Signed-off-by: Ilpo Järvinen Signed-off-by: Chia-Yu Chang Acked-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250916082434.100722-10-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 78dd7b8a4145..7c51a0a5ace8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -256,6 +256,7 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCP_ACCECN_MAXSIZE (TCPOLEN_ACCECN_BASE + \ TCPOLEN_ACCECN_PERFIELD * \ TCP_ACCECN_NUMFIELDS) +#define TCP_ACCECN_SAFETY_SHIFT 1 /* SAFETY_FACTOR in accecn draft */ /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ -- cgit v1.2.3 From 3fbb2a6f3a70c27a6a2be80d131970608c0f84d0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:42 +0000 Subject: ipv6: make ipv6_pinfo.saddr_cache a boolean ipv6_pinfo.saddr_cache is either NULL or &np->saddr. We do not need 8 bytes, a boolean is enough. 
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-2-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 4 ++-- include/net/ip6_route.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f43314517396..55c4d1e4dd7d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -216,10 +216,10 @@ struct inet6_cork { struct ipv6_pinfo { struct in6_addr saddr; struct in6_pktinfo sticky_pktinfo; - const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES - const struct in6_addr *saddr_cache; + bool saddr_cache; #endif + const struct in6_addr *daddr_cache; __be32 flow_label; __u32 frag_size; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 59f48ca3abdf..223c02d42688 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -230,7 +230,7 @@ static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) */ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *daddr, - const struct in6_addr *saddr) + bool saddr_set) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -238,7 +238,7 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES - np->saddr_cache = saddr; + np->saddr_cache = saddr_set; #endif } -- cgit v1.2.3 From 5489f333ef993bfceebce9ae98944f04eaafcc30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:43 +0000 Subject: ipv6: make ipv6_pinfo.daddr_cache a boolean ipv6_pinfo.daddr_cache is either NULL or &sk->sk_v6_daddr We do not need 8 bytes, a boolean is enough. 
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-3-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 2 +- include/net/ip6_route.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 55c4d1e4dd7d..8e6d9f8b3dc8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -219,7 +219,7 @@ struct ipv6_pinfo { #ifdef CONFIG_IPV6_SUBTREES bool saddr_cache; #endif - const struct in6_addr *daddr_cache; + bool daddr_cache; __be32 flow_label; __u32 frag_size; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 223c02d42688..7c5512baa4b2 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -229,14 +229,14 @@ static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) * Store a destination cache entry in a socket */ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - const struct in6_addr *daddr, + bool daddr_set, bool saddr_set) { struct ipv6_pinfo *np = inet6_sk(sk); np->dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); sk_setup_caps(sk, dst); - np->daddr_cache = daddr; + np->daddr_cache = daddr_set; #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr_set; #endif -- cgit v1.2.3 From b76543b21fbcfbb96332fd80cc0d85bbcd72d8f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:45 +0000 Subject: ipv6: reorganise struct ipv6_pinfo Move fields used in tx fast path at the beginning of the structure, and seldom used ones at the end. Note that rxopt is also in the first cache line. 
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-5-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8e6d9f8b3dc8..43b7bb828738 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -214,18 +214,21 @@ struct inet6_cork { /* struct ipv6_pinfo - ipv6 private area */ struct ipv6_pinfo { + /* Used in tx path (inet6_csk_route_socket(), ip6_xmit()) */ struct in6_addr saddr; - struct in6_pktinfo sticky_pktinfo; + __be32 flow_label; + u32 dst_cookie; + struct ipv6_txoptions __rcu *opt; + s16 hop_limit; + u8 pmtudisc; + u8 tclass; #ifdef CONFIG_IPV6_SUBTREES bool saddr_cache; #endif bool daddr_cache; - __be32 flow_label; - __u32 frag_size; - - s16 hop_limit; u8 mcast_hops; + u32 frag_size; int ucast_oif; int mcast_oif; @@ -233,7 +236,7 @@ struct ipv6_pinfo { /* pktoption flags */ union { struct { - __u16 srcrt:1, + u16 srcrt:1, osrcrt:1, rxinfo:1, rxoinfo:1, @@ -250,29 +253,25 @@ struct ipv6_pinfo { recvfragsize:1; /* 1 bits hole */ } bits; - __u16 all; + u16 all; } rxopt; /* sockopt flags */ - __u8 srcprefs; /* 001: prefer temporary address + u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - __u8 pmtudisc; - __u8 min_hopcount; - __u8 tclass; + u8 min_hopcount; __be32 rcv_flowinfo; + struct in6_pktinfo sticky_pktinfo; - __u32 dst_cookie; + struct sk_buff *pktoptions; + struct sk_buff *rxpmtu; + struct inet6_cork cork; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - - struct ipv6_txoptions __rcu *opt; - struct sk_buff *pktoptions; - struct sk_buff *rxpmtu; - struct inet6_cork cork; }; /* 
We currently use available bits from inet_sk(sk)->inet_flags, -- cgit v1.2.3 From 4effb335b5dab08cb6e2c38d038910f8b527cfc9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:48 +0000 Subject: net: group sk_backlog and sk_receive_queue UDP receivers suffer from sk_rmem_alloc updates, currently sharing a cache line with fields that need to be read-mostly (sock_read_rx group): 1) RFS enabled hosts read sk_napi_id from __udpv6_queue_rcv_skb(). 2) sk->sk_rcvbuf is read from __udp_enqueue_schedule_skb() /* --- cacheline 3 boundary (192 bytes) --- */ struct { atomic_t rmem_alloc; /* 0xc0 0x4 */ // Oops int len; /* 0xc4 0x4 */ struct sk_buff * head; /* 0xc8 0x8 */ struct sk_buff * tail; /* 0xd0 0x8 */ } sk_backlog; /* 0xc0 0x18 */ __u8 __cacheline_group_end__sock_write_rx[0]; /* 0xd8 0 */ __u8 __cacheline_group_begin__sock_read_rx[0]; /* 0xd8 0 */ struct dst_entry * sk_rx_dst; /* 0xd8 0x8 */ int sk_rx_dst_ifindex;/* 0xe0 0x4 */ u32 sk_rx_dst_cookie; /* 0xe4 0x4 */ unsigned int sk_ll_usec; /* 0xe8 0x4 */ unsigned int sk_napi_id; /* 0xec 0x4 */ u16 sk_busy_poll_budget;/* 0xf0 0x2 */ u8 sk_prefer_busy_poll;/* 0xf2 0x1 */ u8 sk_userlocks; /* 0xf3 0x1 */ int sk_rcvbuf; /* 0xf4 0x4 */ struct sk_filter * sk_filter; /* 0xf8 0x8 */ Move sk_error (which is less often dirtied) there. Alternative would be to cache align sock_read_rx but this has more implications/risks. 
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-8-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 0fd465935334..867dc44140d4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -394,7 +394,6 @@ struct sock { atomic_t sk_drops; __s32 sk_peek_off; - struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -412,6 +411,7 @@ struct sock { } sk_backlog; #define sk_rmem_alloc sk_backlog.rmem_alloc + struct sk_buff_head sk_error_queue; __cacheline_group_end(sock_write_rx); __cacheline_group_begin(sock_read_rx); -- cgit v1.2.3 From 9db27c80622bd612549ea213390500f7377ee3e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:49 +0000 Subject: udp: add udp_drops_inc() helper Generic sk_drops_inc() reads sk->sk_drop_counters. We know the precise location for UDP sockets. Move sk_drop_counters out of sock_read_rxtx so that sock_write_rxtx starts at a cache line boundary. 
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-9-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/net/sock.h | 2 +- include/net/udp.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 867dc44140d4..82bcdb7d7e67 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -451,7 +451,6 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct numa_drop_counters *sk_drop_counters; __cacheline_group_end(sock_read_rxtx); __cacheline_group_begin(sock_write_rxtx); @@ -568,6 +567,7 @@ struct sock { #ifdef CONFIG_BPF_SYSCALL struct bpf_local_storage __rcu *sk_bpf_storage; #endif + struct numa_drop_counters *sk_drop_counters; struct rcu_head sk_rcu; netns_tracker ns_tracker; struct xarray sk_user_frags; diff --git a/include/net/udp.h b/include/net/udp.h index 93b159f30e88..a08822e294b0 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -295,6 +295,11 @@ static inline void udp_lib_init_sock(struct sock *sk) set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); } +static inline void udp_drops_inc(struct sock *sk) +{ + numa_drop_add(&udp_sk(sk)->drop_counters, 1); +} + /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { -- cgit v1.2.3 From 3cd04c8f4afed71a48edef0db5255afc249c2feb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Sep 2025 16:09:50 +0000 Subject: udp: make busylock per socket While having all spinlocks packed into an array was a space saver, this also caused NUMA imbalance and hash collisions. UDPv6 socket size becomes 1600 after this patch. 
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250916160951.541279-10-edumazet@google.com Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/udp.h | 1 + include/net/udp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 6ed008ab1665..e554890c4415 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -109,6 +109,7 @@ struct udp_sock { */ struct hlist_node tunnel_list; struct numa_drop_counters drop_counters; + spinlock_t busylock ____cacheline_aligned_in_smp; }; #define udp_test_bit(nr, sk) \ diff --git a/include/net/udp.h b/include/net/udp.h index a08822e294b0..eecd64097f91 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -289,6 +289,7 @@ static inline void udp_lib_init_sock(struct sock *sk) struct udp_sock *up = udp_sk(sk); sk->sk_drop_counters = &up->drop_counters; + spin_lock_init(&up->busylock); skb_queue_head_init(&up->reader_queue); INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; -- cgit v1.2.3 From 6b88293aae7fb78872e5cc1ec36e2f750ae12e38 Mon Sep 17 00:00:00 2001 From: Markus Stockhausen Date: Wed, 10 Sep 2025 14:32:58 -0400 Subject: mtd: nand: move nand_check_erased_ecc_chunk() to nand/core The check function for bitflips in erased blocks will be needed by the Realtek ECC engine driver (which is currently under development). Right now it is located in raw/nand_base.c. While this is sufficient for the current usecases, there is no real dependency for an ECC engine on the raw nand library. Move the function over to a more generic place in core library. 
Suggested-by: Miquel Raynal Signed-off-by: Markus Stockhausen Signed-off-by: Miquel Raynal --- include/linux/mtd/nand.h | 5 +++++ include/linux/mtd/rawnand.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 07486168d104..09c8c93e4dba 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1136,4 +1136,9 @@ static inline bool nanddev_bbt_is_initialized(struct nand_device *nand) int nanddev_mtd_erase(struct mtd_info *mtd, struct erase_info *einfo); int nanddev_mtd_max_bad_blocks(struct mtd_info *mtd, loff_t offs, size_t len); +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int threshold); + #endif /* __LINUX_MTD_NAND_H */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index e84522e31301..d30bdc3fcfd7 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1519,11 +1519,6 @@ int rawnand_sw_bch_correct(struct nand_chip *chip, unsigned char *buf, unsigned char *read_ecc, unsigned char *calc_ecc); void rawnand_sw_bch_cleanup(struct nand_chip *chip); -int nand_check_erased_ecc_chunk(void *data, int datalen, - void *ecc, int ecclen, - void *extraoob, int extraooblen, - int threshold); - int nand_ecc_choose_conf(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail); -- cgit v1.2.3 From 2ad49ae7330b8a456edf639c92241a343641a763 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 27 Aug 2025 10:25:36 -0500 Subject: RDMA/irdma: Introduce GEN3 vPort driver support In the IPU model, a function can host one or more logical network endpoints called vPorts. Each vPort may be associated with either a physical or an internal communication port, and can be RDMA capable. A vPort features a netdev and, if RDMA capable, must have an associated ib_dev. 
This change introduces a GEN3 auxiliary vPort driver responsible for registering a verbs device for every RDMA-capable vPort. Additionally, the UAPI is updated to prevent the binding of GEN3 devices to older user-space providers. Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-8-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- include/uapi/rdma/irdma-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index bb18f15489e3..4e42054cca33 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -25,6 +25,7 @@ enum irdma_memreg_type { enum { IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1, + IRDMA_SUPPORT_WQE_FORMAT_V2 = 1 << 3, }; struct irdma_alloc_ucontext_req { -- cgit v1.2.3 From 563e1feb5f6ed579acb55850f1bbb831aecf645a Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Wed, 27 Aug 2025 10:25:41 -0500 Subject: RDMA/irdma: Add SRQ support Implement verb API and UAPI changes to support SRQ functionality in GEN3 devices. 
Signed-off-by: Faisal Latif Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250827152545.2056-13-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni Signed-off-by: Leon Romanovsky --- include/uapi/rdma/irdma-abi.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index 4e42054cca33..f7788d33376b 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -20,11 +20,13 @@ enum irdma_memreg_type { IRDMA_MEMREG_TYPE_MEM = 0, IRDMA_MEMREG_TYPE_QP = 1, IRDMA_MEMREG_TYPE_CQ = 2, + IRDMA_MEMREG_TYPE_SRQ = 3, }; enum { IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1, + IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA = 1 << 2, IRDMA_SUPPORT_WQE_FORMAT_V2 = 1 << 3, }; @@ -55,7 +57,8 @@ struct irdma_alloc_ucontext_resp { __u8 rsvd2; __aligned_u64 comp_mask; __u16 min_hw_wq_size; - __u8 rsvd3[6]; + __u32 max_hw_srq_quanta; + __u8 rsvd3[2]; }; struct irdma_alloc_pd_resp { @@ -72,6 +75,16 @@ struct irdma_create_cq_req { __aligned_u64 user_shadow_area; }; +struct irdma_create_srq_req { + __aligned_u64 user_srq_buf; + __aligned_u64 user_shadow_area; +}; + +struct irdma_create_srq_resp { + __u32 srq_id; + __u32 srq_size; +}; + struct irdma_create_qp_req { __aligned_u64 user_wqe_bufs; __aligned_u64 user_compl_ctx; -- cgit v1.2.3 From 1b34cbbf4f011a121ef7b2d7d6e6920a036d5285 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 16 Sep 2025 17:20:59 +0800 Subject: crypto: af_alg - Disallow concurrent writes in af_alg_sendmsg Issuing two writes to the same af_alg socket is bogus as the data will be interleaved in an unpredictable fashion. Furthermore, concurrent writes may create inconsistencies in the internal socket state. Disallow this by adding a new ctx->write field that indicates exclusive ownership for writing. 
Fixes: 8ff590903d5 ("crypto: algif_skcipher - User-space interface for skcipher operations") Reported-by: Muhammad Alifa Ramdhan Reported-by: Bing-Jhong Billy Jheng Signed-off-by: Herbert Xu --- include/crypto/if_alg.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index f7b3b93f3a49..0c70f3a55575 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -135,6 +135,7 @@ struct af_alg_async_req { * SG? * @enc: Cryptographic operation to be performed when * recvmsg is invoked. + * @write: True if we are in the middle of a write. * @init: True if metadata has been sent. * @len: Length of memory allocated for this data structure. * @inflight: Non-zero when AIO requests are in flight. @@ -151,10 +152,11 @@ struct af_alg_ctx { size_t used; atomic_t rcvused; - bool more; - bool merge; - bool enc; - bool init; + u32 more:1, + merge:1, + enc:1, + write:1, + init:1; unsigned int len; -- cgit v1.2.3 From a3d076b0567e729d5f21a95525c4d096b1f59e79 Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Wed, 17 Sep 2025 16:27:58 +0300 Subject: net/mlx5: Add uar access and odp page fault counters Add bar_uar_access, odp_local_triggered_page_fault, and odp_remote_triggered_page_fault counters to the query_vnic_env command. Additionally, add corresponding capabilities bits to the HCA CAP. 
Signed-off-by: Akiva Goldberger Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1758115678-643464-1-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 097b1b7ada63..0cf187e13def 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1958,7 +1958,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_rqt[0x5]; u8 reserved_at_390[0x3]; u8 log_max_rqt_size[0x5]; - u8 reserved_at_398[0x3]; + u8 reserved_at_398[0x1]; + u8 vnic_env_cnt_bar_uar_access[0x1]; + u8 vnic_env_cnt_odp_page_fault[0x1]; u8 log_max_tis_per_sq[0x5]; u8 ext_stride_num_range[0x1]; @@ -4019,7 +4021,13 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 handled_pkt_steering_fail[0x40]; - u8 reserved_at_360[0xc80]; + u8 bar_uar_access[0x20]; + + u8 odp_local_triggered_page_fault[0x20]; + + u8 odp_remote_triggered_page_fault[0x20]; + + u8 reserved_at_3c0[0xc20]; }; struct mlx5_ifc_traffic_counter_bits { -- cgit v1.2.3 From 00c94ca2b99e6610e483f92e531b319eeaed94aa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:29 -0700 Subject: psp: base PSP device support Add a netlink family for PSP and allow drivers to register support. The "PSP device" is its own object. This allows us to perform more flexible reference counting / lifetime control than if PSP information was part of net_device. In the future we should also be able to "delegate" PSP access to software devices, such as *vlan, veth or netkit more easily. 
Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-3-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 4 ++ include/net/psp.h | 12 ++++++ include/net/psp/functions.h | 14 +++++++ include/net/psp/types.h | 100 ++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/psp.h | 42 +++++++++++++++++++ 5 files changed, 172 insertions(+) create mode 100644 include/net/psp.h create mode 100644 include/net/psp/functions.h create mode 100644 include/net/psp/types.h create mode 100644 include/uapi/linux/psp.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f5a840c07cf1..1c54d44805fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1906,6 +1906,7 @@ enum netdev_reg_state { * device struct * @mpls_ptr: mpls_dev struct pointer * @mctp_ptr: MCTP specific data + * @psp_dev: PSP crypto device registered for this netdev * * @dev_addr: Hw address (before bcast, * because most packets are unicast) @@ -2310,6 +2311,9 @@ struct net_device { #if IS_ENABLED(CONFIG_MCTP) struct mctp_dev __rcu *mctp_ptr; #endif +#if IS_ENABLED(CONFIG_INET_PSP) + struct psp_dev __rcu *psp_dev; +#endif /* * Cache lines mostly used on receive path (including eth_type_trans()) diff --git a/include/net/psp.h b/include/net/psp.h new file mode 100644 index 000000000000..33bb4d1dc46e --- /dev/null +++ b/include/net/psp.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __NET_PSP_ALL_H +#define __NET_PSP_ALL_H + +#include +#include +#include + +/* Do not add any code here. Put it in the sub-headers instead. 
*/ + +#endif /* __NET_PSP_ALL_H */ diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h new file mode 100644 index 000000000000..074f9df9afc3 --- /dev/null +++ b/include/net/psp/functions.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __NET_PSP_HELPERS_H +#define __NET_PSP_HELPERS_H + +#include + +/* Driver-facing API */ +struct psp_dev * +psp_dev_create(struct net_device *netdev, struct psp_dev_ops *psd_ops, + struct psp_dev_caps *psd_caps, void *priv_ptr); +void psp_dev_unregister(struct psp_dev *psd); + +#endif /* __NET_PSP_HELPERS_H */ diff --git a/include/net/psp/types.h b/include/net/psp/types.h new file mode 100644 index 000000000000..d242b1ecee7d --- /dev/null +++ b/include/net/psp/types.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __NET_PSP_H +#define __NET_PSP_H + +#include +#include + +struct netlink_ext_ack; + +#define PSP_DEFAULT_UDP_PORT 1000 + +struct psphdr { + u8 nexthdr; + u8 hdrlen; + u8 crypt_offset; + u8 verfl; + __be32 spi; + __be64 iv; + __be64 vc[]; /* optional */ +}; + +#define PSP_SPI_KEY_ID GENMASK(30, 0) +#define PSP_SPI_KEY_PHASE BIT(31) + +#define PSPHDR_CRYPT_OFFSET GENMASK(5, 0) + +#define PSPHDR_VERFL_SAMPLE BIT(7) +#define PSPHDR_VERFL_DROP BIT(6) +#define PSPHDR_VERFL_VERSION GENMASK(5, 2) +#define PSPHDR_VERFL_VIRT BIT(1) +#define PSPHDR_VERFL_ONE BIT(0) + +#define PSP_HDRLEN_NOOPT ((sizeof(struct psphdr) - 8) / 8) + +/** + * struct psp_dev_config - PSP device configuration + * @versions: PSP versions enabled on the device + */ +struct psp_dev_config { + u32 versions; +}; + +/** + * struct psp_dev - PSP device struct + * @main_netdev: original netdevice of this PSP device + * @ops: driver callbacks + * @caps: device capabilities + * @drv_priv: driver priv pointer + * @lock: instance lock, protects all fields + * @refcnt: reference count for the instance + * @id: instance id + * @config: current device configuration + * + * @rcu: RCU head for freeing the 
structure + */ +struct psp_dev { + struct net_device *main_netdev; + + struct psp_dev_ops *ops; + struct psp_dev_caps *caps; + void *drv_priv; + + struct mutex lock; + refcount_t refcnt; + + u32 id; + + struct psp_dev_config config; + + struct rcu_head rcu; +}; + +/** + * struct psp_dev_caps - PSP device capabilities + */ +struct psp_dev_caps { + /** + * @versions: mask of supported PSP versions + * Set this field to 0 to indicate PSP is not supported at all. + */ + u32 versions; +}; + +#define PSP_MAX_KEY 32 + +/** + * struct psp_dev_ops - netdev driver facing PSP callbacks + */ +struct psp_dev_ops { + /** + * @set_config: set configuration of a PSP device + * Driver can inspect @psd->config for the previous configuration. + * Core will update @psd->config with @config on success. + */ + int (*set_config)(struct psp_dev *psd, struct psp_dev_config *conf, + struct netlink_ext_ack *extack); +}; + +#endif /* __NET_PSP_H */ diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h new file mode 100644 index 000000000000..4a404f085190 --- /dev/null +++ b/include/uapi/linux/psp.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/psp.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_PSP_H +#define _UAPI_LINUX_PSP_H + +#define PSP_FAMILY_NAME "psp" +#define PSP_FAMILY_VERSION 1 + +enum psp_version { + PSP_VERSION_HDR0_AES_GCM_128, + PSP_VERSION_HDR0_AES_GCM_256, + PSP_VERSION_HDR0_AES_GMAC_128, + PSP_VERSION_HDR0_AES_GMAC_256, +}; + +enum { + PSP_A_DEV_ID = 1, + PSP_A_DEV_IFINDEX, + PSP_A_DEV_PSP_VERSIONS_CAP, + PSP_A_DEV_PSP_VERSIONS_ENA, + + __PSP_A_DEV_MAX, + PSP_A_DEV_MAX = (__PSP_A_DEV_MAX - 1) +}; + +enum { + PSP_CMD_DEV_GET = 1, + PSP_CMD_DEV_ADD_NTF, + PSP_CMD_DEV_DEL_NTF, + PSP_CMD_DEV_SET, + PSP_CMD_DEV_CHANGE_NTF, + + __PSP_CMD_MAX, + PSP_CMD_MAX = (__PSP_CMD_MAX - 1) +}; + +#define PSP_MCGRP_MGMT "mgmt" + +#endif 
/* _UAPI_LINUX_PSP_H */ -- cgit v1.2.3 From ed8a507b748336902525aa79e3573552534e8b3e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:30 -0700 Subject: net: modify core data structures for PSP datapath support Add pointers to psp data structures to core networking structs, and an SKB extension to carry the PSP information from the drivers to the socket layer. Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Co-developed-by: Daniel Zahka Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-4-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 3 +++ include/net/inet_timewait_sock.h | 3 +++ include/net/psp/functions.h | 6 ++++++ include/net/psp/types.h | 7 +++++++ include/net/sock.h | 4 ++++ 5 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 62e7addccdf6..78ecfa7d00d0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4901,6 +4901,9 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_MCTP_FLOWS) SKB_EXT_MCTP, +#endif +#if IS_ENABLED(CONFIG_INET_PSP) + SKB_EXT_PSP, #endif SKB_EXT_NUM, /* must be last */ }; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 67a313575780..c1295246216c 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -81,6 +81,9 @@ struct inet_timewait_sock { struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; struct inet_bind2_bucket *tw_tb2; +#if IS_ENABLED(CONFIG_INET_PSP) + struct psp_assoc __rcu *psp_assoc; +#endif }; #define tw_tclass tw_tos diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index 074f9df9afc3..d0043bd14299 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -5,10 +5,16 @@ #include +struct inet_timewait_sock; + /* Driver-facing API */ struct psp_dev * psp_dev_create(struct net_device *netdev, struct 
psp_dev_ops *psd_ops, struct psp_dev_caps *psd_caps, void *priv_ptr); void psp_dev_unregister(struct psp_dev *psd); +/* Kernel-facing API */ +static inline void psp_sk_assoc_free(struct sock *sk) { } +static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { } + #endif /* __NET_PSP_HELPERS_H */ diff --git a/include/net/psp/types.h b/include/net/psp/types.h index d242b1ecee7d..4922fc8d42fd 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -84,6 +84,13 @@ struct psp_dev_caps { #define PSP_MAX_KEY 32 +struct psp_skb_ext { + __be32 spi; + u16 dev_id; + u8 generation; + u8 version; +}; + /** * struct psp_dev_ops - netdev driver facing PSP callbacks */ diff --git a/include/net/sock.h b/include/net/sock.h index 0fd465935334..d1d3d36e39ae 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -249,6 +249,7 @@ struct sk_filter; * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy + * @psp_assoc: PSP association, if socket is PSP-secured * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_tsq_flags: TCP Small Queues flags @@ -450,6 +451,9 @@ struct sock { #endif #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; +#endif +#if IS_ENABLED(CONFIG_INET_PSP) + struct psp_assoc __rcu *psp_assoc; #endif struct numa_drop_counters *sk_drop_counters; __cacheline_group_end(sock_read_rxtx); -- cgit v1.2.3 From 659a2899a57da59f433182eba571881884d6323e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:31 -0700 Subject: tcp: add datapath logic for PSP with inline key exchange Add validation points and state propagation to support PSP key exchange inline, on TCP connections. The expectation is that application will use some well established mechanism like TLS handshake to establish a secure channel over the connection and if both endpoints are PSP-capable - exchange and install PSP keys. 
Because the connection can exist in PSP-unsecured and PSP-secured states we need to make sure that there are no race conditions or retransmission leaks. On Tx - mark packets with the skb->decrypted bit when a PSP key is present at enqueue time. Drivers should only encrypt packets with this bit set. This prevents retransmissions getting encrypted when the original transmission was not. Similarly to TLS, we'll use sk->sk_validate_xmit_skb to make sure PSP skbs can't "escape" via a PSP-unaware device without being encrypted. On Rx - validation is done under socket lock. This moves the validation point later than xfrm, for example. Please see the documentation patch for more details on the flow of securing a connection, but for the purpose of this patch what's important is that we want to enforce the invariant that once a connection is secured any skb in the receive queue has been encrypted with PSP. Add GRO and coalescing checks to prevent PSP authenticated data from being combined with cleartext data, or data with non-matching PSP state. On Rx, check skb's with psp_skb_coalesce_diff() at points before psp_sk_rx_policy_check(). After skb's are policy checked and on the socket receive queue, skb_cmp_decrypted() is sufficient for checking for coalescable PSP state. On Tx, tcp_write_collapse_fence() should be called when transitioning a socket into PSP Tx state to prevent data sent as cleartext from being coalesced with PSP encapsulated data. This change only adds the validation points, for ease of review. 
Subsequent change will add the ability to install keys, and flesh the enforcement logic out Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Co-developed-by: Daniel Zahka Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-5-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/dropreason-core.h | 6 ++++ include/net/psp/functions.h | 77 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index d8ff24a33459..58d91ccc56e0 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -127,6 +127,8 @@ FN(CANXL_RX_INVALID_FRAME) \ FN(PFMEMALLOC) \ FN(DUALPI2_STEP_DROP) \ + FN(PSP_INPUT) \ + FN(PSP_OUTPUT) \ FNe(MAX) /** @@ -610,6 +612,10 @@ enum skb_drop_reason { * threshold of DualPI2 qdisc. */ SKB_DROP_REASON_DUALPI2_STEP_DROP, + /** @SKB_DROP_REASON_PSP_INPUT: PSP input checks failed */ + SKB_DROP_REASON_PSP_INPUT, + /** @SKB_DROP_REASON_PSP_OUTPUT: PSP output checks failed */ + SKB_DROP_REASON_PSP_OUTPUT, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index d0043bd14299..1ccc5fc238b8 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -3,6 +3,8 @@ #ifndef __NET_PSP_HELPERS_H #define __NET_PSP_HELPERS_H +#include +#include #include struct inet_timewait_sock; @@ -14,7 +16,82 @@ psp_dev_create(struct net_device *netdev, struct psp_dev_ops *psd_ops, void psp_dev_unregister(struct psp_dev *psd); /* Kernel-facing API */ +#if IS_ENABLED(CONFIG_INET_PSP) static inline void psp_sk_assoc_free(struct sock *sk) { } +static inline void +psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk) { } static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { } 
+static inline void +psp_reply_set_decrypted(struct sk_buff *skb) { } + +static inline void +psp_enqueue_set_decrypted(struct sock *sk, struct sk_buff *skb) +{ +} + +static inline unsigned long +__psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two, + unsigned long diffs) +{ + return diffs; +} + +static inline enum skb_drop_reason +psp_sk_rx_policy_check(struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + +static inline enum skb_drop_reason +psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb) +{ + return 0; +} + +static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb) +{ + return NULL; +} +#else +static inline void psp_sk_assoc_free(struct sock *sk) { } +static inline void +psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk) { } +static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { } +static inline void +psp_reply_set_decrypted(struct sk_buff *skb) { } + +static inline void +psp_enqueue_set_decrypted(struct sock *sk, struct sk_buff *skb) { } + +static inline unsigned long +__psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two, + unsigned long diffs) +{ + return diffs; +} + +static inline enum skb_drop_reason +psp_sk_rx_policy_check(struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + +static inline enum skb_drop_reason +psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb) +{ + return 0; +} + +static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb) +{ + return NULL; +} +#endif + +static inline unsigned long +psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two) +{ + return __psp_skb_coalesce_diff(one, two, 0); +} #endif /* __NET_PSP_HELPERS_H */ -- cgit v1.2.3 From 117f02a49b7719b210d154a0d0e728001bf4af06 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:32 -0700 Subject: psp: add op for rotation of device key Rotating the device key is a key 
part of the PSP protocol design. Some external daemon needs to do it once a day, or so. Add a netlink op to perform this operation. Add a notification group for informing users that key has been rotated and they should rekey (next rotation will cut them off). Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-6-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/psp/types.h | 5 +++++ include/uapi/linux/psp.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/psp/types.h b/include/net/psp/types.h index 4922fc8d42fd..66327fa80c92 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -102,6 +102,11 @@ struct psp_dev_ops { */ int (*set_config)(struct psp_dev *psd, struct psp_dev_config *conf, struct netlink_ext_ack *extack); + + /** + * @key_rotate: rotate the device key + */ + int (*key_rotate)(struct psp_dev *psd, struct netlink_ext_ack *extack); }; #endif /* __NET_PSP_H */ diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h index 4a404f085190..cbfbf3f0f364 100644 --- a/include/uapi/linux/psp.h +++ b/include/uapi/linux/psp.h @@ -32,11 +32,14 @@ enum { PSP_CMD_DEV_DEL_NTF, PSP_CMD_DEV_SET, PSP_CMD_DEV_CHANGE_NTF, + PSP_CMD_KEY_ROTATE, + PSP_CMD_KEY_ROTATE_NTF, __PSP_CMD_MAX, PSP_CMD_MAX = (__PSP_CMD_MAX - 1) }; #define PSP_MCGRP_MGMT "mgmt" +#define PSP_MCGRP_USE "use" #endif /* _UAPI_LINUX_PSP_H */ -- cgit v1.2.3 From 8c511c1df380780b8a81050767dbfe7ca518d3a2 Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Tue, 16 Sep 2025 17:09:33 -0700 Subject: net: move sk_validate_xmit_skb() to net/core/dev.c Move definition of sk_validate_xmit_skb() from net/core/sock.c to net/core/dev.c. This change is in preparation of the next patch, where sk_validate_xmit_skb() will need to cast sk to a tcp_timewait_sock *, and access member fields. 
Including linux/tcp.h from linux/sock.h creates a circular dependency, and dev.c is the only current call site of this function. Reviewed-by: Willem de Bruijn Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-7-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index d1d3d36e39ae..bf92029a88d6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2960,28 +2960,6 @@ sk_requests_wifi_status(struct sock *sk) return sk && sk_fullsock(sk) && sock_flag(sk, SOCK_WIFI_STATUS); } -/* Checks if this SKB belongs to an HW offloaded socket - * and whether any SW fallbacks are required based on dev. - * Check decrypted mark in case skb_orphan() cleared socket. - */ -static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, - struct net_device *dev) -{ -#ifdef CONFIG_SOCK_VALIDATE_XMIT - struct sock *sk = skb->sk; - - if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) { - skb = sk->sk_validate_xmit_skb(sk, dev, skb); - } else if (unlikely(skb_is_decrypted(skb))) { - pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n"); - kfree_skb(skb); - skb = NULL; - } -#endif - - return skb; -} - /* This helper checks if a socket is a LISTEN or NEW_SYN_RECV * SYNACK messages can be attached to either ones (depending on SYNCOOKIE) */ -- cgit v1.2.3 From 0917bb139eed467a6376db903ad7a67981ec1420 Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Tue, 16 Sep 2025 17:09:34 -0700 Subject: net: tcp: allow tcp_timewait_sock to validate skbs before handing to device Provide a callback to validate skb's originating from tcp timewait socks before passing to the device layer. Full socks have a sk_validate_xmit_skb member for checking that a device is capable of performing offloads required for transmitting an skb. 
With psp, tcp timewait socks will inherit the crypto state from their corresponding full socks. Any ACKs or RSTs that originate from a tcp timewait sock carrying psp state should be psp encapsulated. Reviewed-by: Willem de Bruijn Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-8-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/inet_timewait_sock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index c1295246216c..3a31c74c9e15 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -84,6 +84,11 @@ struct inet_timewait_sock { #if IS_ENABLED(CONFIG_INET_PSP) struct psp_assoc __rcu *psp_assoc; #endif +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sk_buff* (*tw_validate_xmit_skb)(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); +#endif }; #define tw_tclass tw_tos -- cgit v1.2.3 From 6b46ca260e2290e3453d1355ab5b6d283d73d780 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:35 -0700 Subject: net: psp: add socket security association code Add the ability to install PSP Rx and Tx crypto keys on TCP connections. Netlink ops are provided for both operations. Rx side combines allocating a new Rx key and installing it on the socket. Theoretically these are separate actions, but in practice they will always be used one after the other. We can add distinct "alloc" and "install" ops later. 
Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Co-developed-by: Daniel Zahka Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-9-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/psp/functions.h | 114 ++++++++++++++++++++++++++++++++++++++++---- include/net/psp/types.h | 57 ++++++++++++++++++++++ include/uapi/linux/psp.h | 21 ++++++++ 3 files changed, 183 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index 1ccc5fc238b8..0d7141230f47 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -4,7 +4,9 @@ #define __NET_PSP_HELPERS_H #include +#include #include +#include #include struct inet_timewait_sock; @@ -16,41 +18,130 @@ psp_dev_create(struct net_device *netdev, struct psp_dev_ops *psd_ops, void psp_dev_unregister(struct psp_dev *psd); /* Kernel-facing API */ +void psp_assoc_put(struct psp_assoc *pas); + +static inline void *psp_assoc_drv_data(struct psp_assoc *pas) +{ + return pas->drv_data; +} + #if IS_ENABLED(CONFIG_INET_PSP) -static inline void psp_sk_assoc_free(struct sock *sk) { } -static inline void -psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk) { } -static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { } -static inline void -psp_reply_set_decrypted(struct sk_buff *skb) { } +unsigned int psp_key_size(u32 version); +void psp_sk_assoc_free(struct sock *sk); +void psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk); +void psp_twsk_assoc_free(struct inet_timewait_sock *tw); +void psp_reply_set_decrypted(struct sk_buff *skb); + +static inline struct psp_assoc *psp_sk_assoc(const struct sock *sk) +{ + return rcu_dereference_check(sk->psp_assoc, lockdep_sock_is_held(sk)); +} static inline void psp_enqueue_set_decrypted(struct sock *sk, struct sk_buff *skb) { + struct psp_assoc *pas; + + pas = psp_sk_assoc(sk); + if (pas && 
pas->tx.spi) + skb->decrypted = 1; } static inline unsigned long __psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two, unsigned long diffs) { + struct psp_skb_ext *a, *b; + + a = skb_ext_find(one, SKB_EXT_PSP); + b = skb_ext_find(two, SKB_EXT_PSP); + + diffs |= (!!a) ^ (!!b); + if (!diffs && unlikely(a)) + diffs |= memcmp(a, b, sizeof(*a)); return diffs; } +static inline bool +psp_is_allowed_nondata(struct sk_buff *skb, struct psp_assoc *pas) +{ + bool fin = !!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN); + u32 end_seq = TCP_SKB_CB(skb)->end_seq; + u32 seq = TCP_SKB_CB(skb)->seq; + bool pure_fin; + + pure_fin = fin && end_seq - seq == 1; + + return seq == end_seq || (pure_fin && seq == pas->upgrade_seq); +} + +static inline bool +psp_pse_matches_pas(struct psp_skb_ext *pse, struct psp_assoc *pas) +{ + return pse && pas->rx.spi == pse->spi && + pas->generation == pse->generation && + pas->version == pse->version && + pas->dev_id == pse->dev_id; +} + +static inline enum skb_drop_reason +__psp_sk_rx_policy_check(struct sk_buff *skb, struct psp_assoc *pas) +{ + struct psp_skb_ext *pse = skb_ext_find(skb, SKB_EXT_PSP); + + if (!pas) + return pse ? 
SKB_DROP_REASON_PSP_INPUT : 0; + + if (likely(psp_pse_matches_pas(pse, pas))) { + if (unlikely(!pas->peer_tx)) + pas->peer_tx = 1; + + return 0; + } + + if (!pse) { + if (!pas->tx.spi || + (!pas->peer_tx && psp_is_allowed_nondata(skb, pas))) + return 0; + } + + return SKB_DROP_REASON_PSP_INPUT; +} + static inline enum skb_drop_reason psp_sk_rx_policy_check(struct sock *sk, struct sk_buff *skb) { - return 0; + return __psp_sk_rx_policy_check(skb, psp_sk_assoc(sk)); } static inline enum skb_drop_reason psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb) { - return 0; + return __psp_sk_rx_policy_check(skb, rcu_dereference(tw->psp_assoc)); +} + +static inline struct psp_assoc *psp_sk_get_assoc_rcu(struct sock *sk) +{ + struct inet_timewait_sock *tw; + struct psp_assoc *pas; + int state; + + state = 1 << READ_ONCE(sk->sk_state); + if (!sk_is_inet(sk) || state & TCPF_NEW_SYN_RECV) + return NULL; + + tw = inet_twsk(sk); + pas = state & TCPF_TIME_WAIT ? rcu_dereference(tw->psp_assoc) : + rcu_dereference(sk->psp_assoc); + return pas; } static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb) { - return NULL; + if (!skb->decrypted || !skb->sk) + return NULL; + + return psp_sk_get_assoc_rcu(skb->sk); } #else static inline void psp_sk_assoc_free(struct sock *sk) { } @@ -60,6 +151,11 @@ static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { } static inline void psp_reply_set_decrypted(struct sk_buff *skb) { } +static inline struct psp_assoc *psp_sk_assoc(const struct sock *sk) +{ + return NULL; +} + static inline void psp_enqueue_set_decrypted(struct sock *sk, struct sk_buff *skb) { } diff --git a/include/net/psp/types.h b/include/net/psp/types.h index 66327fa80c92..b0e32e7165a3 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -51,6 +51,7 @@ struct psp_dev_config { * @refcnt: reference count for the instance * @id: instance id * @config: current device configuration + * @active_assocs: list of 
registered associations * * @rcu: RCU head for freeing the structure */ @@ -68,6 +69,8 @@ struct psp_dev { struct psp_dev_config config; + struct list_head active_assocs; + struct rcu_head rcu; }; @@ -80,6 +83,12 @@ struct psp_dev_caps { * Set this field to 0 to indicate PSP is not supported at all. */ u32 versions; + + /** + * @assoc_drv_spc: size of driver-specific state in Tx assoc + * Determines the size of struct psp_assoc::drv_spc + */ + u32 assoc_drv_spc; }; #define PSP_MAX_KEY 32 @@ -91,6 +100,32 @@ struct psp_skb_ext { u8 version; }; +struct psp_key_parsed { + __be32 spi; + u8 key[PSP_MAX_KEY]; +}; + +struct psp_assoc { + struct psp_dev *psd; + + u16 dev_id; + u8 generation; + u8 version; + u8 peer_tx; + + u32 upgrade_seq; + + struct psp_key_parsed tx; + struct psp_key_parsed rx; + + refcount_t refcnt; + struct rcu_head rcu; + struct work_struct work; + struct list_head assocs_list; + + u8 drv_data[] __aligned(8); +}; + /** * struct psp_dev_ops - netdev driver facing PSP callbacks */ @@ -107,6 +142,28 @@ struct psp_dev_ops { * @key_rotate: rotate the device key */ int (*key_rotate)(struct psp_dev *psd, struct netlink_ext_ack *extack); + + /** + * @rx_spi_alloc: allocate an Rx SPI+key pair + * Allocate an Rx SPI and resulting derived key. + * This key should remain valid until key rotation. + */ + int (*rx_spi_alloc)(struct psp_dev *psd, u32 version, + struct psp_key_parsed *assoc, + struct netlink_ext_ack *extack); + + /** + * @tx_key_add: add a Tx key to the device + * Install an association in the device. Core will allocate space + * for the driver to use at drv_data. + */ + int (*tx_key_add)(struct psp_dev *psd, struct psp_assoc *pas, + struct netlink_ext_ack *extack); + /** + * @tx_key_del: remove a Tx key from the device + * Remove an association from the device. 
+ */ + void (*tx_key_del)(struct psp_dev *psd, struct psp_assoc *pas); }; #endif /* __NET_PSP_H */ diff --git a/include/uapi/linux/psp.h b/include/uapi/linux/psp.h index cbfbf3f0f364..607c42c39ba5 100644 --- a/include/uapi/linux/psp.h +++ b/include/uapi/linux/psp.h @@ -26,6 +26,25 @@ enum { PSP_A_DEV_MAX = (__PSP_A_DEV_MAX - 1) }; +enum { + PSP_A_ASSOC_DEV_ID = 1, + PSP_A_ASSOC_VERSION, + PSP_A_ASSOC_RX_KEY, + PSP_A_ASSOC_TX_KEY, + PSP_A_ASSOC_SOCK_FD, + + __PSP_A_ASSOC_MAX, + PSP_A_ASSOC_MAX = (__PSP_A_ASSOC_MAX - 1) +}; + +enum { + PSP_A_KEYS_KEY = 1, + PSP_A_KEYS_SPI, + + __PSP_A_KEYS_MAX, + PSP_A_KEYS_MAX = (__PSP_A_KEYS_MAX - 1) +}; + enum { PSP_CMD_DEV_GET = 1, PSP_CMD_DEV_ADD_NTF, @@ -34,6 +53,8 @@ enum { PSP_CMD_DEV_CHANGE_NTF, PSP_CMD_KEY_ROTATE, PSP_CMD_KEY_ROTATE_NTF, + PSP_CMD_RX_ASSOC, + PSP_CMD_TX_ASSOC, __PSP_CMD_MAX, PSP_CMD_MAX = (__PSP_CMD_MAX - 1) -- cgit v1.2.3 From e97269257fe437910cddc7c642a636ca3cf9fb1d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:36 -0700 Subject: net: psp: update the TCP MSS to reflect PSP packet overhead PSP eats 40B of header space. Adjust MSS appropriately. We can either modify tcp_mtu_to_mss() / tcp_mss_to_mtu() or reuse icsk_ext_hdr_len. The former option is more TCP specific and has runtime overhead. The latter is a bit of a hack as PSP is not an ext_hdr. If one squints hard enough, UDP encap is just a more practical version of IPv6 exthdr, so go with the latter. Happy to change. 
Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-10-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/psp/functions.h | 14 ++++++++++++++ include/net/psp/types.h | 3 +++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index 0d7141230f47..183a3c9216b7 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -143,6 +144,14 @@ static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb) return psp_sk_get_assoc_rcu(skb->sk); } + +static inline unsigned int psp_sk_overhead(const struct sock *sk) +{ + int psp_encap = sizeof(struct udphdr) + PSP_HDR_SIZE + PSP_TRL_SIZE; + bool has_psp = rcu_access_pointer(sk->psp_assoc); + + return has_psp ? psp_encap : 0; +} #else static inline void psp_sk_assoc_free(struct sock *sk) { } static inline void @@ -182,6 +191,11 @@ static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb) { return NULL; } + +static inline unsigned int psp_sk_overhead(const struct sock *sk) +{ + return 0; +} #endif static inline unsigned long diff --git a/include/net/psp/types.h b/include/net/psp/types.h index b0e32e7165a3..f93ad0e6c04f 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -93,6 +93,9 @@ struct psp_dev_caps { #define PSP_MAX_KEY 32 +#define PSP_HDR_SIZE 16 /* We don't support optional fields, yet */ +#define PSP_TRL_SIZE 16 /* AES-GCM/GMAC trailer size */ + struct psp_skb_ext { __be32 spi; u16 dev_id; -- cgit v1.2.3 From e78851058b35deb9f2d60ecf698fbf7ae7790d09 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 Sep 2025 17:09:37 -0700 Subject: psp: track generations of device key There is a (somewhat theoretical in absence of multi-host support) possibility that another entity will rotate 
the key and we won't know. This may lead to accepting packets with matching SPI but which used different crypto keys than we expected. The PSP Architecture specification mentions that an implementation should track device key generation when device keys are managed by the NIC. Some PSP implementations may opt to include this key generation state in decryption metadata each time a device key is used to decrypt a packet. If that is the case, that key generation counter can also be used when policy checking a decrypted skb against a psp_assoc. This is an optional feature that is not explicitly part of the PSP spec, but can provide additional security in the case where an attacker may have the ability to force key rotations faster than rekeying can occur. Since we're tracking "key generations" more explicitly now, maintain different lists for associations from different generations. This way we can catch stale associations (the user space should listen to rotation notifications and change the keys). Drivers can "opt out" of generation tracking by setting the generation value to 0. 
Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-11-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/psp/types.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/psp/types.h b/include/net/psp/types.h index f93ad0e6c04f..ec218747ced0 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -50,8 +50,12 @@ struct psp_dev_config { * @lock: instance lock, protects all fields * @refcnt: reference count for the instance * @id: instance id + * @generation: current generation of the device key * @config: current device configuration * @active_assocs: list of registered associations + * @prev_assocs: associations which use old (but still usable) + * device key + * @stale_assocs: associations which use a rotated out key * * @rcu: RCU head for freeing the structure */ @@ -67,13 +71,19 @@ struct psp_dev { u32 id; + u8 generation; + struct psp_dev_config config; struct list_head active_assocs; + struct list_head prev_assocs; + struct list_head stale_assocs; struct rcu_head rcu; }; +#define PSP_GEN_VALID_MASK 0x7f + /** * struct psp_dev_caps - PSP device capabilities */ -- cgit v1.2.3 From fc724515741a1b86ca0457825fdb784ab038e92c Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Tue, 16 Sep 2025 17:09:40 -0700 Subject: psp: provide encapsulation helper for drivers Create a new function psp_encapsulate(), which takes a TCP packet and PSP encapsulates it according to the "Transport Mode Packet Format" section of the PSP Architecture Specification. psp_encapsulate() does not push a PSP trailer onto the skb. Both IPv6 and IPv4 are supported. Virtualization cookie is not included. 
Reviewed-by: Willem de Bruijn Signed-off-by: Raed Salem Signed-off-by: Rahul Rameshbabu Signed-off-by: Cosmin Ratiu Co-developed-by: Daniel Zahka Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-14-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/psp/functions.h | 2 ++ include/net/psp/types.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index 183a3c9216b7..0a539e1b39f4 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -17,6 +17,8 @@ struct psp_dev * psp_dev_create(struct net_device *netdev, struct psp_dev_ops *psd_ops, struct psp_dev_caps *psd_caps, void *priv_ptr); void psp_dev_unregister(struct psp_dev *psd); +bool psp_dev_encapsulate(struct net *net, struct sk_buff *skb, __be32 spi, + u8 ver, __be16 sport); /* Kernel-facing API */ void psp_assoc_put(struct psp_assoc *pas); diff --git a/include/net/psp/types.h b/include/net/psp/types.h index ec218747ced0..d9688e66cf09 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -20,6 +20,8 @@ struct psphdr { __be64 vc[]; /* optional */ }; +#define PSP_ENCAP_HLEN (sizeof(struct udphdr) + sizeof(struct psphdr)) + #define PSP_SPI_KEY_ID GENMASK(30, 0) #define PSP_SPI_KEY_PHASE BIT(31) -- cgit v1.2.3 From 0eddb8023cee546eb05658ef3322234de8461f3b Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Tue, 16 Sep 2025 17:09:44 -0700 Subject: psp: provide decapsulation and receive helper for drivers Create psp_dev_rcv(), which drivers can call to psp decapsulate and attach a psp_skb_ext to an skb. psp_dev_rcv() only supports what the PSP architecture specification refers to as "transport mode" packets, where the L3 header is either IPv6 or IPv4. 
Reviewed-by: Willem de Bruijn Signed-off-by: Raed Salem Signed-off-by: Rahul Rameshbabu Signed-off-by: Cosmin Ratiu Co-developed-by: Daniel Zahka Signed-off-by: Daniel Zahka Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250917000954.859376-18-daniel.zahka@gmail.com Signed-off-by: Paolo Abeni --- include/net/psp/functions.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index 0a539e1b39f4..91ba06733321 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -19,6 +19,7 @@ psp_dev_create(struct net_device *netdev, struct psp_dev_ops *psd_ops, void psp_dev_unregister(struct psp_dev *psd); bool psp_dev_encapsulate(struct net *net, struct sk_buff *skb, __be32 spi, u8 ver, __be16 sport); +int psp_dev_rcv(struct sk_buff *skb, u16 dev_id, u8 generation, bool strip_icv); /* Kernel-facing API */ void psp_assoc_put(struct psp_assoc *pas); -- cgit v1.2.3 From 6684b91d04b408843bd65e2120f6db2159e4f40b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 16 Sep 2025 21:08:38 -0700 Subject: bnxt_en: Implement ethtool .get_tunable() for ETHTOOL_PFC_PREVENTION_TOUT Return the current PFC watchdog timeout value if it is supported. 
Reviewed-by: Andy Gospodarek Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250917040839.1924698-10-michael.chan@broadcom.com Signed-off-by: Paolo Abeni --- include/linux/bnxt/hsi.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h index 8c5dac3b3ef3..23e7b1290a92 100644 --- a/include/linux/bnxt/hsi.h +++ b/include/linux/bnxt/hsi.h @@ -6751,6 +6751,46 @@ struct hwrm_queue_dscp2pri_cfg_output { u8 valid; }; +/* hwrm_queue_pfcwd_timeout_qcaps_input (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_qcaps_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_queue_pfcwd_timeout_qcaps_output (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_qcaps_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 max_pfcwd_timeout; + u8 unused_0[5]; + u8 valid; +}; + +/* hwrm_queue_pfcwd_timeout_qcfg_input (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_queue_pfcwd_timeout_qcfg_output (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 pfcwd_timeout_value; + u8 unused_0[5]; + u8 valid; +}; + /* hwrm_vnic_alloc_input (size:192b/24B) */ struct hwrm_vnic_alloc_input { __le16 req_type; -- cgit v1.2.3 From fa18932afb29b51530508f28adcc824a8c00b712 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 16 Sep 2025 21:08:39 -0700 Subject: bnxt_en: Implement ethtool .set_tunable() for ETHTOOL_PFC_PREVENTION_TOUT Support the setting of the tunable if it is supported by firmware. The supported range is 0 to the maximum msec value reported by firmware. 
PFC_STORM_PREVENTION_AUTO is also supported and 0 means it is disabled. Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250917040839.1924698-11-michael.chan@broadcom.com Signed-off-by: Paolo Abeni --- include/linux/bnxt/hsi.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/bnxt/hsi.h b/include/linux/bnxt/hsi.h index 23e7b1290a92..47c34990cf23 100644 --- a/include/linux/bnxt/hsi.h +++ b/include/linux/bnxt/hsi.h @@ -6771,6 +6771,27 @@ struct hwrm_queue_pfcwd_timeout_qcaps_output { u8 valid; }; +/* hwrm_queue_pfcwd_timeout_cfg_input (size:192b/24B) */ +struct hwrm_queue_pfcwd_timeout_cfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 pfcwd_timeout_value; + u8 unused_0[6]; +}; + +/* hwrm_queue_pfcwd_timeout_cfg_output (size:128b/16B) */ +struct hwrm_queue_pfcwd_timeout_cfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 unused_0[7]; + u8 valid; +}; + /* hwrm_queue_pfcwd_timeout_qcfg_input (size:128b/16B) */ struct hwrm_queue_pfcwd_timeout_qcfg_input { __le16 req_type; -- cgit v1.2.3 From f72e2cff13aefe305fc8fc6afe4f43626e4ad88c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Sep 2025 15:48:01 +0200 Subject: compiler_types: Add __assume macro Make the statement attribute "assume" with a new __assume macro available. The assume attribute is used to indicate that a certain condition is assumed to be true. Compilers may or may not use this indication to generate optimized code. If this condition is violated at runtime, the behavior is undefined. Note that the clang documentation states that optimizers may react differently to this attribute, and this may even have a negative performance impact. Therefore this attribute should be used with care. 
Signed-off-by: Heiko Carstens Reviewed-by: Nathan Chancellor Signed-off-by: Alexander Gordeev --- include/linux/compiler_types.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 16755431fc11..2f3e80bf9f35 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -329,6 +329,29 @@ struct ftrace_likely_data { #define __no_sanitize_or_inline __always_inline #endif +/* + * The assume attribute is used to indicate that a certain condition is + * assumed to be true. If this condition is violated at runtime, the behavior + * is undefined. Compilers may or may not use this indication to generate + * optimized code. + * + * Note that the clang documentation states that optimizers may react + * differently to this attribute, and this may even have a negative + * performance impact. Therefore this attribute should be used with care. + * + * Optional: only supported since gcc >= 13 + * Optional: only supported since clang >= 19 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#index-assume-statement-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#id13 + * + */ +#ifdef CONFIG_CC_HAS_ASSUME +# define __assume(expr) __attribute__((__assume__(expr))) +#else +# define __assume(expr) +#endif + /* * Optional: only supported since gcc >= 15 * Optional: only supported since clang >= 18 -- cgit v1.2.3 From 84eaf4359c36b0ba888f571a964138d22ba5914f Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 17 Sep 2025 02:58:11 -0700 Subject: net: ethtool: add get_rx_ring_count callback to optimize RX ring queries Add a new optional get_rx_ring_count callback in ethtool_ops to allow drivers to provide the number of RX rings directly without going through the full get_rxnfc flow classification interface. 
Create ethtool_get_rx_ring_count() to use .get_rx_ring_count if available, falling back to get_rxnfc() otherwise. It needs to be non-static, given it will be called by other ethtool functions later, such as those calling get_rxfh(). Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250917-gxrings-v4-4-dae520e2e1cb@debian.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d7d757e72554..c869b7f8bce8 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -968,6 +968,7 @@ struct kernel_ethtool_ts_info { * @reset: Reset (part of) the device, as specified by a bitmask of * flags from &enum ethtool_reset_flags. Returns a negative * error code or zero. + * @get_rx_ring_count: Return the number of RX rings * @get_rxfh_key_size: Get the size of the RX flow hash key. * Returns zero if not supported for this specific device. * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table. @@ -1162,6 +1163,7 @@ struct ethtool_ops { int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *); int (*flash_device)(struct net_device *, struct ethtool_flash *); int (*reset)(struct net_device *, u32 *); + u32 (*get_rx_ring_count)(struct net_device *dev); u32 (*get_rxfh_key_size)(struct net_device *); u32 (*get_rxfh_indir_size)(struct net_device *); int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); -- cgit v1.2.3 From 87ebb628a5acb892eba41ef1d8989beb8f036034 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Sep 2025 13:53:37 +0000 Subject: net: clear sk->sk_ino in sk_set_socket(sk, NULL) Andrei Vagin reported that blamed commit broke CRIU. Indeed, while we want to keep sk_uid unchanged when a socket is cloned, we want to clear sk->sk_ino. Otherwise, sock_diag might report multiple sockets sharing the same inode number.
Move the clearing part from sock_orphan() to sk_set_socket(sk, NULL), called both from sock_orphan() and sk_clone_lock(). Fixes: 5d6b58c932ec ("net: lockless sock_i_ino()") Closes: https://lore.kernel.org/netdev/aMhX-VnXkYDpKd9V@google.com/ Closes: https://github.com/checkpoint-restore/criu/issues/2744 Reported-by: Andrei Vagin Signed-off-by: Eric Dumazet Acked-by: Andrei Vagin Link: https://patch.msgid.link/20250917135337.1736101-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index fb13322a11fc..2e14283c5be1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2061,6 +2061,9 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) if (sock) { WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); + } else { + /* Note: sk_uid is unchanged. */ + WRITE_ONCE(sk->sk_ino, 0); } } @@ -2082,8 +2085,6 @@ static inline void sock_orphan(struct sock *sk) sock_set_flag(sk, SOCK_DEAD); sk_set_socket(sk, NULL); sk->sk_wq = NULL; - /* Note: sk_uid is unchanged. */ - WRITE_ONCE(sk->sk_ino, 0); write_unlock_bh(&sk->sk_callback_lock); } -- cgit v1.2.3 From 8d5b7009aabc27e626e4167fedf1e1c1c3d6b143 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Fri, 5 Sep 2025 21:19:45 +0530 Subject: mei: bus: add mei_cldev_mtu interface Add a new helper function that allows MEI client drivers to query the maximum transmission unit (MTU) for a connected MEI client. This is useful for clients that need to transmit large payloads, such as firmware blobs, allowing them to determine the maximum message size that can be safely sent before starting transmission and size of the buffer to allocate when receiving data. 
Reviewed-by: Mika Westerberg Signed-off-by: Alexander Usyskin Signed-off-by: Badal Nilawar Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Rodrigo Vivi Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250905154953.3974335-2-badal.nilawar@intel.com Signed-off-by: Lucas De Marchi --- include/linux/mei_cl_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 725fd7727422..a82755e1fc40 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -113,6 +113,7 @@ int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, mei_cldev_cb_t notif_cb); u8 mei_cldev_ver(const struct mei_cl_device *cldev); +size_t mei_cldev_mtu(const struct mei_cl_device *cldev); void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev); void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data); -- cgit v1.2.3 From 741eeabb7c78c555c4c8e39df91b2b8e8d6f5ec6 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Fri, 5 Sep 2025 21:19:46 +0530 Subject: mei: late_bind: add late binding component driver Introduce a new MEI client driver to support Late Binding firmware upload/update for Intel discrete graphics platforms. Late Binding is a runtime firmware upload/update mechanism that allows payloads, such as fan control and voltage regulator, to be securely delivered and applied without requiring SPI flash updates or system reboots. This driver enables the Xe graphics driver and other user-space tools to push such firmware blobs to the authentication firmware via the MEI interface. The driver handles authentication, versioning, and communication with the authentication firmware, which in turn coordinates with the PUnit/PCODE to apply the payload. This is a foundational component for enabling dynamic, secure, and re-entrant configuration updates on platforms like Battlemage. 
Cc: Badal Nilawar Reviewed-by: Mika Westerberg Signed-off-by: Badal Nilawar Reviewed-by: Anshuman Gupta Signed-off-by: Rodrigo Vivi Signed-off-by: Alexander Usyskin Reviewed-by: Lucas De Marchi Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250905154953.3974335-3-badal.nilawar@intel.com Signed-off-by: Lucas De Marchi --- include/drm/intel/i915_component.h | 1 + include/drm/intel/intel_lb_mei_interface.h | 70 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 include/drm/intel/intel_lb_mei_interface.h (limited to 'include') diff --git a/include/drm/intel/i915_component.h b/include/drm/intel/i915_component.h index 4ea3b17aa143..8082db222e00 100644 --- a/include/drm/intel/i915_component.h +++ b/include/drm/intel/i915_component.h @@ -31,6 +31,7 @@ enum i915_component_type { I915_COMPONENT_HDCP, I915_COMPONENT_PXP, I915_COMPONENT_GSC_PROXY, + INTEL_COMPONENT_LB, }; /* MAX_PORT is the number of port diff --git a/include/drm/intel/intel_lb_mei_interface.h b/include/drm/intel/intel_lb_mei_interface.h new file mode 100644 index 000000000000..d65be2cba2ab --- /dev/null +++ b/include/drm/intel/intel_lb_mei_interface.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (c) 2025 Intel Corporation + */ + +#ifndef _INTEL_LB_MEI_INTERFACE_H_ +#define _INTEL_LB_MEI_INTERFACE_H_ + +#include + +struct device; + +/** + * define INTEL_LB_FLAG_IS_PERSISTENT - Mark the payload as persistent + * + * This flag indicates that the late binding payload should be stored + * persistently in flash across warm resets. 
+ */ +#define INTEL_LB_FLAG_IS_PERSISTENT BIT(0) + +/** + * enum intel_lb_type - enum to determine late binding payload type + * @INTEL_LB_TYPE_FAN_CONTROL: Fan controller configuration + */ +enum intel_lb_type { + INTEL_LB_TYPE_FAN_CONTROL = 1, +}; + +/** + * enum intel_lb_status - Status codes returned on late binding transmissions + * @INTEL_LB_STATUS_SUCCESS: Operation completed successfully + * @INTEL_LB_STATUS_4ID_MISMATCH: Mismatch in the expected 4ID (firmware identity/token) + * @INTEL_LB_STATUS_ARB_FAILURE: Arbitration failure (e.g. conflicting access or state) + * @INTEL_LB_STATUS_GENERAL_ERROR: General firmware error not covered by other codes + * @INTEL_LB_STATUS_INVALID_PARAMS: One or more input parameters are invalid + * @INTEL_LB_STATUS_INVALID_SIGNATURE: Payload has an invalid or untrusted signature + * @INTEL_LB_STATUS_INVALID_PAYLOAD: Payload contents are not accepted by firmware + * @INTEL_LB_STATUS_TIMEOUT: Operation timed out before completion + */ +enum intel_lb_status { + INTEL_LB_STATUS_SUCCESS = 0, + INTEL_LB_STATUS_4ID_MISMATCH = 1, + INTEL_LB_STATUS_ARB_FAILURE = 2, + INTEL_LB_STATUS_GENERAL_ERROR = 3, + INTEL_LB_STATUS_INVALID_PARAMS = 4, + INTEL_LB_STATUS_INVALID_SIGNATURE = 5, + INTEL_LB_STATUS_INVALID_PAYLOAD = 6, + INTEL_LB_STATUS_TIMEOUT = 7, +}; + +/** + * struct intel_lb_component_ops - Ops for late binding services + */ +struct intel_lb_component_ops { + /** + * push_payload - Sends a payload to the authentication firmware + * @dev: Device struct corresponding to the mei device + * @type: Payload type (see &enum intel_lb_type) + * @flags: Payload flags bitmap (e.g. 
%INTEL_LB_FLAGS_IS_PERSISTENT) + * @payload: Pointer to payload buffer + * @payload_size: Payload buffer size in bytes + * + * Return: 0 success, negative errno value on transport failure, + * positive status returned by firmware + */ + int (*push_payload)(struct device *dev, u32 type, u32 flags, + const void *payload, size_t payload_size); +}; + +#endif /* _INTEL_LB_MEI_INTERFACE_H_ */ -- cgit v1.2.3 From df8922afc37aa2111ca79a216653a629146763ad Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 18 Sep 2025 13:59:15 -0600 Subject: io_uring/msg_ring: kill alloc_cache for io_kiocb allocations A recent commit: fc582cd26e88 ("io_uring/msg_ring: ensure io_kiocb freeing is deferred for RCU") fixed an issue with not deferring freeing of io_kiocb structs that msg_ring allocates to after the current RCU grace period. But this only covers requests that don't end up in the allocation cache. If a request goes into the alloc cache, it can get reused before it is sane to do so. A recent syzbot report would seem to indicate that there's something there, however it may very well just be because of the KASAN poisoning that the alloc_cache handles manually. Rather than attempt to make the alloc_cache sane for that use case, just drop the usage of the alloc_cache for msg_ring request payload data. 
Fixes: 50cf5f3842af ("io_uring/msg_ring: add an alloc cache for io_kiocb entries") Link: https://lore.kernel.org/io-uring/68cc2687.050a0220.139b6.0005.GAE@google.com/ Reported-by: syzbot+baa2e0f4e02df602583e@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 80a178f3d896..12f5ee43850e 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -420,9 +420,6 @@ struct io_ring_ctx { struct list_head defer_list; unsigned nr_drained; - struct io_alloc_cache msg_cache; - spinlock_t msg_lock; - #ifdef CONFIG_NET_RX_BUSY_POLL struct list_head napi_list; /* track busy poll napi_id */ spinlock_t napi_lock; /* napi_list lock */ -- cgit v1.2.3 From a660194dd101e937c319171ad99c3fbe466fd825 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Wed, 17 Sep 2025 12:02:07 -0700 Subject: arm64: Enable permission change on arm64 kernel block mappings This patch paves the path to enable huge mappings in vmalloc space and linear map space by default on arm64. For this we must ensure that we can handle any permission games on the kernel (init_mm) pagetable. Previously, __change_memory_common() used apply_to_page_range() which does not support changing permissions for block mappings. We move away from this by using the pagewalk API, similar to what riscv does right now. It is the responsibility of the caller to ensure that the range over which permissions are being changed falls on leaf mapping boundaries. For systems with BBML2, this will be handled in future patches by dynamically splitting the mappings when required. Unlike apply_to_page_range(), the pagewalk API currently enforces the init_mm.mmap_lock to be held.
To avoid the unnecessary bottleneck of the mmap_lock for our usecase, this patch extends this generic API to be used locklessly, so as to retain the existing behaviour for changing permissions. Apart from this reason, it is noted at [1] that KFENCE can manipulate kernel pgtable entries during softirqs. It does this by calling set_memory_valid() -> __change_memory_common(). This being a non-sleepable context, we cannot take the init_mm mmap lock. Add comments to highlight the conditions under which we can use the lockless variant - no underlying VMA, and the user having exclusive control over the range, thus guaranteeing no concurrent access. We require that the start and end of a given range do not partially overlap block mappings, or cont mappings. Return -EINVAL in case a partial block mapping is detected in any of the PGD/P4D/PUD/PMD levels; add a corresponding comment in update_range_prot() to warn that eliminating such a condition is the responsibility of the caller. Note that, the pte level callback may change permissions for a whole contpte block, and that will be done one pte at a time, as opposed to an atomic operation for the block mappings. This is fine as any access will decode either the old or the new permission until the TLBI. apply_to_page_range() currently performs all pte level callbacks while in lazy mmu mode. Since arm64 can optimize performance by batching barriers when modifying kernel pgtables in lazy mmu mode, we would like to continue to benefit from this optimisation. Unfortunately walk_kernel_page_table_range() does not use lazy mmu mode. However, since the pagewalk framework is not allocating any memory, we can safely bracket the whole operation inside lazy mmu mode ourselves. Therefore, wrap the call to walk_kernel_page_table_range() with the lazy MMU helpers. 
Link: https://lore.kernel.org/linux-arm-kernel/89d0ad18-4772-4d8f-ae8a-7c48d26a927e@arm.com/ [1] Signed-off-by: Dev Jain Signed-off-by: Yang Shi Reviewed-by: Ryan Roberts Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- include/linux/pagewalk.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 682472c15495..88e18615dd72 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -134,6 +134,9 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, int walk_kernel_page_table_range(unsigned long start, unsigned long end, const struct mm_walk_ops *ops, pgd_t *pgd, void *private); +int walk_kernel_page_table_range_lockless(unsigned long start, + unsigned long end, const struct mm_walk_ops *ops, + pgd_t *pgd, void *private); int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, void *private); -- cgit v1.2.3 From 6b8ba0db92cd01450acaf375caf4c126aa913d72 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:08 +0000 Subject: ASoC: soc-dapm: add snd_soc_dapm_to_dev() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. Some drivers need to get dev from dapm (which will be removed). We need such function. Add it. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87cy86x06z.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index ed39458b94bf..ccd36a198a13 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -587,7 +587,7 @@ struct snd_soc_dapm_context { unsigned int idle_bias_off:1; /* Use BIAS_OFF instead of STANDBY */ unsigned int suspend_bias_off:1; /* Use BIAS_OFF in suspend if the DAPM is idle */ - struct device *dev; /* from parent - for debug */ + struct device *dev; /* from parent - for debug */ /* REMOVE ME */ struct snd_soc_component *component; /* parent component */ struct snd_soc_card *card; /* parent card */ @@ -660,6 +660,7 @@ void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card); int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai); int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s); +struct device *snd_soc_dapm_to_dev(struct snd_soc_dapm_context *dapm); /* dapm path setup */ int snd_soc_dapm_new_widgets(struct snd_soc_card *card); -- cgit v1.2.3 From c8df096bca84c9eb04b656015c8430d0b87ebbcf Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:13 +0000 Subject: ASoC: soc-dapm: add snd_soc_dapm_to_card() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. Some drivers need to get card from dapm (which will be removed). We need such function. Add it. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87bjnqx06v.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index ccd36a198a13..dbb71e396feb 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -661,6 +661,7 @@ int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai); int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s); struct device *snd_soc_dapm_to_dev(struct snd_soc_dapm_context *dapm); +struct snd_soc_card *snd_soc_dapm_to_card(struct snd_soc_dapm_context *dapm); /* dapm path setup */ int snd_soc_dapm_new_widgets(struct snd_soc_card *card); -- cgit v1.2.3 From 96e311b561a2d393a786a2aeb50cd5e02d06afb3 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:17 +0000 Subject: ASoC: soc-dapm: use dapm->component instead of container_of() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. Now, snd_soc_dapm_to_component() (A) will convert dapm to component by container_of() (a). (A) static inline struct snd_soc_component *snd_soc_dapm_to_component( struct snd_soc_dapm_context *dapm) { (a) return container_of(dapm, struct snd_soc_component, dapm); } dapm of component works, but dapm of card will be "unknown" pointer (= not NULL), because (a) is using "container_of()". OTOH, ASoC will call snd_soc_dapm_init() (X) to initialize dapm, and it will be called from snd_soc_bind_card() (p) (for card) or soc_probe_component() (q) (for component) with component pointer. (p) static int snd_soc_bind_card(...) { ... (X) snd_soc_dapm_init(..., NULL); ... 
^^^^ } (q) static int soc_probe_component(...) { ... (X) snd_soc_dapm_init(..., component); ... ^^^^^^^^^ } And snd_soc_dapm_init() (X) will fill dapm->component (x) (X) void snd_soc_dapm_init(..., component, ...) { ... (x) dapm->component = component; ... } We can simply use dapm->component in snd_soc_dapm_to_component() (A). In this case, dapm of card (p) will be just NULL. Use dapm->component instead of container_of(). The picky note can be removed by this patch. Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87a53ax06q.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 15 --------------- include/sound/soc-dapm.h | 1 + 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index 48e45cbe82e5..7322d5d4c0bd 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -260,21 +260,6 @@ struct snd_soc_component { #define for_each_component_dais_safe(component, dai, _dai)\ list_for_each_entry_safe(dai, _dai, &(component)->dai_list, list) -/** - * snd_soc_dapm_to_component() - Casts a DAPM context to the component it is - * embedded in - * @dapm: The DAPM context to cast to the component - * - * This function must only be used on DAPM contexts that are known to be part of - * a component (e.g. in a component driver). Otherwise the behavior is - * undefined. 
- */ -static inline struct snd_soc_component *snd_soc_dapm_to_component( - struct snd_soc_dapm_context *dapm) -{ - return container_of(dapm, struct snd_soc_component, dapm); -} - /** * snd_soc_component_get_dapm() - Returns the DAPM context associated with a * component diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index dbb71e396feb..c6470d391eef 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -662,6 +662,7 @@ int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s); struct device *snd_soc_dapm_to_dev(struct snd_soc_dapm_context *dapm); struct snd_soc_card *snd_soc_dapm_to_card(struct snd_soc_dapm_context *dapm); +struct snd_soc_component *snd_soc_dapm_to_component(struct snd_soc_dapm_context *dapm); /* dapm path setup */ int snd_soc_dapm_new_widgets(struct snd_soc_card *card); -- cgit v1.2.3 From a1c99b6097afe64ed493c05b522ee4d6f9b0094d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:21 +0000 Subject: ASoC: soc-component: add snd_soc_component_to_dapm() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. Current dapm of card/component are using "instance", but it will be "pointer" if snd_soc_dapm_context was removed from header. 
snd_soc_component_to_dapm() is needed to switch to the new style while maintaining compatibility Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/878qiux06m.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-component.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index 7322d5d4c0bd..b954f34d6025 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -261,16 +261,19 @@ struct snd_soc_component { list_for_each_entry_safe(dai, _dai, &(component)->dai_list, list) /** - * snd_soc_component_get_dapm() - Returns the DAPM context associated with a + * snd_soc_component_to_dapm() - Returns the DAPM context associated with a * component * @component: The component for which to get the DAPM context */ -static inline struct snd_soc_dapm_context *snd_soc_component_get_dapm( +static inline struct snd_soc_dapm_context *snd_soc_component_to_dapm( struct snd_soc_component *component) { return &component->dapm; } +// FIXME +#define snd_soc_component_get_dapm snd_soc_component_to_dapm + /** * snd_soc_component_cache_sync() - Sync the register cache with the hardware * @component: COMPONENT to sync -- cgit v1.2.3 From e38a80c5c24f3058bd5da6f2910e2b672493f4f2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:25 +0000 Subject: ASoC: soc-card: add snd_soc_card_to_dapm() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. Current dapm of card/component are using "instance", but it will be "pointer" if snd_soc_dapm_context was removed from header. 
snd_soc_card_to_dapm() is needed to switch to the new style while maintaining compatibility Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/877byex06i.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 1fffef311c41..ddc508ff7b9b 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1120,6 +1120,11 @@ static inline int snd_soc_card_is_instantiated(struct snd_soc_card *card) return card && card->instantiated; } +static inline struct snd_soc_dapm_context *snd_soc_card_to_dapm(struct snd_soc_card *card) +{ + return &card->dapm; +} + /* SoC machine DAI configuration, glues a codec and cpu DAI together */ struct snd_soc_pcm_runtime { struct device *dev; -- cgit v1.2.3 From 3bc0a92cb2062fce54ddd97ad68ad6fe358c3ff0 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:29 +0000 Subject: ASoC: soc-dapm: remove suspend_bias_off from snd_soc_dapm_context We can directly use suspend_bias_off via snd_soc_component, no need to keep it on dapm. Remove it. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/875xdyx06e.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index c6470d391eef..498f8af79cfa 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -585,7 +585,6 @@ struct snd_soc_dapm_context { /* bit field */ unsigned int idle_bias_off:1; /* Use BIAS_OFF instead of STANDBY */ - unsigned int suspend_bias_off:1; /* Use BIAS_OFF in suspend if the DAPM is idle */ struct device *dev; /* from parent - for debug */ /* REMOVE ME */ struct snd_soc_component *component; /* parent component */ -- cgit v1.2.3 From 889dd56f8c03586e5489050e7457a405fae6a420 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:33 +0000 Subject: ASoC: soc-dapm: tidyup idle_bias handling - step1 Current soc-dapm is using "idle_bias_off", and its default settings came from snd_soc_component "idle_bias_on". It is complicated/confusable. Let's handling it as "idle_bias". 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/874itix06a.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 498f8af79cfa..9618a54a5348 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -583,8 +583,7 @@ struct snd_soc_dapm_update { struct snd_soc_dapm_context { enum snd_soc_bias_level bias_level; - /* bit field */ - unsigned int idle_bias_off:1; /* Use BIAS_OFF instead of STANDBY */ + bool idle_bias; /* Use BIAS_OFF instead of STANDBY when false */ struct device *dev; /* from parent - for debug */ /* REMOVE ME */ struct snd_soc_component *component; /* parent component */ -- cgit v1.2.3 From 2e7f0a86123d54a94fa3d309efdfbac02f2999b8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:41 +0000 Subject: ASoC: soc-dapm: add snd_soc_dapm_get_bias_level() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. Many drivers are directly using dapm->idle_bias, but it should get it via get_idle_bias() function. Makes it as global function. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/871pomx062.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 9618a54a5348..e978be4010b8 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -662,6 +662,8 @@ struct device *snd_soc_dapm_to_dev(struct snd_soc_dapm_context *dapm); struct snd_soc_card *snd_soc_dapm_to_card(struct snd_soc_dapm_context *dapm); struct snd_soc_component *snd_soc_dapm_to_component(struct snd_soc_dapm_context *dapm); +bool snd_soc_dapm_get_idle_bias(struct snd_soc_dapm_context *dapm); + /* dapm path setup */ int snd_soc_dapm_new_widgets(struct snd_soc_card *card); void snd_soc_dapm_free(struct snd_soc_dapm_context *dapm); -- cgit v1.2.3 From cb3c715d89607f8896c0f20fe528a08e7ebffea9 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 4 Sep 2025 05:21:45 +0000 Subject: ASoC: soc-dapm: add snd_soc_dapm_set_idle_bias() Because struct snd_soc_dapm_context is soc-dapm framework specific, user driver don't need to access its member directly, we would like to hide them. struct snd_soc_dapm_context will be removed from header in the future. Many drivers are directly setting dapm->idle_bias, but it will be impossible soon. adds snd_soc_dapm_set_idle_bias() for them. 
Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87zfbavllj.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index e978be4010b8..75941324886b 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -663,6 +663,7 @@ struct snd_soc_card *snd_soc_dapm_to_card(struct snd_soc_dapm_context *dapm); struct snd_soc_component *snd_soc_dapm_to_component(struct snd_soc_dapm_context *dapm); bool snd_soc_dapm_get_idle_bias(struct snd_soc_dapm_context *dapm); +void snd_soc_dapm_set_idle_bias(struct snd_soc_dapm_context *dapm, bool on); /* dapm path setup */ int snd_soc_dapm_new_widgets(struct snd_soc_card *card); -- cgit v1.2.3 From 76cffc3eb1bdee0a7e8cca090adfd46a740f1cb0 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 9 Sep 2025 13:19:44 +0100 Subject: soundwire: bus: add of_sdw_find_device_by_node helper There has been more than 3 instances of this helper in multiple codec drivers, it does not make sense to keep duplicating this part of code. Lets add a helper of_sdw_find_device_by_node for codec drivers to use it. 
Signed-off-by: Srinivas Kandagatla Reviewed-by: Dmitry Baryshkov Acked-by: Vinod Koul Link: https://patch.msgid.link/20250909121954.225833-4-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 0832776262ac..096213956d31 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -19,6 +19,7 @@ struct dentry; struct fwnode_handle; +struct device_node; struct sdw_bus; struct sdw_slave; @@ -1086,6 +1087,8 @@ int sdw_stream_add_slave(struct sdw_slave *slave, int sdw_stream_remove_slave(struct sdw_slave *slave, struct sdw_stream_runtime *stream); +struct device *of_sdw_find_device_by_node(struct device_node *np); + int sdw_slave_get_scale_index(struct sdw_slave *slave, u8 *base); /* messaging and data APIs */ @@ -1119,6 +1122,12 @@ static inline int sdw_stream_remove_slave(struct sdw_slave *slave, return -EINVAL; } +static inline struct device *of_sdw_find_device_by_node(struct device_node *np) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return NULL; +} + /* messaging and data APIs */ static inline int sdw_read(struct sdw_slave *slave, u32 addr) { -- cgit v1.2.3 From 2e07017b28e8bbace4a4973d11d0646575d36f94 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 9 Sep 2025 13:19:45 +0100 Subject: soundwire: bus: add sdw_slave_get_current_bank helper There have been 2 instances of this helper in codec drivers; it does not make sense to keep duplicating this part of the code. Let's add a helper sdw_slave_get_current_bank() for codec drivers to use.
Signed-off-by: Srinivas Kandagatla Acked-by: Vinod Koul Reviewed-by: Dmitry Baryshkov Link: https://patch.msgid.link/20250909121954.225833-5-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- include/linux/soundwire/sdw.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 096213956d31..e6a3476bcef1 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -1089,6 +1089,8 @@ int sdw_stream_remove_slave(struct sdw_slave *slave, struct device *of_sdw_find_device_by_node(struct device_node *np); +int sdw_slave_get_current_bank(struct sdw_slave *sdev); + int sdw_slave_get_scale_index(struct sdw_slave *slave, u8 *base); /* messaging and data APIs */ @@ -1128,6 +1130,12 @@ static inline struct device *of_sdw_find_device_by_node(struct device_node *np) return NULL; } +static inline int sdw_slave_get_current_bank(struct sdw_slave *sdev) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + /* messaging and data APIs */ static inline int sdw_read(struct sdw_slave *slave, u32 addr) { -- cgit v1.2.3 From ef1e734dbe257ce8bc42383b9977b5558f061288 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Thu, 18 Sep 2025 20:06:48 +0300 Subject: power: supply: max77705_charger: use regfields for config registers Using regfields allows cleaning up the mask and register offset definitions, and accessing register info by its functional name.
Signed-off-by: Dzmitry Sankouski Signed-off-by: Sebastian Reichel --- include/linux/power/max77705_charger.h | 102 +++++++++++++++++---------------- 1 file changed, 54 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/power/max77705_charger.h b/include/linux/power/max77705_charger.h index fdec9af9c541..a612795577b6 100644 --- a/include/linux/power/max77705_charger.h +++ b/include/linux/power/max77705_charger.h @@ -9,6 +9,8 @@ #ifndef __MAX77705_CHARGER_H #define __MAX77705_CHARGER_H __FILE__ +#include + /* MAX77705_CHG_REG_CHG_INT */ #define MAX77705_BYP_I BIT(0) #define MAX77705_INP_LIMIT_I BIT(1) @@ -63,7 +65,6 @@ #define MAX77705_BUCK_SHIFT 2 #define MAX77705_BOOST_SHIFT 3 #define MAX77705_WDTEN_SHIFT 4 -#define MAX77705_MODE_MASK GENMASK(3, 0) #define MAX77705_CHG_MASK BIT(MAX77705_CHG_SHIFT) #define MAX77705_UNO_MASK BIT(MAX77705_UNO_SHIFT) #define MAX77705_OTG_MASK BIT(MAX77705_OTG_SHIFT) @@ -74,34 +75,19 @@ #define MAX77705_OTG_CTRL (MAX77705_OTG_MASK | MAX77705_BOOST_MASK) /* MAX77705_CHG_REG_CNFG_01 */ -#define MAX77705_FCHGTIME_SHIFT 0 -#define MAX77705_FCHGTIME_MASK GENMASK(2, 0) -#define MAX77705_CHG_RSTRT_SHIFT 4 -#define MAX77705_CHG_RSTRT_MASK GENMASK(5, 4) #define MAX77705_FCHGTIME_DISABLE 0 #define MAX77705_CHG_RSTRT_DISABLE 0x3 -#define MAX77705_PQEN_SHIFT 7 -#define MAX77705_PQEN_MASK BIT(7) #define MAX77705_CHG_PQEN_DISABLE 0 #define MAX77705_CHG_PQEN_ENABLE 1 /* MAX77705_CHG_REG_CNFG_02 */ -#define MAX77705_OTG_ILIM_SHIFT 6 -#define MAX77705_OTG_ILIM_MASK GENMASK(7, 6) #define MAX77705_OTG_ILIM_500 0 #define MAX77705_OTG_ILIM_900 1 #define MAX77705_OTG_ILIM_1200 2 #define MAX77705_OTG_ILIM_1500 3 -#define MAX77705_CHG_CC GENMASK(5, 0) /* MAX77705_CHG_REG_CNFG_03 */ -#define MAX77705_TO_ITH_SHIFT 0 -#define MAX77705_TO_ITH_MASK GENMASK(2, 0) -#define MAX77705_TO_TIME_SHIFT 3 -#define MAX77705_TO_TIME_MASK GENMASK(5, 3) -#define MAX77705_SYS_TRACK_DIS_SHIFT 7 -#define MAX77705_SYS_TRACK_DIS_MASK BIT(7) #define 
MAX77705_TO_ITH_150MA 0 #define MAX77705_TO_TIME_30M 3 #define MAX77705_SYS_TRACK_ENABLE 0 @@ -110,15 +96,8 @@ /* MAX77705_CHG_REG_CNFG_04 */ #define MAX77705_CHG_MINVSYS_SHIFT 6 #define MAX77705_CHG_MINVSYS_MASK GENMASK(7, 6) -#define MAX77705_CHG_PRM_SHIFT 0 -#define MAX77705_CHG_PRM_MASK GENMASK(5, 0) - -#define MAX77705_CHG_CV_PRM_SHIFT 0 -#define MAX77705_CHG_CV_PRM_MASK GENMASK(5, 0) /* MAX77705_CHG_REG_CNFG_05 */ -#define MAX77705_REG_B2SOVRC_SHIFT 0 -#define MAX77705_REG_B2SOVRC_MASK GENMASK(3, 0) #define MAX77705_B2SOVRC_DISABLE 0 #define MAX77705_B2SOVRC_4_5A 6 #define MAX77705_B2SOVRC_4_8A 8 @@ -128,9 +107,8 @@ #define MAX77705_WDTCLR_SHIFT 0 #define MAX77705_WDTCLR_MASK GENMASK(1, 0) #define MAX77705_WDTCLR 1 -#define MAX77705_CHGPROT_MASK GENMASK(3, 2) -#define MAX77705_CHGPROT_UNLOCKED GENMASK(3, 2) -#define MAX77705_SLOWEST_LX_SLOPE GENMASK(6, 5) +#define MAX77705_CHGPROT_UNLOCKED 3 +#define MAX77705_SLOWEST_LX_SLOPE 3 /* MAX77705_CHG_REG_CNFG_07 */ #define MAX77705_CHG_FMBST 4 @@ -140,36 +118,14 @@ #define MAX77705_REG_FGSRC_MASK BIT(MAX77705_REG_FGSRC_SHIFT) /* MAX77705_CHG_REG_CNFG_08 */ -#define MAX77705_REG_FSW_SHIFT 0 -#define MAX77705_REG_FSW_MASK GENMASK(1, 0) #define MAX77705_CHG_FSW_3MHz 0 #define MAX77705_CHG_FSW_2MHz 1 #define MAX77705_CHG_FSW_1_5MHz 2 /* MAX77705_CHG_REG_CNFG_09 */ -#define MAX77705_CHG_CHGIN_LIM_MASK GENMASK(6, 0) -#define MAX77705_CHG_EN_MASK BIT(7) #define MAX77705_CHG_DISABLE 0 -#define MAX77705_CHARGER_CHG_CHARGING(_reg) \ - (((_reg) & MAX77705_CHG_EN_MASK) > 1) - - -/* MAX77705_CHG_REG_CNFG_10 */ -#define MAX77705_CHG_WCIN_LIM GENMASK(5, 0) - -/* MAX77705_CHG_REG_CNFG_11 */ -#define MAX77705_VBYPSET_SHIFT 0 -#define MAX77705_VBYPSET_MASK GENMASK(6, 0) /* MAX77705_CHG_REG_CNFG_12 */ -#define MAX77705_CHGINSEL_SHIFT 5 -#define MAX77705_CHGINSEL_MASK BIT(MAX77705_CHGINSEL_SHIFT) -#define MAX77705_WCINSEL_SHIFT 6 -#define MAX77705_WCINSEL_MASK BIT(MAX77705_WCINSEL_SHIFT) -#define MAX77705_VCHGIN_REG_MASK GENMASK(4, 3) 
-#define MAX77705_WCIN_REG_MASK GENMASK(2, 1) -#define MAX77705_REG_DISKIP_SHIFT 0 -#define MAX77705_REG_DISKIP_MASK BIT(MAX77705_REG_DISKIP_SHIFT) /* REG=4.5V, UVLO=4.7V */ #define MAX77705_VCHGIN_4_5 0 /* REG=4.5V, UVLO=4.7V */ @@ -183,9 +139,59 @@ #define MAX77705_CURRENT_CHGIN_MIN 100000 #define MAX77705_CURRENT_CHGIN_MAX 3200000 +enum max77705_field_idx { + MAX77705_CHGPROT, + MAX77705_CHG_EN, + MAX77705_CHG_CC_LIM, + MAX77705_CHG_CHGIN_LIM, + MAX77705_CHG_CV_PRM, + MAX77705_CHG_PQEN, + MAX77705_CHG_RSTRT, + MAX77705_CHG_WCIN, + MAX77705_FCHGTIME, + MAX77705_LX_SLOPE, + MAX77705_MODE, + MAX77705_OTG_ILIM, + MAX77705_REG_B2SOVRC, + MAX77705_REG_DISKIP, + MAX77705_REG_FSW, + MAX77705_SYS_TRACK, + MAX77705_TO, + MAX77705_TO_TIME, + MAX77705_VBYPSET, + MAX77705_VCHGIN, + MAX77705_WCIN, + MAX77705_N_REGMAP_FIELDS, +}; + +static const struct reg_field max77705_reg_field[MAX77705_N_REGMAP_FIELDS] = { + [MAX77705_MODE] = REG_FIELD(MAX77705_CHG_REG_CNFG_00, 0, 3), + [MAX77705_FCHGTIME] = REG_FIELD(MAX77705_CHG_REG_CNFG_01, 0, 2), + [MAX77705_CHG_RSTRT] = REG_FIELD(MAX77705_CHG_REG_CNFG_01, 4, 5), + [MAX77705_CHG_PQEN] = REG_FIELD(MAX77705_CHG_REG_CNFG_01, 7, 7), + [MAX77705_CHG_CC_LIM] = REG_FIELD(MAX77705_CHG_REG_CNFG_02, 0, 5), + [MAX77705_OTG_ILIM] = REG_FIELD(MAX77705_CHG_REG_CNFG_02, 6, 7), + [MAX77705_TO] = REG_FIELD(MAX77705_CHG_REG_CNFG_03, 0, 2), + [MAX77705_TO_TIME] = REG_FIELD(MAX77705_CHG_REG_CNFG_03, 3, 5), + [MAX77705_SYS_TRACK] = REG_FIELD(MAX77705_CHG_REG_CNFG_03, 7, 7), + [MAX77705_CHG_CV_PRM] = REG_FIELD(MAX77705_CHG_REG_CNFG_04, 0, 5), + [MAX77705_REG_B2SOVRC] = REG_FIELD(MAX77705_CHG_REG_CNFG_05, 0, 3), + [MAX77705_CHGPROT] = REG_FIELD(MAX77705_CHG_REG_CNFG_06, 2, 3), + [MAX77705_LX_SLOPE] = REG_FIELD(MAX77705_CHG_REG_CNFG_06, 5, 6), + [MAX77705_REG_FSW] = REG_FIELD(MAX77705_CHG_REG_CNFG_08, 0, 1), + [MAX77705_CHG_CHGIN_LIM] = REG_FIELD(MAX77705_CHG_REG_CNFG_09, 0, 6), + [MAX77705_CHG_EN] = REG_FIELD(MAX77705_CHG_REG_CNFG_09, 7, 7), + 
[MAX77705_CHG_WCIN] = REG_FIELD(MAX77705_CHG_REG_CNFG_10, 0, 5), + [MAX77705_VBYPSET] = REG_FIELD(MAX77705_CHG_REG_CNFG_11, 0, 6), + [MAX77705_REG_DISKIP] = REG_FIELD(MAX77705_CHG_REG_CNFG_12, 0, 0), + [MAX77705_WCIN] = REG_FIELD(MAX77705_CHG_REG_CNFG_12, 1, 2), + [MAX77705_VCHGIN] = REG_FIELD(MAX77705_CHG_REG_CNFG_12, 3, 4), +}; + struct max77705_charger_data { struct device *dev; struct regmap *regmap; + struct regmap_field *rfield[MAX77705_N_REGMAP_FIELDS]; struct power_supply_battery_info *bat_info; struct workqueue_struct *wqueue; struct work_struct chgin_work; -- cgit v1.2.3 From bc7d3a0f92dad811110f5602f58fe756cefce2b8 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Thu, 18 Sep 2025 20:06:52 +0300 Subject: power: supply: max77705_charger: use REGMAP_IRQ_REG_LINE macro Refactoring the regmap_irq declarations with REGMAP_IRQ_REG_LINE saves a few lines of definitions. Signed-off-by: Dzmitry Sankouski Signed-off-by: Sebastian Reichel --- include/linux/power/max77705_charger.h | 42 +++++++++++++--------------------- 1 file changed, 16 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/power/max77705_charger.h b/include/linux/power/max77705_charger.h index a612795577b6..6653abfdf747 100644 --- a/include/linux/power/max77705_charger.h +++ b/include/linux/power/max77705_charger.h @@ -12,34 +12,24 @@ #include /* MAX77705_CHG_REG_CHG_INT */ -#define MAX77705_BYP_I BIT(0) -#define MAX77705_INP_LIMIT_I BIT(1) -#define MAX77705_BATP_I BIT(2) -#define MAX77705_BAT_I BIT(3) -#define MAX77705_CHG_I BIT(4) -#define MAX77705_WCIN_I BIT(5) -#define MAX77705_CHGIN_I BIT(6) -#define MAX77705_AICL_I BIT(7) - -/* MAX77705_CHG_REG_CHG_INT_MASK */ -#define MAX77705_BYP_IM BIT(0) -#define MAX77705_INP_LIMIT_IM BIT(1) -#define MAX77705_BATP_IM BIT(2) -#define MAX77705_BAT_IM BIT(3) -#define MAX77705_CHG_IM BIT(4) -#define MAX77705_WCIN_IM BIT(5) -#define MAX77705_CHGIN_IM BIT(6) -#define MAX77705_AICL_IM BIT(7) +#define MAX77705_BYP_I (0) +#define 
MAX77705_INP_LIMIT_I (1) +#define MAX77705_BATP_I (2) +#define MAX77705_BAT_I (3) +#define MAX77705_CHG_I (4) +#define MAX77705_WCIN_I (5) +#define MAX77705_CHGIN_I (6) +#define MAX77705_AICL_I (7) /* MAX77705_CHG_REG_CHG_INT_OK */ -#define MAX77705_BYP_OK BIT(0) -#define MAX77705_DISQBAT_OK BIT(1) -#define MAX77705_BATP_OK BIT(2) -#define MAX77705_BAT_OK BIT(3) -#define MAX77705_CHG_OK BIT(4) -#define MAX77705_WCIN_OK BIT(5) -#define MAX77705_CHGIN_OK BIT(6) -#define MAX77705_AICL_OK BIT(7) +#define MAX77705_BYP_OK BIT(MAX77705_BYP_I) +#define MAX77705_DISQBAT_OK BIT(MAX77705_INP_LIMIT_I) +#define MAX77705_BATP_OK BIT(MAX77705_BATP_I) +#define MAX77705_BAT_OK BIT(MAX77705_BAT_I) +#define MAX77705_CHG_OK BIT(MAX77705_CHG_I) +#define MAX77705_WCIN_OK BIT(MAX77705_WCIN_I) +#define MAX77705_CHGIN_OK BIT(MAX77705_CHGIN_I) +#define MAX77705_AICL_OK BIT(MAX77705_AICL_I) /* MAX77705_CHG_REG_DETAILS_00 */ #define MAX77705_BATP_DTLS BIT(0) -- cgit v1.2.3 From 603b4416232524dafde8e2cf859788dae786dea1 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:30 +0200 Subject: bpf: Update the bpf_prog_calc_tag to use SHA256 Exclusive maps restrict map access to specific programs using a hash. The current hash used for this is SHA1, which is prone to collisions. This patch uses SHA256, which is more resilient against collisions. This new hash is stored in bpf_prog and used by the verifier to determine if a program can access a given exclusive map. The original 64-bit tags are kept, as they are used by users as a short, possibly colliding program identifier for non-security purposes. 
Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-2-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 41f776071ff5..d75902074bd1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -31,6 +31,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -1717,7 +1718,10 @@ struct bpf_prog { enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ - u8 tag[BPF_TAG_SIZE]; + union { + u8 digest[SHA256_DIGEST_SIZE]; + u8 tag[BPF_TAG_SIZE]; + }; struct bpf_prog_stats __percpu *stats; int __percpu *active; unsigned int (*bpf_func)(const void *ctx, -- cgit v1.2.3 From baefdbdf6812e120c9fba9cfb101d3656f478026 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:31 +0200 Subject: bpf: Implement exclusive map creation Exclusive maps allow maps to only be accessed by a program with a matching hash which is specified in the excl_prog_hash attr. 
For the signing use-case, this allows the trusted loader program to load the map and verify the integrity. Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-3-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d75902074bd1..c6a6ee1b2938 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -329,6 +329,7 @@ struct bpf_map { atomic64_t sleepable_refcnt; s64 __percpu *elem_count; u64 cookie; /* write-once */ + char *excl_prog_sha; }; static inline const char *btf_field_type_name(enum btf_field_type type) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 233de8677382..57687b2e1c47 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1522,6 +1522,12 @@ union bpf_attr { * If provided, map_flags should have BPF_F_TOKEN_FD flag set. */ __s32 map_token_fd; + + /* Hash of the program that has exclusive access to the map. + */ + __aligned_u64 excl_prog_hash; + /* Size of the passed excl_prog_hash. */ + __u32 excl_prog_hash_size; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ -- cgit v1.2.3 From ea2e6467ac36bf3d785defc89e58269b15d182f7 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:35 +0200 Subject: bpf: Return hashes of maps in BPF_OBJ_GET_INFO_BY_FD Currently only array maps are supported, but the implementation can be extended for other maps and objects. The hash is memoized only for exclusive and frozen maps as their content is stable until the exclusive program modifies the map. This is required for BPF signing, enabling a trusted loader program to verify a map's integrity. The loader retrieves the map's runtime hash from the kernel and compares it against an expected hash computed at build time. 
Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-7-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c6a6ee1b2938..e0c2c78a5faa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -110,6 +111,7 @@ struct bpf_map_ops { long (*map_pop_elem)(struct bpf_map *map, void *value); long (*map_peek_elem)(struct bpf_map *map, void *value); void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); + int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -289,6 +291,7 @@ struct bpf_map_owner { }; struct bpf_map { + u8 sha[SHA256_DIGEST_SIZE]; const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 57687b2e1c47..0987b52d5648 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6672,6 +6672,8 @@ struct bpf_map_info { __u32 btf_value_type_id; __u32 btf_vmlinux_id; __u64 map_extra; + __aligned_u64 hash; + __u32 hash_size; } __attribute__((aligned(8))); struct bpf_btf_info { -- cgit v1.2.3 From 8cd189e414bb705312fbfff7f7b5605f6de2459a Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:36 +0200 Subject: bpf: Move the signature kfuncs to helpers.c No functional changes, except for the addition of the headers for the kfuncs so that they can be used for signature verification. 
Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-8-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e0c2c78a5faa..dfc1a27b56d5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3424,6 +3424,38 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, #endif /* CONFIG_BPF_SYSCALL */ #endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */ +#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) + +struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags); +struct bpf_key *bpf_lookup_system_key(u64 id); +void bpf_key_put(struct bpf_key *bkey); +int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, + struct bpf_dynptr *sig_p, + struct bpf_key *trusted_keyring); + +#else +static inline struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) +{ + return NULL; +} + +static inline struct bpf_key *bpf_lookup_system_key(u64 id) +{ + return NULL; +} + +static inline void bpf_key_put(struct bpf_key *bkey) +{ +} + +static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, + struct bpf_dynptr *sig_p, + struct bpf_key *trusted_keyring) +{ + return -EOPNOTSUPP; +} +#endif /* defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) */ + /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; -- cgit v1.2.3 From eafedbc7c050c44744fbdf80bdf3315e860b7513 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 19 Sep 2025 06:42:07 +0000 Subject: rust_binder: add Rust Binder driver We're generally not proponents of rewrites (nasty uncomfortable things that make you late for dinner!). So why rewrite Binder? 
Binder has been evolving over the past 15+ years to meet the evolving needs of Android. Its responsibilities, expectations, and complexity have grown considerably during that time. While we expect Binder to continue to evolve along with Android, there are a number of factors that currently constrain our ability to develop/maintain it. Briefly those are: 1. Complexity: Binder is at the intersection of everything in Android and fulfills many responsibilities beyond IPC. It has become many things to many people, and due to its many features and their interactions with each other, its complexity is quite high. In just 6kLOC it must deliver transactions to the right threads. It must correctly parse and translate the contents of transactions, which can contain several objects of different types (e.g., pointers, fds) that can interact with each other. It controls the size of thread pools in userspace, and ensures that transactions are assigned to threads in ways that avoid deadlocks where the threadpool has run out of threads. It must track refcounts of objects that are shared by several processes by forwarding refcount changes between the processes correctly. It must handle numerous error scenarios and it combines/nests 13 different locks, 7 reference counters, and atomic variables. Finally, it must do all of this as fast and efficiently as possible. Minor performance regressions can cause a noticeably degraded user experience. 2. Things to improve: Thousand-line functions [1], error-prone error handling [2], and confusing structure can occur as a code base grows organically. After more than a decade of development, this codebase could use an overhaul. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/android/binder.c?h=v6.5#n2896 [2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/android/binder.c?h=v6.5#n3658 3. Security critical: Binder is a critical part of Android's sandboxing strategy. 
Even Android's most de-privileged sandboxes (e.g. the Chrome renderer, or SW Codec) have direct access to Binder. More than just about any other component, it's important that Binder provide robust security, and itself be robust against security vulnerabilities. It's #1 (high complexity) that has made continuing to evolve Binder and resolving #2 (tech debt) exceptionally difficult without causing #3 (security issues). For Binder to continue to meet Android's needs, we need better ways to manage (and reduce!) complexity without increasing the risk. The biggest change is obviously the choice of programming language. We decided to use Rust because it directly addresses a number of the challenges within Binder that we have faced during the last years. It prevents mistakes with ref counting, locking, bounds checking, and also does a lot to reduce the complexity of error handling. Additionally, we've been able to use the more expressive type system to encode the ownership semantics of the various structs and pointers, which takes the complexity of managing object lifetimes out of the hands of the programmer, reducing the risk of use-after-frees and similar problems. Rust has many different pointer types that it uses to encode ownership semantics into the type system, and this is probably one of the most important aspects of how it helps in Binder. The Binder driver has a lot of different objects that have complex ownership semantics; some pointers own a refcount, some pointers have exclusive ownership, and some pointers just reference the object and it is kept alive in some other manner. With Rust, we can use a different pointer type for each kind of pointer, which enables the compiler to enforce that the ownership semantics are implemented correctly. Another useful feature is Rust's error handling. Rust allows for more simplified error handling with features such as destructors, and you get compilation failures if errors are not properly handled. 
This means that even though Rust requires you to spend more lines of code than C on things such as writing down invariants that are left implicit in C, the Rust driver is still slightly smaller than C binder: Rust is 5.5kLOC and C is 5.8kLOC. (These numbers are excluding blank lines, comments, binderfs, and any debugging facilities in C that are not yet implemented in the Rust driver. The numbers include abstractions in rust/kernel/ that are unlikely to be used by other drivers than Binder.) Although this rewrite completely rethinks how the code is structured and how assumptions are enforced, we do not fundamentally change *how* the driver does the things it does. A lot of careful thought has gone into the existing design. The rewrite is aimed rather at improving code health, structure, readability, robustness, security, maintainability and extensibility. We also include more inline documentation, and improve how assumptions in the code are enforced. Furthermore, all unsafe code is annotated with a SAFETY comment that explains why it is correct. We have left the binderfs filesystem component in C. Rewriting it in Rust would be a large amount of work and requires a lot of bindings to the file system interfaces. Binderfs has not historically had the same challenges with security and complexity, so rewriting binderfs seems to have lower value than the rest of Binder. Correctness and feature parity ------------------------------ Rust binder passes all tests that validate the correctness of Binder in the Android Open Source Project. We can boot a device, and run a variety of apps and functionality without issues. We have performed this both on the Cuttlefish Android emulator device, and on a Pixel 6 Pro. As for feature parity, Rust binder currently implements all features that C binder supports, with the exception of some debugging facilities. The missing debugging facilities will be added before we submit the Rust implementation upstream. 
Tracepoints ----------- I did not include all of the tracepoints as I felt that the mechanism for making C access fields of Rust structs should be discussed on list separately. I also did not include the support for building Rust Binder as a module since that requires exporting a bunch of additional symbols on the C side. Original RFC Link with old benchmark numbers: https://lore.kernel.org/r/20231101-rust-binder-v1-0-08ba9197f637@google.com Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Matt Gilbride Signed-off-by: Matt Gilbride Acked-by: Carlos Llamas Acked-by: Paul Moore Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20250919-rust-binder-v2-1-a384b09f28dd@google.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 1fd92021a573..03ee4c7010d7 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -38,7 +38,7 @@ enum { BINDER_TYPE_PTR = B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE), }; -enum { +enum flat_binder_object_flags { FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff, FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100, -- cgit v1.2.3 From 01b4a3061b1d4ded108e1a700b4414c00662954c Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Mon, 8 Sep 2025 14:12:55 +0300 Subject: wifi: nl80211: Add more configuration options for NAN commands Current NAN APIs have only basic configuration for master preference and operating bands. Add and parse additional parameters which provide more control over NAN synchronization. The newly added attributes allow to publish additional NAN attributes and vendor elements in NAN beacons, control scan and discovery beacons periodicity, enable/disable DW notifications etc. 
Signed-off-by: Andrei Otcheretianski tested: Miriam Rachel Korenblit Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.a4779492bf8e.I375feb919bd72358173766b9fe10010c40796b33@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 60 +++++++++++++++++++++++ include/uapi/linux/nl80211.h | 110 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 168 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4072a67c9cc9..e2f4ca500ea3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3912,6 +3912,38 @@ struct cfg80211_qos_map { struct cfg80211_dscp_range up[8]; }; +/** + * struct cfg80211_nan_band_config - NAN band specific configuration + * + * @chan: Pointer to the IEEE 802.11 channel structure. The channel to be used + * for NAN operations on this band. For 2.4 GHz band, this is always + * channel 6. For 5 GHz band, the channel is either 44 or 149, according + * to the regulatory constraints. If chan pointer is NULL the entire band + * configuration entry is considered invalid and should not be used. + * @rssi_close: RSSI close threshold used for NAN state transition algorithm + * as described in chapters 3.3.6 and 3.3.7 "NAN Device Role and State + * Transition" of Wi-Fi Aware Specification v4.0. If not + * specified (set to 0), default device value is used. The value should + * be greater than -60 dBm. + * @rssi_middle: RSSI middle threshold used for NAN state transition algorithm. + * as described in chapters 3.3.6 and 3.3.7 "NAN Device Role and State + * Transition" of Wi-Fi Aware Specification v4.0. If not + * specified (set to 0), default device value is used. The value should be + * greater than -75 dBm and less than rssi_close. + * @awake_dw_interval: Committed DW interval. Valid values range: 0-5. 0 + * indicates no wakeup for DW and can't be used on 2.4GHz band, otherwise + * 2^(n-1). 
+ * @disable_scan: If true, the device will not scan this band for cluster + * merge. Disabling scan on 2.4 GHz band is not allowed. + */ +struct cfg80211_nan_band_config { + struct ieee80211_channel *chan; + s8 rssi_close; + s8 rssi_middle; + u8 awake_dw_interval; + bool disable_scan; +}; + /** * struct cfg80211_nan_conf - NAN configuration * @@ -3921,10 +3953,31 @@ struct cfg80211_qos_map { * @bands: operating bands, a bitmap of &enum nl80211_band values. * For instance, for NL80211_BAND_2GHZ, bit 0 would be set * (i.e. BIT(NL80211_BAND_2GHZ)). + * @cluster_id: cluster ID used for NAN synchronization. This is a MAC address + * that can take a value from 50-6F-9A-01-00-00 to 50-6F-9A-01-FF-FF. + * If NULL, the device will pick a random Cluster ID. + * @scan_period: period (in seconds) between NAN scans. + * @scan_dwell_time: dwell time (in milliseconds) for NAN scans. + * @discovery_beacon_interval: interval (in TUs) for discovery beacons. + * @band_cfgs: array of band specific configurations, indexed by + * &enum nl80211_band values. + * @extra_nan_attrs: pointer to additional NAN attributes. + * @extra_nan_attrs_len: length of the additional NAN attributes. + * @vendor_elems: pointer to vendor-specific elements. + * @vendor_elems_len: length of the vendor-specific elements. */ struct cfg80211_nan_conf { u8 master_pref; u8 bands; + const u8 *cluster_id; + u16 scan_period; + u16 scan_dwell_time; + u8 discovery_beacon_interval; + struct cfg80211_nan_band_config band_cfgs[NUM_NL80211_BANDS]; + const u8 *extra_nan_attrs; + u16 extra_nan_attrs_len; + const u8 *vendor_elems; + u16 vendor_elems_len; }; /** @@ -3933,10 +3986,17 @@ struct cfg80211_nan_conf { * * @CFG80211_NAN_CONF_CHANGED_PREF: master preference * @CFG80211_NAN_CONF_CHANGED_BANDS: operating bands + * @CFG80211_NAN_CONF_CHANGED_CONFIG: changed additional configuration. + * When this flag is set, it indicates that some additional attribute(s) + * (other then master_pref and bands) have been changed. 
In this case, + * all the unchanged attributes will be properly configured to their + * previous values. The driver doesn't need to store any + * previous configuration besides master_pref and bands. */ enum cfg80211_nan_conf_changes { CFG80211_NAN_CONF_CHANGED_PREF = BIT(0), CFG80211_NAN_CONF_CHANGED_BANDS = BIT(1), + CFG80211_NAN_CONF_CHANGED_CONFIG = BIT(2), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index aed0b4c5d5e8..20b8202a3d58 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1085,8 +1085,9 @@ * %NL80211_ATTR_NAN_MASTER_PREF attribute and optional * %NL80211_ATTR_BANDS attributes. If %NL80211_ATTR_BANDS is * omitted or set to 0, it means don't-care and the device will - * decide what to use. After this command NAN functions can be - * added. + * decide what to use. Additional cluster configuration may be + * optionally provided with %NL80211_ATTR_NAN_CONFIG. + * After this command NAN functions can be added. * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by * its %NL80211_ATTR_WDEV interface. * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined @@ -1115,6 +1116,10 @@ * current configuration is not changed. If it is present but * set to zero, the configuration is changed to don't-care * (i.e. the device can decide what to do). + * Additional parameters may be provided with + * %NL80211_ATTR_NAN_CONFIG. User space should provide all previously + * configured nested attributes under %NL80211_ATTR_NAN_CONFIG, even if + * only a subset was changed. * @NL80211_CMD_NAN_MATCH: Notification sent when a match is reported. * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and * %NL80211_ATTR_COOKIE. @@ -2936,6 +2941,12 @@ enum nl80211_commands { * indicate that it wants strict checking on the BSS parameters to be * modified. * + * @NL80211_ATTR_NAN_CONFIG: Nested attribute for + * extended NAN cluster configuration. 
This is used with + * %NL80211_CMD_START_NAN and %NL80211_CMD_CHANGE_NAN_CONFIG. + * See &enum nl80211_nan_conf_attributes for details. + * This attribute is optional. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3498,6 +3509,7 @@ enum nl80211_attrs { NL80211_ATTR_S1G_LONG_BEACON_PERIOD, NL80211_ATTR_S1G_SHORT_BEACON, NL80211_ATTR_BSS_PARAM, + NL80211_ATTR_NAN_CONFIG, /* add attributes here, update the policy in nl80211.c */ @@ -7323,6 +7335,100 @@ enum nl80211_nan_match_attributes { NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1 }; +/** + * enum nl80211_nan_band_conf_attributes - NAN band configuration attributes + * @__NL80211_NAN_BAND_CONF_INVALID: Invalid. + * @NL80211_NAN_BAND_CONF_BAND: Band for which the configuration is + * being set. The value is according to &enum nl80211_band (u8). + * @NL80211_NAN_BAND_CONF_FREQ: Discovery frequency. This attribute shall not + * be present on 2.4 GHZ band. On 5 GHz band its presence is optional. + * The allowed values are 5220 (channel 44) or 5745 (channel 149). + * If not present, channel 149 is used if allowed, otherwise channel 44 + * will be selected. The value is in MHz (u16). + * @NL80211_NAN_BAND_CONF_RSSI_CLOSE: RSSI close threshold used for NAN state + * transition algorithm as described in chapters 3.3.6 and 3.3.7 "NAN + * Device Role and State Transition" of Wi-Fi Aware (TM) Specification + * v4.0. If not specified, default device value is used. The value should + * be greater than -60 dBm (s8). + * @NL80211_NAN_BAND_CONF_RSSI_MIDDLE: RSSI middle threshold used for NAN state + * transition algorithm as described in chapters 3.3.6 and 3.3.7 "NAN + * Device Role and State Transition" of Wi-Fi Aware (TM) Specification + * v4.0. If not present, default device value is used. 
The value should be + * greater than -75 dBm and less than %NL80211_NAN_BAND_CONF_RSSI_CLOSE + * (s8). + * @NL80211_NAN_BAND_CONF_WAKE_DW: Committed DW information (values 0-5). + * Value 0 means that the device will not wake up during the + * discovery window. Values 1-5 mean that the device will wake up + * during each 2^(n - 1) discovery window, where n is the value of + * this attribute. Setting this attribute to 0 is not allowed on + * 2.4 GHz band (u8). This is an optional parameter (default is 1). + * @NL80211_NAN_BAND_CONF_DISABLE_SCAN: Optional flag attribute to disable + * scanning (for cluster merge) on the band. If set, the device will not + * scan on this band anymore. Disabling scanning on 2.4 GHz band is not + * allowed. + * @NUM_NL80211_NAN_BAND_CONF_ATTR: Internal. + * @NL80211_NAN_BAND_CONF_ATTR_MAX: Highest NAN band configuration attribute. + * + * These attributes are used to configure NAN band-specific parameters. Note, + * that both RSSI attributes should be configured (or both left unset). + */ +enum nl80211_nan_band_conf_attributes { + __NL80211_NAN_BAND_CONF_INVALID, + NL80211_NAN_BAND_CONF_BAND, + NL80211_NAN_BAND_CONF_FREQ, + NL80211_NAN_BAND_CONF_RSSI_CLOSE, + NL80211_NAN_BAND_CONF_RSSI_MIDDLE, + NL80211_NAN_BAND_CONF_WAKE_DW, + NL80211_NAN_BAND_CONF_DISABLE_SCAN, + + /* keep last */ + NUM_NL80211_NAN_BAND_CONF_ATTR, + NL80211_NAN_BAND_CONF_ATTR_MAX = NUM_NL80211_NAN_BAND_CONF_ATTR - 1, +}; + +/** + * enum nl80211_nan_conf_attributes - NAN configuration attributes + * @__NL80211_NAN_CONF_INVALID: Invalid attribute, used for validation. + * @NL80211_NAN_CONF_CLUSTER_ID: ID for the NAN cluster. This is a MAC + * address that can take values from 50-6F-9A-01-00-00 to + * 50-6F-9A-01-FF-FF. This attribute is optional. If not present, + * a random Cluster ID will be chosen. + * @NL80211_NAN_CONF_EXTRA_ATTRS: Additional NAN attributes to be + * published in the beacons. This is an optional byte array. 
+ * @NL80211_NAN_CONF_VENDOR_ELEMS: Vendor-specific elements that will + * be published in the beacons. This is an optional byte array. + * @NL80211_NAN_CONF_BAND_CONFIGS: This is a nested array attribute, + * containing multiple entries for each supported band. Each band + * configuration consists of &enum nl80211_nan_band_conf_attributes. + * @NL80211_NAN_CONF_SCAN_PERIOD: Scan period in seconds. If not configured, + * device default is used. Zero value will disable scanning. + * This is u16 (optional). + * @NL80211_NAN_CONF_SCAN_DWELL_TIME: Scan dwell time in TUs per channel. + * Only non-zero values are valid. If not configured the device default + * value is used. This is u16 (optional) + * @NL80211_NAN_CONF_DISCOVERY_BEACON_INTERVAL: Discovery beacon interval + * in TUs. Valid range is 50-200 TUs. If not configured the device default + * value is used. This is u8 (optional) + * @NUM_NL80211_NAN_CONF_ATTR: Internal. + * @NL80211_NAN_CONF_ATTR_MAX: Highest NAN configuration attribute. + * + * These attributes are used to configure NAN-specific parameters. + */ +enum nl80211_nan_conf_attributes { + __NL80211_NAN_CONF_INVALID, + NL80211_NAN_CONF_CLUSTER_ID, + NL80211_NAN_CONF_EXTRA_ATTRS, + NL80211_NAN_CONF_VENDOR_ELEMS, + NL80211_NAN_CONF_BAND_CONFIGS, + NL80211_NAN_CONF_SCAN_PERIOD, + NL80211_NAN_CONF_SCAN_DWELL_TIME, + NL80211_NAN_CONF_DISCOVERY_BEACON_INTERVAL, + + /* keep last */ + NUM_NL80211_NAN_CONF_ATTR, + NL80211_NAN_CONF_ATTR_MAX = NUM_NL80211_NAN_CONF_ATTR - 1, +}; + /** * enum nl80211_external_auth_action - Action to perform with external * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION. -- cgit v1.2.3 From ba9b2ceaa2558a38a5da59fd654b641610a8568e Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Mon, 8 Sep 2025 14:12:56 +0300 Subject: wifi: nl80211: Add NAN Discovery Window (DW) notification This notification will be used by the device to inform user space about upcoming DW. 
When received, user space will be able to prepare multicast Service Discovery Frames (SDFs) to be transmitted during the next DW using %NL80211_CMD_FRAME command on the NAN management interface. The device/driver will take care to transmit the frames in the correct timing. This allows to implement a synchronized Discovery Engine (DE) in user space, if the device doesn't support DE offload. Note that this notification can be sent before the actual DW starts as long as the driver/device handles the actual timing of the SDF transmission. Signed-off-by: Andrei Otcheretianski Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.0e1d15031bab.I5b1721e61b63910452b3c5cdcdc1e94cb094d4c9@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ include/uapi/linux/nl80211.h | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e2f4ca500ea3..0c1311d254be 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3959,6 +3959,8 @@ struct cfg80211_nan_band_config { * @scan_period: period (in seconds) between NAN scans. * @scan_dwell_time: dwell time (in milliseconds) for NAN scans. * @discovery_beacon_interval: interval (in TUs) for discovery beacons. + * @enable_dw_notification: flag to enable/disable discovery window + * notifications. * @band_cfgs: array of band specific configurations, indexed by * &enum nl80211_band values. * @extra_nan_attrs: pointer to additional NAN attributes. 
@@ -3973,6 +3975,7 @@ struct cfg80211_nan_conf { u16 scan_period; u16 scan_dwell_time; u8 discovery_beacon_interval; + bool enable_dw_notification; struct cfg80211_nan_band_config band_cfgs[NUM_NL80211_BANDS]; const u8 *extra_nan_attrs; u16 extra_nan_attrs_len; @@ -10062,6 +10065,15 @@ void cfg80211_schedule_channels_check(struct wireless_dev *wdev); */ void cfg80211_epcs_changed(struct net_device *netdev, bool enabled); +/** + * cfg80211_next_nan_dw_notif - Notify about the next NAN Discovery Window (DW) + * @wdev: Pointer to the wireless device structure + * @chan: DW channel (6, 44 or 149) + * @gfp: Memory allocation flags + */ +void cfg80211_next_nan_dw_notif(struct wireless_dev *wdev, + struct ieee80211_channel *chan, gfp_t gfp); + #ifdef CONFIG_CFG80211_DEBUGFS /** * wiphy_locked_debugfs_read - do a locked read in debugfs diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 20b8202a3d58..d674608e2635 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1349,6 +1349,15 @@ * control EPCS configuration. Used to notify userland on the current state * of EPCS. * + * @NL80211_CMD_NAN_NEXT_DW_NOTIFICATION: This command is used to notify + * user space about the next NAN Discovery Window (DW). User space may use + * it to prepare frames to be sent in the next DW. + * %NL80211_ATTR_WIPHY_FREQ is used to indicate the frequency of the next + * DW. SDF transmission should be requested with %NL80211_CMD_FRAME and + * the device/driver shall take care of the actual transmission timing. + * This notification is only sent to the NAN interface owning socket + * (see %NL80211_ATTR_SOCKET_OWNER flag). 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1609,6 +1618,8 @@ enum nl80211_commands { NL80211_CMD_ASSOC_MLO_RECONF, NL80211_CMD_EPCS_CFG, + NL80211_CMD_NAN_NEXT_DW_NOTIFICATION, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -7409,6 +7420,10 @@ enum nl80211_nan_band_conf_attributes { * @NL80211_NAN_CONF_DISCOVERY_BEACON_INTERVAL: Discovery beacon interval * in TUs. Valid range is 50-200 TUs. If not configured the device default * value is used. This is u8 (optional) + * @NL80211_NAN_CONF_NOTIFY_DW: If set, the driver will notify userspace about + * the upcoming discovery window with + * %NL80211_CMD_NAN_NEXT_DW_NOTIFICATION. + * This is a flag attribute. * @NUM_NL80211_NAN_CONF_ATTR: Internal. * @NL80211_NAN_CONF_ATTR_MAX: Highest NAN configuration attribute. * @@ -7423,6 +7438,7 @@ enum nl80211_nan_conf_attributes { NL80211_NAN_CONF_SCAN_PERIOD, NL80211_NAN_CONF_SCAN_DWELL_TIME, NL80211_NAN_CONF_DISCOVERY_BEACON_INTERVAL, + NL80211_NAN_CONF_NOTIFY_DW, /* keep last */ NUM_NL80211_NAN_CONF_ATTR, -- cgit v1.2.3 From 1ccfd8db34fb3b1852284668094d7207499c2415 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Mon, 8 Sep 2025 14:12:57 +0300 Subject: wifi: cfg80211: Add cluster joined notification APIs The drivers should notify upper layers and user space when a NAN device joins a cluster. This is needed, for example, to set the correct addr3 in SDF frames. Add API to report cluster join event. 
Signed-off-by: Andrei Otcheretianski Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.ad27b7b6e4d9.I70b213a2a49f18d1ba2ad325e67e8eff51cc7a1f@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++++++++++ include/uapi/linux/nl80211.h | 8 ++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0c1311d254be..1b10bd31bdd6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -10074,6 +10074,20 @@ void cfg80211_epcs_changed(struct net_device *netdev, bool enabled); void cfg80211_next_nan_dw_notif(struct wireless_dev *wdev, struct ieee80211_channel *chan, gfp_t gfp); +/** + * cfg80211_nan_cluster_joined - Notify about NAN cluster join + * @wdev: Pointer to the wireless device structure + * @cluster_id: Cluster ID of the NAN cluster that was joined or started + * @new_cluster: Indicates if this is a new cluster or an existing one + * @gfp: Memory allocation flags + * + * This function is used to notify user space when a NAN cluster has been + * joined, providing the cluster ID and a flag whether it is a new cluster. + */ +void cfg80211_nan_cluster_joined(struct wireless_dev *wdev, + const u8 *cluster_id, bool new_cluster, + gfp_t gfp); + #ifdef CONFIG_CFG80211_DEBUGFS /** * wiphy_locked_debugfs_read - do a locked read in debugfs diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d674608e2635..c5a7658b7297 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1357,6 +1357,9 @@ * the device/driver shall take care of the actual transmission timing. * This notification is only sent to the NAN interface owning socket * (see %NL80211_ATTR_SOCKET_OWNER flag). + * @NL80211_CMD_NAN_CLUSTER_JOINED: This command is used to notify + * user space that the NAN new cluster has been joined. The cluster ID is + * indicated by %NL80211_ATTR_MAC. 
* * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1619,6 +1622,7 @@ enum nl80211_commands { NL80211_CMD_EPCS_CFG, NL80211_CMD_NAN_NEXT_DW_NOTIFICATION, + NL80211_CMD_NAN_CLUSTER_JOINED, /* add new commands above here */ @@ -2957,6 +2961,9 @@ enum nl80211_commands { * %NL80211_CMD_START_NAN and %NL80211_CMD_CHANGE_NAN_CONFIG. * See &enum nl80211_nan_conf_attributes for details. * This attribute is optional. + * @NL80211_ATTR_NAN_NEW_CLUSTER: Flag attribute indicating that a new + * NAN cluster has been created. This is used with + * %NL80211_CMD_NAN_CLUSTER_JOINED * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -3521,6 +3528,7 @@ enum nl80211_attrs { NL80211_ATTR_S1G_SHORT_BEACON, NL80211_ATTR_BSS_PARAM, NL80211_ATTR_NAN_CONFIG, + NL80211_ATTR_NAN_NEW_CLUSTER, /* add attributes here, update the policy in nl80211.c */ -- cgit v1.2.3 From 3cbadd84f5c4ea792c0df3506639a2cb57ba9b11 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Mon, 8 Sep 2025 14:12:58 +0300 Subject: wifi: nl80211: Add more NAN capabilities Add a better breakdown of NAN capabilities, as NAN has multiple optional features. This allows to better indicate which features are supported or offloaded to the device. 
Signed-off-by: Andrei Otcheretianski Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.bb02cd8c1596.I01fb2e8dc3662b847f3c27117bc4e199fc96d0a3@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c5a7658b7297..423e258cdbd2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2964,6 +2964,10 @@ enum nl80211_commands { * @NL80211_ATTR_NAN_NEW_CLUSTER: Flag attribute indicating that a new * NAN cluster has been created. This is used with * %NL80211_CMD_NAN_CLUSTER_JOINED + * @NL80211_ATTR_NAN_CAPABILITIES: Nested attribute for NAN capabilities. + * This is used with %NL80211_CMD_GET_WIPHY to indicate the NAN + * capabilities supported by the driver. See &enum nl80211_nan_capabilities + * for details. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -3529,6 +3533,7 @@ enum nl80211_attrs { NL80211_ATTR_BSS_PARAM, NL80211_ATTR_NAN_CONFIG, NL80211_ATTR_NAN_NEW_CLUSTER, + NL80211_ATTR_NAN_CAPABILITIES, /* add attributes here, update the policy in nl80211.c */ @@ -8362,4 +8367,54 @@ enum nl80211_s1g_short_beacon_attrs { __NL80211_S1G_SHORT_BEACON_ATTR_LAST - 1 }; +/** + * enum nl80211_nan_capabilities - NAN (Neighbor Aware Networking) + * capabilities. + * + * @__NL80211_NAN_CAPABILITIES_INVALID: Invalid. + * @NL80211_NAN_CAPA_CONFIGURABLE_SYNC: Flag attribute indicating that + * the device supports configurable synchronization. If set, the device + * should be able to handle %NL80211_ATTR_NAN_CONFIG + * attribute in the %NL80211_CMD_START_NAN (and change) command. + * @NL80211_NAN_CAPA_USERSPACE_DE: Flag attribute indicating that + * NAN Discovery Engine (DE) is not offloaded and the driver assumes + * user space DE implementation. 
When set, %NL80211_CMD_ADD_NAN_FUNCTION, + * %NL80211_CMD_DEL_NAN_FUNCTION and %NL80211_CMD_NAN_MATCH commands + * should not be used. In addition, the device/driver should support + * sending discovery window (DW) notifications using + * %NL80211_CMD_NAN_NEXT_DW_NOTIFICATION and handling transmission and + * reception of NAN SDF frames on NAN device interface during DW windows. + * (%NL80211_CMD_FRAME is used to transmit SDFs) + * @NL80211_NAN_CAPA_OP_MODE: u8 attribute indicating the supported operation + * modes as defined in Wi-Fi Aware (TM) specification Table 81 (Operation + * Mode field format). + * @NL80211_NAN_CAPA_NUM_ANTENNAS: u8 attribute indicating the number of + * TX and RX antennas supported by the device. Lower nibble indicates + * the number of TX antennas and upper nibble indicates the number of RX + * antennas. Value 0 indicates the information is not available. + * See table 79 of Wi-Fi Aware (TM) specification (Number of + * Antennas field). + * @NL80211_NAN_CAPA_MAX_CHANNEL_SWITCH_TIME: u16 attribute indicating the + * maximum time in microseconds that the device requires to switch + * channels. + * @NL80211_NAN_CAPA_CAPABILITIES: u8 attribute containing the + * capabilities of the device as defined in Wi-Fi Aware (TM) + * specification Table 79 (Capabilities field). + * @__NL80211_NAN_CAPABILITIES_LAST: Internal + * @NL80211_NAN_CAPABILITIES_MAX: Highest NAN capability attribute. 
+ */ +enum nl80211_nan_capabilities { + __NL80211_NAN_CAPABILITIES_INVALID, + + NL80211_NAN_CAPA_CONFIGURABLE_SYNC, + NL80211_NAN_CAPA_USERSPACE_DE, + NL80211_NAN_CAPA_OP_MODE, + NL80211_NAN_CAPA_NUM_ANTENNAS, + NL80211_NAN_CAPA_MAX_CHANNEL_SWITCH_TIME, + NL80211_NAN_CAPA_CAPABILITIES, + /* keep last */ + __NL80211_NAN_CAPABILITIES_LAST, + NL80211_NAN_CAPABILITIES_MAX = __NL80211_NAN_CAPABILITIES_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From b9c3d426c8a5823b3a1e5078719750c6abb0d2c1 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 8 Sep 2025 14:12:59 +0300 Subject: wifi: cfg80211: Advertise supported NAN capabilities Allow drivers to specify the supported NAN capabilities and support advertising the NAN capabilities to user space. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.2976966556f5.Ic6e43b10049573180c909dad806f279cfb31143e@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 17 +++++++++++++++++ include/net/cfg80211.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d350263f23f3..2110345de8ef 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -6065,4 +6065,21 @@ static inline u32 ieee80211_eml_trans_timeout_in_us(u16 eml_cap) _data + ieee80211_mle_common_size(_data),\ _len - ieee80211_mle_common_size(_data)) +/* NAN operation mode, as defined in Wi-Fi Aware (TM) specification Table 81 */ +#define NAN_OP_MODE_PHY_MODE_VHT 0x01 +#define NAN_OP_MODE_PHY_MODE_HE 0x10 +#define NAN_OP_MODE_PHY_MODE_MASK 0x11 +#define NAN_OP_MODE_80P80MHZ 0x02 +#define NAN_OP_MODE_160MHZ 0x04 +#define NAN_OP_MODE_PNDL_SUPPRTED 0x08 + +/* NAN Device capabilities, as defined in Wi-Fi Aware (TM) specification + * Table 79 + */ +#define NAN_DEV_CAPA_DFS_OWNER 0x01 +#define NAN_DEV_CAPA_EXT_KEY_ID_SUPPORTED 0x02 +#define 
NAN_DEV_CAPA_SIM_NDP_RX_SUPPORTED 0x04 +#define NAN_DEV_CAPA_NDPE_SUPPORTED 0x08 +#define NAN_DEV_CAPA_S3_SUPPORTED 0x10 + #endif /* LINUX_IEEE80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1b10bd31bdd6..e30c1886c530 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5711,6 +5711,42 @@ struct wiphy_radio { u32 antenna_mask; }; +/** + * enum wiphy_nan_flags - NAN capabilities + * + * @WIPHY_NAN_FLAGS_CONFIGURABLE_SYNC: Device supports NAN configurable + * synchronization. + * @WIPHY_NAN_FLAGS_USERSPACE_DE: Device doesn't support DE offload. + */ +enum wiphy_nan_flags { + WIPHY_NAN_FLAGS_CONFIGURABLE_SYNC = BIT(0), + WIPHY_NAN_FLAGS_USERSPACE_DE = BIT(1), +}; + +/** + * struct wiphy_nan_capa - NAN capabilities + * + * This structure describes the NAN capabilities of a wiphy. + * + * @flags: NAN capabilities flags, see &enum wiphy_nan_flags + * @op_mode: NAN operation mode, as defined in Wi-Fi Aware (TM) specification + * Table 81. + * @n_antennas: number of antennas supported by the device for Tx/Rx. Lower + * nibble indicates the number of TX antennas and upper nibble indicates the + * number of RX antennas. Value 0 indicates the information is not + * available. + * @max_channel_switch_time: maximum channel switch time in milliseconds. + * @dev_capabilities: NAN device capabilities as defined in Wi-Fi Aware (TM) + * specification Table 79 (Capabilities field). + */ +struct wiphy_nan_capa { + u32 flags; + u8 op_mode; + u8 n_antennas; + u16 max_channel_switch_time; + u8 dev_capabilities; +}; + #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff /** @@ -5884,6 +5920,7 @@ struct wiphy_radio { * bitmap of &enum nl80211_band values. For instance, for * NL80211_BAND_2GHZ, bit 0 would be set * (i.e. BIT(NL80211_BAND_2GHZ)). 
+ * @nan_capa: NAN capabilities * * @txq_limit: configuration of internal TX queue frame limit * @txq_memory_limit: configuration internal TX queue memory limit @@ -6065,6 +6102,7 @@ struct wiphy { u32 bss_select_support; u8 nan_supported_bands; + struct wiphy_nan_capa nan_capa; u32 txq_limit; u32 txq_memory_limit; -- cgit v1.2.3 From 1884e2594b084a6b1eb438e5eda586f284d80fee Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 8 Sep 2025 14:13:01 +0300 Subject: wifi: cfg80211: Store the NAN cluster ID When the driver indicates that the device has joined a cluster, store the cluster ID. This is needed for data path operations, e.g., filtering received frames etc. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.63e9fef2a3aa.I6c858185c9e71f84bd2c5174d7ee45902b4391c3@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e30c1886c530..26fd42e189ce 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6681,6 +6681,9 @@ struct wireless_dev { struct { struct cfg80211_chan_def chandef; } ocb; + struct { + u8 cluster_id[ETH_ALEN] __aligned(2); + } nan; } u; struct { -- cgit v1.2.3 From fc41f4a28ac4d462487903229494eeb266f68a40 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 8 Sep 2025 14:13:02 +0300 Subject: wifi: mac80211: Support Tx of action frame for NAN Add support for sending management frame over a NAN Device interface: - Declare support for the supported management frames types. - Since action frame transmissions over a NAN Device interface do not necessarily require a channel configuration, e.g., they can be transmitted during DW, modify the Tx path to avoid accessing channel information for NAN Device interface. - In addition modify the points in the Tx path logic to account for cases that a band is not specified in the Tx information. 
Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.23b160089228.I65a58af753bcbcfb5c4ad8ef372d546f889725ba@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a45e4bee65d4..a5140ecf334b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3192,6 +3192,10 @@ ieee80211_get_tx_rate(const struct ieee80211_hw *hw, { if (WARN_ON_ONCE(c->control.rates[0].idx < 0)) return NULL; + + if (c->band >= NUM_NL80211_BANDS) + return NULL; + return &hw->wiphy->bands[c->band]->bitrates[c->control.rates[0].idx]; } -- cgit v1.2.3 From 04f17cfea2442ef2ed01da7ba1f686a58a50048e Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 8 Sep 2025 14:13:06 +0300 Subject: wifi: mac80211: Export an API to check if NAN is started So it can be used by drivers to check if NAN Device interface is started or not. Signed-off-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250908140015.c69652f77eb6.Ie4f3d197e0706e742e3d97614fadc11b22adfbc6@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a5140ecf334b..a55085cf4ec4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7838,4 +7838,10 @@ int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw, int n_vifs, enum ieee80211_chanctx_switch_mode mode); +/** + * ieee80211_vif_nan_started - Return whether a NAN vif is started + * @vif: the vif + * Return: %true iff the vif is a NAN interface and NAN is started + */ +bool ieee80211_vif_nan_started(struct ieee80211_vif *vif); #endif /* MAC80211_H */ -- cgit v1.2.3 From d0688dc2b172d19e20fdb8be8c37930da12aaf88 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 18 Sep 2025 15:19:11 +1000 Subject: wifi: 
cfg80211: correctly implement and validate S1G chandef Currently, the S1G channelisation implementation differs from that of VHT, which is the PHY that S1G is based on. The major difference is that the S1G clock rate is 1/10th that of VHT. However, how their channelisation is represented within cfg80211 and mac80211 differs vastly. To rectify this, remove the use of IEEE80211_CHAN_1/2/4.. flags that were previously used to indicate the control channel width; it should instead be implied that the control channels are 1MHz in the case of S1G. Additionally, introduce the inverse flags - being IEEE80211_CHAN_NO_4/8/16MHZ - which imply that the control channel may not be used for a certain bandwidth. With these new flags, we can perform regulatory and chandef validation just as we would for VHT. To deal with the notion that S1G PHYs may contain a 2MHz primary channel, introduce a new variable, s1g_primary_2mhz, which indicates whether we are operating on a 2MHz primary channel. In this case, the chandef::chan points to the 1MHz primary channel pointed to by the primary channel location. Alongside this, introduce some new helper routines that can extract the sibling 1MHz channel. The sibling is the alternate 1MHz primary subchannel within the 2MHz primary channel that is not pointed to by chandef::chan. Furthermore, due to unique restrictions imposed on S1G PHYs, introduce a new flag, IEEE80211_CHAN_S1G_NO_PRIMARY, which states that the 1MHz channel cannot be used as a primary channel. This is assumed to be set by vendors as it is hardware and regdom specific. When we validate a 2MHz primary channel, we need to ensure both 1MHz subchannels do not contain this flag. If one or both of the 1MHz subchannels contain this flag then the 2MHz primary is not permitted for use as a primary channel. Properly integrate S1G channel validation such that it is implemented consistently with other PHY types such as VHT. 
Additionally, implement a new S1G-specific regulatory flag to allow cfg80211 to understand specific vendor requirements for S1G PHYs. Signed-off-by: Arien Judge Signed-off-by: Andrew Pope Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250918051913.500781-2-lachlan.hodges@morsemicro.com [remove redundant NL80211_ATTR_S1G_PRIMARY_2MHZ check] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 95 ++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 15 +++++++ 2 files changed, 110 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 26fd42e189ce..2d612c760dd1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -129,6 +129,13 @@ struct wiphy; * with very low power (VLP), even if otherwise set to NO_IR. * @IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY: Allow activity on a 20 MHz channel, * even if otherwise set to NO_IR. + * @IEEE80211_CHAN_S1G_NO_PRIMARY: Prevents the channel for use as an S1G + * primary channel. Does not prevent the wider operating channel + * described by the chandef from being used. In order for a 2MHz primary + * to be used, both 1MHz subchannels shall not contain this flag. + * @IEEE80211_CHAN_NO_4MHZ: 4 MHz bandwidth is not permitted on this channel. + * @IEEE80211_CHAN_NO_8MHZ: 8 MHz bandwidth is not permitted on this channel. + * @IEEE80211_CHAN_NO_16MHZ: 16 MHz bandwidth is not permitted on this channel. 
*/ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = BIT(0), @@ -158,6 +165,10 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_CAN_MONITOR = BIT(24), IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25), IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY = BIT(26), + IEEE80211_CHAN_S1G_NO_PRIMARY = BIT(27), + IEEE80211_CHAN_NO_4MHZ = BIT(28), + IEEE80211_CHAN_NO_8MHZ = BIT(29), + IEEE80211_CHAN_NO_16MHZ = BIT(30), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -821,6 +832,9 @@ struct key_params { * @punctured: mask of the punctured 20 MHz subchannels, with * bits turned on being disabled (punctured); numbered * from lower to higher frequency (like in the spec) + * @s1g_primary_2mhz: Indicates if the control channel pointed to + * by 'chan' exists as a 1MHz primary subchannel within an + * S1G 2MHz primary channel. */ struct cfg80211_chan_def { struct ieee80211_channel *chan; @@ -830,6 +844,7 @@ struct cfg80211_chan_def { struct ieee80211_edmg edmg; u16 freq1_offset; u16 punctured; + bool s1g_primary_2mhz; }; /* @@ -990,6 +1005,18 @@ cfg80211_chandef_is_edmg(const struct cfg80211_chan_def *chandef) return chandef->edmg.channels || chandef->edmg.bw_config; } +/** + * cfg80211_chandef_is_s1g - check if chandef represents an S1G channel + * @chandef: the channel definition + * + * Return: %true if S1G. 
+ */ +static inline bool +cfg80211_chandef_is_s1g(const struct cfg80211_chan_def *chandef) +{ + return chandef->chan->band == NL80211_BAND_S1GHZ; +} + /** * cfg80211_chandef_compatible - check if two channel definitions are compatible * @chandef1: first channel definition @@ -10179,4 +10206,72 @@ ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file, void *data); #endif +/** + * cfg80211_s1g_get_start_freq_khz - get S1G chandef start frequency + * @chandef: the chandef to use + * + * Return: the chandefs starting frequency in KHz + */ +static inline u32 +cfg80211_s1g_get_start_freq_khz(const struct cfg80211_chan_def *chandef) +{ + u32 bw_mhz = cfg80211_chandef_get_width(chandef); + u32 center_khz = + MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset; + return center_khz - bw_mhz * 500 + 500; +} + +/** + * cfg80211_s1g_get_end_freq_khz - get S1G chandef end frequency + * @chandef: the chandef to use + * + * Return: the chandefs ending frequency in KHz + */ +static inline u32 +cfg80211_s1g_get_end_freq_khz(const struct cfg80211_chan_def *chandef) +{ + u32 bw_mhz = cfg80211_chandef_get_width(chandef); + u32 center_khz = + MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset; + return center_khz + bw_mhz * 500 - 500; +} + +/** + * cfg80211_s1g_get_primary_sibling - retrieve the sibling 1MHz subchannel + * for an S1G chandef using a 2MHz primary channel. + * @wiphy: wiphy the channel belongs to + * @chandef: the chandef to use + * + * When chandef::s1g_primary_2mhz is set to true, we are operating on a 2MHz + * primary channel. The 1MHz subchannel designated by the primary channel + * location exists within chandef::chan, whilst the 'sibling' is denoted as + * being the other 1MHz subchannel that make up the 2MHz primary channel. + * + * Returns: the sibling 1MHz &struct ieee80211_channel, or %NULL on failure. 
+ */ +static inline struct ieee80211_channel * +cfg80211_s1g_get_primary_sibling(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width_mhz = cfg80211_chandef_get_width(chandef); + u32 pri_1mhz_khz, sibling_1mhz_khz, op_low_1mhz_khz, pri_index; + + if (!chandef->s1g_primary_2mhz || width_mhz < 2) + return NULL; + + pri_1mhz_khz = ieee80211_channel_to_khz(chandef->chan); + op_low_1mhz_khz = cfg80211_s1g_get_start_freq_khz(chandef); + + /* + * Compute the index of the primary 1 MHz subchannel within the + * operating channel, relative to the lowest 1 MHz center frequency. + * Flip the least significant bit to select the even/odd sibling, + * then translate that index back into a channel frequency. + */ + pri_index = (pri_1mhz_khz - op_low_1mhz_khz) / 1000; + sibling_1mhz_khz = op_low_1mhz_khz + ((pri_index ^ 1) * 1000); + + return ieee80211_get_channel_khz(wiphy, sibling_1mhz_khz); +} + #endif /* __NET_CFG80211_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 423e258cdbd2..8134f10e4e6c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2969,6 +2969,10 @@ enum nl80211_commands { * capabilities supported by the driver. See &enum nl80211_nan_capabilities * for details. * + * @NL80211_ATTR_S1G_PRIMARY_2MHZ: flag attribute indicating that the S1G + * primary channel is 2 MHz wide, and the control channel designates + * the 1 MHz primary subchannel within that 2 MHz primary. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3535,6 +3539,8 @@ enum nl80211_attrs { NL80211_ATTR_NAN_NEW_CLUSTER, NL80211_ATTR_NAN_CAPABILITIES, + NL80211_ATTR_S1G_PRIMARY_2MHZ, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4432,6 +4438,12 @@ enum nl80211_wmm_rule { * very low power (VLP) AP, despite being NO_IR. 
* @NL80211_FREQUENCY_ATTR_ALLOW_20MHZ_ACTIVITY: This channel can be active in * 20 MHz bandwidth, despite being NO_IR. + * @NL80211_FREQUENCY_ATTR_NO_4MHZ: 4 MHz operation is not allowed on this + * channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_8MHZ: 8 MHz operation is not allowed on this + * channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_16MHZ: 16 MHz operation is not allowed on this + * channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4477,6 +4489,9 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_CAN_MONITOR, NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP, NL80211_FREQUENCY_ATTR_ALLOW_20MHZ_ACTIVITY, + NL80211_FREQUENCY_ATTR_NO_4MHZ, + NL80211_FREQUENCY_ATTR_NO_8MHZ, + NL80211_FREQUENCY_ATTR_NO_16MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, -- cgit v1.2.3 From 31e7681da78d7e8d2d83185c0e640012a018f229 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 18 Sep 2025 15:19:12 +1000 Subject: wifi: mac80211: correctly initialise S1G chandef for STA When moving to the APs channel, ensure we correctly initialise the chandef and perform the required validation. Additionally, if the AP is beaconing on a 2MHz primary, calculate the 2MHz primary center frequency by extracting the sibling 1MHz primary and averaging the frequencies to find the 2MHz primary center frequency. 
Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250918051913.500781-3-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2110345de8ef..ddff9102f633 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1182,6 +1182,18 @@ enum ieee80211_s1g_chanwidth { IEEE80211_S1G_CHANWIDTH_16MHZ = 15, }; +/** + * enum ieee80211_s1g_pri_chanwidth - S1G primary channel widths + * described in IEEE80211-2024 Table 10-39. + * + * @IEEE80211_S1G_PRI_CHANWIDTH_2MHZ: 2MHz primary channel + * @IEEE80211_S1G_PRI_CHANWIDTH_1MHZ: 1MHz primary channel + */ +enum ieee80211_s1g_pri_chanwidth { + IEEE80211_S1G_PRI_CHANWIDTH_2MHZ = 0, + IEEE80211_S1G_PRI_CHANWIDTH_1MHZ = 1, +}; + #define WLAN_SA_QUERY_TR_ID_LEN 2 #define WLAN_MEMBERSHIP_LEN 8 #define WLAN_USER_POSITION_LEN 16 @@ -3170,8 +3182,12 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define S1G_CAP9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0) -#define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) +#define S1G_OPER_CH_WIDTH_PRIMARY BIT(0) #define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) +#define S1G_OPER_CH_PRIMARY_LOCATION BIT(5) + +#define S1G_2M_PRIMARY_LOCATION_LOWER 0 +#define S1G_2M_PRIMARY_LOCATION_UPPER 1 /* EHT MAC capabilities as defined in P802.11be_D2.0 section 9.4.2.313.2 */ #define IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS 0x01 -- cgit v1.2.3 From cbcd507f01deb983d5cad0a25b6495930ab59593 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Thu, 18 Sep 2025 15:19:13 +1000 Subject: wifi: cfg80211: remove ieee80211_s1g_channel_width With the introduction of proper S1G channel flags, this function is no longer used. Remove it. 
Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250918051913.500781-4-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2d612c760dd1..1c041ce7a03b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6818,16 +6818,6 @@ ieee80211_channel_to_khz(const struct ieee80211_channel *chan) return MHZ_TO_KHZ(chan->center_freq) + chan->freq_offset; } -/** - * ieee80211_s1g_channel_width - get allowed channel width from @chan - * - * Only allowed for band NL80211_BAND_S1GHZ - * @chan: channel - * Return: The allowed channel width for this center_freq - */ -enum nl80211_chan_width -ieee80211_s1g_channel_width(const struct ieee80211_channel *chan); - /** * ieee80211_channel_to_freq_khz - convert channel number to frequency * @chan: channel number -- cgit v1.2.3 From e1b849cfa6b61f1c866a908c9e8dd9b5aaab820b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 9 Apr 2025 17:12:59 +0200 Subject: writeback: Avoid contention on wb->list_lock when switching inodes There can be multiple inode switch works that are trying to switch inodes to / from the same wb. This can happen in particular if some cgroup exits which owns many (thousands) inodes and we need to switch them all. In this case several inode_switch_wbs_work_fn() instances will be just spinning on the same wb->list_lock while only one of them makes forward progress. This wastes CPU cycles and quickly leads to softlockup reports and unusable system. Instead of running several inode_switch_wbs_work_fn() instances in parallel switching to the same wb and contending on wb->list_lock, run just one work item per wb and manage a queue of isw items switching to this wb. 
Acked-by: Tejun Heo Signed-off-by: Jan Kara --- include/linux/backing-dev-defs.h | 4 ++++ include/linux/writeback.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 2ad261082bba..c5c9d89c73ed 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -152,6 +152,10 @@ struct bdi_writeback { struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ struct list_head b_attached; /* attached inodes, protected by list_lock */ struct list_head offline_node; /* anchored at offline_cgwbs */ + struct work_struct switch_work; /* work used to perform inode switching + * to this wb */ + struct llist_head switch_wbs_ctxs; /* queued contexts for + * writeback switching */ union { struct work_struct release_work; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a2848d731a46..15a4bc4ab819 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -265,6 +265,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } +void inode_switch_wbs_work_fn(struct work_struct *work); + #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct folio *folio) -- cgit v1.2.3 From 0cee64c547e3c9cda646af3e075a64f445ee8148 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 12 Sep 2025 12:38:38 +0200 Subject: writeback: Add tracepoint to track pending inode switches Add trace_inode_switch_wbs_queue tracepoint to allow insight into how many inodes are queued to switch their bdi_writeback structure. 
Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Christian Brauner --- include/trace/events/writeback.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 1e23919c0da9..c08aff044e80 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -213,6 +213,35 @@ TRACE_EVENT(inode_foreign_history, ) ); +TRACE_EVENT(inode_switch_wbs_queue, + + TP_PROTO(struct bdi_writeback *old_wb, struct bdi_writeback *new_wb, + unsigned int count), + + TP_ARGS(old_wb, new_wb, count), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(ino_t, old_cgroup_ino) + __field(ino_t, new_cgroup_ino) + __field(unsigned int, count) + ), + + TP_fast_assign( + strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32); + __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); + __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); + __entry->count = count; + ), + + TP_printk("bdi %s: old_cgroup_ino=%lu new_cgroup_ino=%lu count=%u", + __entry->name, + (unsigned long)__entry->old_cgroup_ino, + (unsigned long)__entry->new_cgroup_ino, + __entry->count + ) +); + TRACE_EVENT(inode_switch_wbs, TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb, -- cgit v1.2.3 From e3e1812f8e25ac277f5cc9249802365300c582e3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:28 +0200 Subject: ns: move to_ns_common() to ns_common.h Move the helper to ns_common.h where it belongs. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 20 ++++++++++++++++++++ include/linux/nsproxy.h | 11 ----------- 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 7d22ea50b098..bc2e0758e1c9 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -6,6 +6,15 @@ struct proc_ns_operations; +struct cgroup_namespace; +struct ipc_namespace; +struct mnt_namespace; +struct net; +struct pid_namespace; +struct time_namespace; +struct user_namespace; +struct uts_namespace; + struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; @@ -13,4 +22,15 @@ struct ns_common { refcount_t count; }; +#define to_ns_common(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(__ns)->ns, \ + struct ipc_namespace *: &(__ns)->ns, \ + struct mnt_namespace *: &(__ns)->ns, \ + struct net *: &(__ns)->ns, \ + struct pid_namespace *: &(__ns)->ns, \ + struct time_namespace *: &(__ns)->ns, \ + struct user_namespace *: &(__ns)->ns, \ + struct uts_namespace *: &(__ns)->ns) + #endif diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index dab6a1734a22..e6bec522b139 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -42,17 +42,6 @@ struct nsproxy { }; extern struct nsproxy init_nsproxy; -#define to_ns_common(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: &(__ns->ns), \ - struct ipc_namespace *: &(__ns->ns), \ - struct net *: &(__ns->ns), \ - struct pid_namespace *: &(__ns->ns), \ - struct mnt_namespace *: &(__ns->ns), \ - struct time_namespace *: &(__ns->ns), \ - struct user_namespace *: &(__ns->ns), \ - struct uts_namespace *: &(__ns->ns)) - /* * A structure to encompass all bits needed to install * a partial or complete new set of namespaces. 
-- cgit v1.2.3 From 9296f46a9645cf753d2522093485cebe77635aa6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:29 +0200 Subject: nsfs: add nsfs.h header And move the stuff out from proc_ns.h where it really doesn't belong. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/nsfs.h | 26 ++++++++++++++++++++++++++ include/linux/proc_ns.h | 13 +------------ 2 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 include/linux/nsfs.h (limited to 'include') diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h new file mode 100644 index 000000000000..fb84aa538091 --- /dev/null +++ b/include/linux/nsfs.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Christian Brauner */ + +#ifndef _LINUX_NSFS_H +#define _LINUX_NSFS_H + +#include + +struct path; +struct task_struct; +struct proc_ns_operations; + +int ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops); +typedef struct ns_common *ns_get_path_helper_t(void *); +int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, + void *private_data); + +bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); + +int ns_get_name(char *buf, size_t size, struct task_struct *task, + const struct proc_ns_operations *ns_ops); +void nsfs_init(void); + +#endif /* _LINUX_NSFS_H */ + diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4b20375f3783..5e1a4b378b79 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -5,7 +5,7 @@ #ifndef _LINUX_PROC_NS_H #define _LINUX_PROC_NS_H -#include +#include #include struct pid_namespace; @@ -75,16 +75,5 @@ static inline int ns_alloc_inum(struct ns_common *ns) #define ns_free_inum(ns) proc_free_inum((ns)->inum) #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) -extern int ns_get_path(struct path *path, struct task_struct *task, - const struct proc_ns_operations *ns_ops); -typedef struct 
ns_common *ns_get_path_helper_t(void *); -extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, - void *private_data); - -extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino); - -extern int ns_get_name(char *buf, size_t size, struct task_struct *task, - const struct proc_ns_operations *ns_ops); -extern void nsfs_init(void); #endif /* _LINUX_PROC_NS_H */ -- cgit v1.2.3 From 660def10b01b248fd97255afacb7b0e305ac833a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:30 +0200 Subject: ns: uniformly initialize ns_common No point in cargo-culting the same code across all the different types. Use one common initializer. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/proc_ns.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 5e1a4b378b79..dbb119bda097 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -72,6 +72,22 @@ static inline int ns_alloc_inum(struct ns_common *ns) return proc_alloc_inum(&ns->inum); } +static inline int ns_common_init(struct ns_common *ns, + const struct proc_ns_operations *ops, + bool alloc_inum) +{ + if (alloc_inum) { + int ret; + ret = proc_alloc_inum(&ns->inum); + if (ret) + return ret; + } + refcount_set(&ns->count, 1); + ns->stashed = NULL; + ns->ops = ops; + return 0; +} + #define ns_free_inum(ns) proc_free_inum((ns)->inum) #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) -- cgit v1.2.3 From 86c5aba210b145d7de011a5abaf9b785aa70a183 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:39 +0200 Subject: ns: remove ns_alloc_inum() It's now unused. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/proc_ns.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index dbb119bda097..e50d312f9fee 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,12 +66,6 @@ static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ -static inline int ns_alloc_inum(struct ns_common *ns) -{ - WRITE_ONCE(ns->stashed, NULL); - return proc_alloc_inum(&ns->inum); -} - static inline int ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, bool alloc_inum) -- cgit v1.2.3 From 885fc8ac0a4dc70f5d87b80b0977292870e35c60 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:40 +0200 Subject: nstree: make iterator generic Move the namespace iteration infrastructure originally introduced for mount namespaces into a generic library usable by all namespace types. Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 9 +++++ include/linux/nstree.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/proc_ns.h | 3 ++ 3 files changed, 103 insertions(+) create mode 100644 include/linux/nstree.h (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index bc2e0758e1c9..7224072cccc5 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -3,6 +3,7 @@ #define _LINUX_NS_COMMON_H #include +#include struct proc_ns_operations; @@ -20,6 +21,14 @@ struct ns_common { const struct proc_ns_operations *ops; unsigned int inum; refcount_t count; + union { + struct { + u64 ns_id; + struct rb_node ns_tree_node; + struct list_head ns_list_node; + }; + struct rcu_head ns_rcu; + }; }; #define to_ns_common(__ns) \ diff --git a/include/linux/nstree.h b/include/linux/nstree.h new file mode 100644 index 000000000000..29ad6402260c --- /dev/null +++ b/include/linux/nstree.h @@ -0,0 +1,91 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NSTREE_H +#define _LINUX_NSTREE_H + +#include +#include +#include +#include +#include +#include + +/** + * struct ns_tree - Namespace tree + * @ns_tree: Rbtree of namespaces of a particular type + * @ns_list: Sequentially walkable list of all namespaces of this type + * @ns_tree_lock: Seqlock to protect the tree and list + */ +struct ns_tree { + struct rb_root ns_tree; + struct list_head ns_list; + seqlock_t ns_tree_lock; + int type; +}; + +extern struct ns_tree cgroup_ns_tree; +extern struct ns_tree ipc_ns_tree; +extern struct ns_tree mnt_ns_tree; +extern struct ns_tree net_ns_tree; +extern struct ns_tree pid_ns_tree; +extern struct ns_tree time_ns_tree; +extern struct ns_tree user_ns_tree; +extern struct ns_tree uts_ns_tree; + +#define to_ns_tree(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(cgroup_ns_tree), \ + struct ipc_namespace *: &(ipc_ns_tree), \ + struct net *: &(net_ns_tree), \ + struct pid_namespace *: &(pid_ns_tree), \ + struct mnt_namespace *: &(mnt_ns_tree), \ + struct time_namespace *: &(time_ns_tree), \ + struct user_namespace *: &(user_ns_tree), \ + struct uts_namespace *: &(uts_ns_tree)) + +u64 ns_tree_gen_id(struct ns_common *ns); +void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree); +void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree); +struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type); +struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, + struct ns_tree *ns_tree, + bool previous); + +static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree) +{ + ns_tree_gen_id(ns); + __ns_tree_add_raw(ns, ns_tree); +} + +/** + * ns_tree_add_raw - Add a namespace to a namespace tree + * @ns: Namespace to add + * + * This function adds a namespace to the appropriate namespace tree + * without assigning an id. 
+ */ +#define ns_tree_add_raw(__ns) __ns_tree_add_raw(to_ns_common(__ns), to_ns_tree(__ns)) + +/** + * ns_tree_add - Add a namespace to a namespace tree + * @ns: Namespace to add + * + * This function assigns a new id to the namespace and adds it to the + * appropriate namespace tree and list. + */ +#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns)) + +/** + * ns_tree_remove - Remove a namespace from a namespace tree + * @ns: Namespace to remove + * + * This function removes a namespace from the appropriate namespace + * tree and list. + */ +#define ns_tree_remove(__ns) __ns_tree_remove(to_ns_common(__ns), to_ns_tree(__ns)) + +#define ns_tree_adjoined_rcu(__ns, __previous) \ + __ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous) + +#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node)) + +#endif /* _LINUX_NSTREE_H */ diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index e50d312f9fee..7f89f0829e60 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -79,6 +79,9 @@ static inline int ns_common_init(struct ns_common *ns, refcount_set(&ns->count, 1); ns->stashed = NULL; ns->ops = ops; + ns->ns_id = 0; + RB_CLEAR_NODE(&ns->ns_tree_node); + INIT_LIST_HEAD(&ns->ns_list_node); return 0; } -- cgit v1.2.3 From b36c823b9a4be5b0c8e38c3fd60cade7d41c216c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:46 +0200 Subject: time: support ns lookup Support the generic ns lookup infrastructure to support file handles for namespaces. 
Reviewed-by: Thomas Gleixner Signed-off-by: Christian Brauner --- include/linux/time_namespace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index bb2c52f4fc94..7f6af7a9771e 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -33,6 +33,7 @@ struct time_namespace { extern struct time_namespace init_time_ns; #ifdef CONFIG_TIME_NS +void __init time_ns_init(void); extern int vdso_join_timens(struct task_struct *task, struct time_namespace *ns); extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); @@ -108,6 +109,10 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #else +static inline void __init time_ns_init(void) +{ +} + static inline int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { -- cgit v1.2.3 From d7afdf889561058068ab46fd8f306c70ef29216a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:49 +0200 Subject: ns: add to_<type>_ns() to respective headers Every namespace type has a container_of(ns, <ns_type>, ns) static inline function that is currently not exposed in the header. So we have a bunch of places that open-code it via container_of(). Move it to the headers so we can use it directly. 
Reviewed-by: Aleksa Sarai Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 5 +++++ include/linux/ipc_namespace.h | 5 +++++ include/linux/pid_namespace.h | 5 +++++ include/linux/time_namespace.h | 4 ++++ include/linux/user_namespace.h | 5 +++++ include/linux/utsname.h | 5 +++++ include/net/net_namespace.h | 5 +++++ 7 files changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b18fb5fcb38e..9ca25346f7cb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -794,6 +794,11 @@ extern struct cgroup_namespace init_cgroup_ns; #ifdef CONFIG_CGROUPS +static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) +{ + return container_of(ns, struct cgroup_namespace, ns); +} + void free_cgroup_ns(struct cgroup_namespace *ns); struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e8240cf2611a..924e4754374f 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -129,6 +129,11 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif #if defined(CONFIG_IPC_NS) +static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns) +{ + return container_of(ns, struct ipc_namespace, ns); +} + extern struct ipc_namespace *copy_ipcs(unsigned long flags, struct user_namespace *user_ns, struct ipc_namespace *ns); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..ba0efc8c8596 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -54,6 +54,11 @@ extern struct pid_namespace init_pid_ns; #define PIDNS_ADDING (1U << 31) #ifdef CONFIG_PID_NS +static inline struct pid_namespace *to_pid_ns(struct ns_common *ns) +{ + return container_of(ns, struct pid_namespace, ns); +} + static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) diff --git 
a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 7f6af7a9771e..a47a4ce4183e 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -33,6 +33,10 @@ struct time_namespace { extern struct time_namespace init_time_ns; #ifdef CONFIG_TIME_NS +static inline struct time_namespace *to_time_ns(struct ns_common *ns) +{ + return container_of(ns, struct time_namespace, ns); +} void __init time_ns_init(void); extern int vdso_join_timens(struct task_struct *task, struct time_namespace *ns); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index a0bb6d012137..a09056ad090e 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -168,6 +168,11 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, #ifdef CONFIG_USER_NS +static inline struct user_namespace *to_user_ns(struct ns_common *ns) +{ + return container_of(ns, struct user_namespace, ns); +} + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index bf7613ba412b..5d34c4f0f945 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -30,6 +30,11 @@ struct uts_namespace { extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS +static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) +{ + return container_of(ns, struct uts_namespace, ns); +} + static inline void get_uts_ns(struct uts_namespace *ns) { refcount_inc(&ns->ns.count); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 025a7574b275..fd090ceb80bf 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -262,6 +262,11 @@ void ipx_unregister_sysctl(void); #ifdef CONFIG_NET_NS void __put_net(struct net *net); +static inline struct net *to_net_ns(struct ns_common *ns) +{ + return container_of(ns, struct net, ns); +} + /* Try using get_net_track() instead */ static inline struct 
net *get_net(struct net *net) { -- cgit v1.2.3 From d2afdb73f8ad77b49eca9d110d0c54bf30d1df0f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:50 +0200 Subject: nsfs: add current_in_namespace() Add a helper to easily check whether a given namespace is the caller's current namespace. This is currently open-coded in a lot of places. Simply switch on the type and compare the results. Reviewed-by: Aleksa Sarai Signed-off-by: Christian Brauner --- include/linux/nsfs.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h index fb84aa538091..e5a5fa83d36b 100644 --- a/include/linux/nsfs.h +++ b/include/linux/nsfs.h @@ -5,6 +5,8 @@ #define _LINUX_NSFS_H #include +#include +#include struct path; struct task_struct; @@ -22,5 +24,17 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops); void nsfs_init(void); -#endif /* _LINUX_NSFS_H */ +#define __current_namespace_from_type(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: current->nsproxy->cgroup_ns, \ + struct ipc_namespace *: current->nsproxy->ipc_ns, \ + struct net *: current->nsproxy->net_ns, \ + struct pid_namespace *: task_active_pid_ns(current), \ + struct mnt_namespace *: current->nsproxy->mnt_ns, \ + struct time_namespace *: current->nsproxy->time_ns, \ + struct user_namespace *: current_user_ns(), \ + struct uts_namespace *: current->nsproxy->uts_ns) + +#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns) +#endif /* _LINUX_NSFS_H */ -- cgit v1.2.3 From 5222470b2fbb3740f931f189db33dd1367b1ae75 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:51 +0200 Subject: nsfs: support file handles A while ago we added support for file handles to pidfs so pidfds can be encoded and decoded as file handles. Userspace has adopted this quickly and it's proven very useful. 
Implement file handles for namespaces as well. A process is not always able to open /proc/self/ns/. That requires procfs to be mounted and for /proc/self/ or /proc/self/ns/ to not be overmounted. However, userspace can always derive a namespace fd from a pidfd. And that always works for a task's own namespace. There's no need to introduce unnecessary behavioral differences between /proc/self/ns/ fds, pidfd-derived namespace fds, and file-handle-derived namespace fds. So namespace file handles are always decodable if the caller is located in the namespace the file handle refers to. This also allows a task to e.g., store a set of file handles to its namespaces in a file on-disk so it can verify when it gets rexeced that they're still valid and so on. This is akin to the pidfd use-case. Or just plainly for namespace comparison reasons where a file handle to the task's own namespace can be easily compared against others. Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 6 ++++++ include/uapi/linux/nsfs.h | 9 +++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index cfb0dd1ea49c..3aac58a520c7 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -122,6 +122,12 @@ enum fid_type { FILEID_BCACHEFS_WITHOUT_PARENT = 0xb1, FILEID_BCACHEFS_WITH_PARENT = 0xb2, + /* + * + * 64 bit namespace identifier, 32 bit namespace type, 32 bit inode number. 
+ */ + FILEID_NSFS = 0xf1, + /* * 64 bit unique kernfs id */ diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index 97d8d80d139f..fa86fe3c8bd3 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -53,4 +53,13 @@ enum init_ns_ino { MNT_NS_INIT_INO = 0xEFFFFFF8U, }; +struct nsfs_file_handle { + __u64 ns_id; + __u32 ns_type; + __u32 ns_inum; +}; + +#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */ +#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */ + #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From e83f0b5d10dcf62833008327cb661c7d118bca85 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:52 +0200 Subject: nsfs: support exhaustive file handles Pidfd file handles are exhaustive meaning they don't require a handle on another pidfd to pass to open_by_handle_at() so it can derive the filesystem to decode in. Instead it can be derived from the file handle itself. The same is possible for namespace file handles. Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index f291ab4f94eb..3741ea1b73d8 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -111,6 +111,7 @@ #define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ #define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ +#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */ #define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. 
*/ -- cgit v1.2.3 From f861225b9ee9cb2da1c7b2f5f921856cb8ca86bb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:53 +0200 Subject: nsfs: add missing id retrieval support The mount namespace has supported id retrieval for a while already. Add support for the other types as well. Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index fa86fe3c8bd3..5d5bf22464c9 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -16,8 +16,6 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) -/* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ @@ -42,6 +40,10 @@ struct mnt_ns_info { /* Get previous namespace. */ #define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) +/* Retrieve namespace identifiers. */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64) +#define NS_GET_ID _IOR(NSIO, 13, __u64) + enum init_ns_ino { IPC_NS_INIT_INO = 0xEFFFFFFFU, UTS_NS_INIT_INO = 0xEFFFFFFEU, -- cgit v1.2.3 From 93f67a7ddadf6ed8997c000df9790e5d64617196 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:00 +0200 Subject: uts: split namespace into separate header We have dedicated headers for all namespace types. Add one for the uts namespace as well. Now it's consistent for all namespace types. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/uts_namespace.h | 65 +++++++++++++++++++++++++++++++++++++++++++ include/linux/utsname.h | 58 +------------------------------------- 2 files changed, 66 insertions(+), 57 deletions(-) create mode 100644 include/linux/uts_namespace.h (limited to 'include') diff --git a/include/linux/uts_namespace.h b/include/linux/uts_namespace.h new file mode 100644 index 000000000000..c2b619bb4e57 --- /dev/null +++ b/include/linux/uts_namespace.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UTS_NAMESPACE_H +#define _LINUX_UTS_NAMESPACE_H + +#include +#include + +struct user_namespace; +extern struct user_namespace init_user_ns; + +struct uts_namespace { + struct new_utsname name; + struct user_namespace *user_ns; + struct ucounts *ucounts; + struct ns_common ns; +} __randomize_layout; + +extern struct uts_namespace init_uts_ns; + +#ifdef CONFIG_UTS_NS +static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) +{ + return container_of(ns, struct uts_namespace, ns); +} + +static inline void get_uts_ns(struct uts_namespace *ns) +{ + refcount_inc(&ns->ns.count); +} + +extern struct uts_namespace *copy_utsname(unsigned long flags, + struct user_namespace *user_ns, struct uts_namespace *old_ns); +extern void free_uts_ns(struct uts_namespace *ns); + +static inline void put_uts_ns(struct uts_namespace *ns) +{ + if (refcount_dec_and_test(&ns->ns.count)) + free_uts_ns(ns); +} + +void uts_ns_init(void); +#else +static inline void get_uts_ns(struct uts_namespace *ns) +{ +} + +static inline void put_uts_ns(struct uts_namespace *ns) +{ +} + +static inline struct uts_namespace *copy_utsname(unsigned long flags, + struct user_namespace *user_ns, struct uts_namespace *old_ns) +{ + if (flags & CLONE_NEWUTS) + return ERR_PTR(-EINVAL); + + return old_ns; +} + +static inline void uts_ns_init(void) +{ +} +#endif + +#endif /* _LINUX_UTS_NAMESPACE_H */ diff --git a/include/linux/utsname.h 
b/include/linux/utsname.h index 5d34c4f0f945..547bd4439706 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include enum uts_proc { UTS_PROC_ARCH, @@ -18,62 +18,6 @@ enum uts_proc { UTS_PROC_DOMAINNAME, }; -struct user_namespace; -extern struct user_namespace init_user_ns; - -struct uts_namespace { - struct new_utsname name; - struct user_namespace *user_ns; - struct ucounts *ucounts; - struct ns_common ns; -} __randomize_layout; -extern struct uts_namespace init_uts_ns; - -#ifdef CONFIG_UTS_NS -static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) -{ - return container_of(ns, struct uts_namespace, ns); -} - -static inline void get_uts_ns(struct uts_namespace *ns) -{ - refcount_inc(&ns->ns.count); -} - -extern struct uts_namespace *copy_utsname(unsigned long flags, - struct user_namespace *user_ns, struct uts_namespace *old_ns); -extern void free_uts_ns(struct uts_namespace *ns); - -static inline void put_uts_ns(struct uts_namespace *ns) -{ - if (refcount_dec_and_test(&ns->ns.count)) - free_uts_ns(ns); -} - -void uts_ns_init(void); -#else -static inline void get_uts_ns(struct uts_namespace *ns) -{ -} - -static inline void put_uts_ns(struct uts_namespace *ns) -{ -} - -static inline struct uts_namespace *copy_utsname(unsigned long flags, - struct user_namespace *user_ns, struct uts_namespace *old_ns) -{ - if (flags & CLONE_NEWUTS) - return ERR_PTR(-EINVAL); - - return old_ns; -} - -static inline void uts_ns_init(void) -{ -} -#endif - #ifdef CONFIG_PROC_SYSCTL extern void uts_proc_notify(enum uts_proc proc); #else -- cgit v1.2.3 From b2a0b192084acd0a86d66cbbc61e17ba1f5bd583 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:01 +0200 Subject: mnt: expose pointer to init_mnt_ns There's various scenarios where we need to know whether we are in the initial set of namespaces or not to e.g., shortcut permission checking. All namespaces expose that information. 
Let's do that too. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/mnt_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 70b366b64816..6d1c4c218c14 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -11,6 +11,8 @@ struct fs_struct; struct user_namespace; struct ns_common; +extern struct mnt_namespace init_mnt_ns; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); -- cgit v1.2.3 From f74ca6da113d5d4b21c00bb4da3f3c137162b4fe Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:02 +0200 Subject: nscommon: move to separate file It's really awkward spilling the ns common infrastructure into multiple headers. Move it to a separate file. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 3 +++ include/linux/proc_ns.h | 19 ------------------- 2 files changed, 3 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 7224072cccc5..78b17fe80b62 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -31,6 +31,9 @@ struct ns_common { }; }; +int ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, + bool alloc_inum); + #define to_ns_common(__ns) \ _Generic((__ns), \ struct cgroup_namespace *: &(__ns)->ns, \ diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 7f89f0829e60..9f21670b5824 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,25 +66,6 @@ static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ -static inline int ns_common_init(struct ns_common *ns, - const struct proc_ns_operations *ops, - bool alloc_inum) -{ - if (alloc_inum) { - int ret; - ret = 
proc_alloc_inum(&ns->inum); - if (ret) - return ret; - } - refcount_set(&ns->count, 1); - ns->stashed = NULL; - ns->ops = ops; - ns->ns_id = 0; - RB_CLEAR_NODE(&ns->ns_tree_node); - INIT_LIST_HEAD(&ns->ns_list_node); - return 0; -} - #define ns_free_inum(ns) proc_free_inum((ns)->inum) #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) -- cgit v1.2.3 From 5fc6bef178f1b644f1439e520c8f83bfc83a1252 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:03 +0200 Subject: cgroup: split namespace into separate header We have dedicated headers for all namespace types. Add one for the cgroup namespace as well. Now it's consistent for all namespace types and easy to figure out what to include. Acked-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/cgroup.h | 51 +----------------------------------- include/linux/cgroup_namespace.h | 56 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 50 deletions(-) create mode 100644 include/linux/cgroup_namespace.h (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9ca25346f7cb..5156fed8cbc3 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -27,6 +27,7 @@ #include #include +#include struct kernel_clone_args; @@ -783,56 +784,6 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ -struct cgroup_namespace { - struct ns_common ns; - struct user_namespace *user_ns; - struct ucounts *ucounts; - struct css_set *root_cset; -}; - -extern struct cgroup_namespace init_cgroup_ns; - -#ifdef CONFIG_CGROUPS - -static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) -{ - return container_of(ns, struct cgroup_namespace, ns); -} - -void free_cgroup_ns(struct cgroup_namespace *ns); - -struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, - struct user_namespace *user_ns, - struct cgroup_namespace *old_ns); - -int 
cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, - struct cgroup_namespace *ns); - -static inline void get_cgroup_ns(struct cgroup_namespace *ns) -{ - refcount_inc(&ns->ns.count); -} - -static inline void put_cgroup_ns(struct cgroup_namespace *ns) -{ - if (refcount_dec_and_test(&ns->ns.count)) - free_cgroup_ns(ns); -} - -#else /* !CONFIG_CGROUPS */ - -static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } -static inline struct cgroup_namespace * -copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, - struct cgroup_namespace *old_ns) -{ - return old_ns; -} - -static inline void get_cgroup_ns(struct cgroup_namespace *ns) { } -static inline void put_cgroup_ns(struct cgroup_namespace *ns) { } - -#endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h new file mode 100644 index 000000000000..c02bb76c5e32 --- /dev/null +++ b/include/linux/cgroup_namespace.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CGROUP_NAMESPACE_H +#define _LINUX_CGROUP_NAMESPACE_H + +struct cgroup_namespace { + struct ns_common ns; + struct user_namespace *user_ns; + struct ucounts *ucounts; + struct css_set *root_cset; +}; + +extern struct cgroup_namespace init_cgroup_ns; + +#ifdef CONFIG_CGROUPS + +static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) +{ + return container_of(ns, struct cgroup_namespace, ns); +} + +void free_cgroup_ns(struct cgroup_namespace *ns); + +struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, + struct user_namespace *user_ns, + struct cgroup_namespace *old_ns); + +int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns); + +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + refcount_inc(&ns->ns.count); +} + +static inline void put_cgroup_ns(struct cgroup_namespace *ns) +{ + if (refcount_dec_and_test(&ns->ns.count)) + free_cgroup_ns(ns); +} + +#else 
/* !CONFIG_CGROUPS */ + +static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } +static inline struct cgroup_namespace * +copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, + struct cgroup_namespace *old_ns) +{ + return old_ns; +} + +static inline void get_cgroup_ns(struct cgroup_namespace *ns) { } +static inline void put_cgroup_ns(struct cgroup_namespace *ns) { } + +#endif /* !CONFIG_CGROUPS */ + +#endif /* _LINUX_CGROUP_NAMESPACE_H */ -- cgit v1.2.3 From cc47f434271ba90c18c16e0bba360df38a8bc954 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:04 +0200 Subject: nsfs: add inode number for anon namespace Add an inode number for anonymous namespaces. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index 5d5bf22464c9..e098759ec917 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -53,6 +53,9 @@ enum init_ns_ino { TIME_NS_INIT_INO = 0xEFFFFFFAU, NET_NS_INIT_INO = 0xEFFFFFF9U, MNT_NS_INIT_INO = 0xEFFFFFF8U, +#ifdef __KERNEL__ + MNT_NS_ANON_INO = 0xEFFFFFF7U, +#endif }; struct nsfs_file_handle { -- cgit v1.2.3 From 5612ff3ec588be09f11a9424db6d1186bcdeb3fa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:07 +0200 Subject: nscommon: simplify initialization There's a lot of information that namespace implementers don't need to know about at all. Encapsulate this all in the initialization helper.
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 78b17fe80b62..05c7a7dd211b 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -16,6 +16,15 @@ struct time_namespace; struct user_namespace; struct uts_namespace; +extern struct cgroup_namespace init_cgroup_ns; +extern struct ipc_namespace init_ipc_ns; +extern struct mnt_namespace init_mnt_ns; +extern struct net init_net; +extern struct pid_namespace init_pid_ns; +extern struct time_namespace init_time_ns; +extern struct user_namespace init_user_ns; +extern struct uts_namespace init_uts_ns; + struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; @@ -31,8 +40,7 @@ struct ns_common { }; }; -int ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, - bool alloc_inum); +int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum); #define to_ns_common(__ns) \ _Generic((__ns), \ @@ -45,4 +53,31 @@ int ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, struct user_namespace *: &(__ns)->ns, \ struct uts_namespace *: &(__ns)->ns) +#define ns_init_inum(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: CGROUP_NS_INIT_INO, \ + struct ipc_namespace *: IPC_NS_INIT_INO, \ + struct mnt_namespace *: MNT_NS_INIT_INO, \ + struct net *: NET_NS_INIT_INO, \ + struct pid_namespace *: PID_NS_INIT_INO, \ + struct time_namespace *: TIME_NS_INIT_INO, \ + struct user_namespace *: USER_NS_INIT_INO, \ + struct uts_namespace *: UTS_NS_INIT_INO) + +#define ns_init_ns(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &init_cgroup_ns, \ + struct ipc_namespace *: &init_ipc_ns, \ + struct mnt_namespace *: &init_mnt_ns, \ + struct net *: &init_net, \ + struct pid_namespace *: &init_pid_ns, \ + 
struct time_namespace *: &init_time_ns, \ + struct user_namespace *: &init_user_ns, \ + struct uts_namespace *: &init_uts_ns) + +#define ns_common_init(__ns, __ops) \ + __ns_common_init(to_ns_common(__ns), __ops, (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) + +#define ns_common_init_inum(__ns, __ops, __inum) __ns_common_init(to_ns_common(__ns), __ops, __inum) + #endif -- cgit v1.2.3 From 5d36370f34312776d202e5c35d1a786d8b07a9c3 Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Wed, 17 Sep 2025 08:32:50 +0100 Subject: ALSA: compress: add raw opus codec define and opus decoder structs Adds a raw opus codec define and raw opus decoder structs. This is for raw OPUS packets not packed in any type of container (for instance OGG container). The decoder struct fields are taken from corresponding RFC document: RFC 7845 Section 5. Cc: Srinivas Kandagatla Cc: Vinod Koul Co-developed-by: Annemarie Porter Signed-off-by: Annemarie Porter Signed-off-by: Alexey Klimov Signed-off-by: Takashi Iwai --- include/uapi/sound/compress_params.h | 43 +++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h index bc7648a30746..faf4fa911f7f 100644 --- a/include/uapi/sound/compress_params.h +++ b/include/uapi/sound/compress_params.h @@ -43,7 +43,8 @@ #define SND_AUDIOCODEC_BESPOKE ((__u32) 0x0000000E) #define SND_AUDIOCODEC_ALAC ((__u32) 0x0000000F) #define SND_AUDIOCODEC_APE ((__u32) 0x00000010) -#define SND_AUDIOCODEC_MAX SND_AUDIOCODEC_APE +#define SND_AUDIOCODEC_OPUS_RAW ((__u32) 0x00000011) +#define SND_AUDIOCODEC_MAX SND_AUDIOCODEC_OPUS_RAW /* * Profile and modes are listed with bit masks. 
This allows for a @@ -324,6 +325,45 @@ struct snd_dec_ape { __u32 seek_table_present; } __attribute__((packed, aligned(4))); +/** + * struct snd_dec_opus - Opus decoder parameters (raw opus packets) + * @version: Usually should be '1' but can be split into major (4 upper bits) + * and minor (4 lower bits) sub-fields. + * @num_channels: Number of output channels. + * @pre_skip: Number of samples to discard at 48 kHz. + * @sample_rate: Sample rate of original input. + * @output_gain: Gain to apply when decoding (in Q7.8 format). + * @mapping_family: Order and meaning of output channels. Only values 0 and 1 + * are expected; values 2..255 are not recommended for playback. + * + * Optional channel mapping table. Describes mapping of opus streams to decoded + * channels. + * @struct snd_dec_opus_ch_map + * @stream_count: Number of streams encoded in each Ogg packet. + * @coupled_count: Number of streams whose decoders are used for two + * channels. + * @channel_map: describes which decoded channel to be used for each one. + * See RFC doc for details. + * This supports only mapping families 0 and 1, therefore max + * number of channels is 8. + * + * These options were extracted from RFC7845 Section 5. 
+ */ + +struct snd_dec_opus { + __u8 version; + __u8 num_channels; + __u16 pre_skip; + __u32 sample_rate; + __u16 output_gain; + __u8 mapping_family; + struct snd_dec_opus_ch_map { + __u8 stream_count; + __u8 coupled_count; + __u8 channel_map[8]; + } chan_map; +} __attribute__((packed, aligned(4))); + union snd_codec_options { struct snd_enc_wma wma; struct snd_enc_vorbis vorbis; @@ -334,6 +374,7 @@ union snd_codec_options { struct snd_dec_wma wma_d; struct snd_dec_alac alac_d; struct snd_dec_ape ape_d; + struct snd_dec_opus opus_d; struct { __u32 out_sample_rate; } src_d; -- cgit v1.2.3 From b07d2514b91c30ab16fdf8f9cc3523bef969becf Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Wed, 17 Sep 2025 08:32:51 +0100 Subject: ALSA: compress_offload: increase SNDRV_COMPRESS_VERSION minor version by 1 Since addition of raw opus codec support we need to update compress API minor version by one. Bump the SNDRV_COMPRESS_VERSION to 0.4.1. Signed-off-by: Alexey Klimov Acked-by: Vinod Koul Signed-off-by: Takashi Iwai --- include/uapi/sound/compress_offload.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h index 26f756cc2e62..b610683fd8db 100644 --- a/include/uapi/sound/compress_offload.h +++ b/include/uapi/sound/compress_offload.h @@ -13,7 +13,7 @@ #include #include -#define SNDRV_COMPRESS_VERSION SNDRV_PROTOCOL_VERSION(0, 4, 0) +#define SNDRV_COMPRESS_VERSION SNDRV_PROTOCOL_VERSION(0, 4, 1) /** * struct snd_compressed_buffer - compressed buffer * @fragment_size: size of buffer fragment in bytes -- cgit v1.2.3 From be5f21d3985f00827e09b798f7a07ebd6dd7f54a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Sep 2025 12:28:08 +0200 Subject: ns: add ns_common_free() And drop ns_free_inum(). Anything common that can be wasted centrally should be wasted in the new common helper. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 3 +++ include/linux/proc_ns.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 05c7a7dd211b..19833ac547f9 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -41,6 +41,7 @@ struct ns_common { }; int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum); +void __ns_common_free(struct ns_common *ns); #define to_ns_common(__ns) \ _Generic((__ns), \ @@ -80,4 +81,6 @@ int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, #define ns_common_init_inum(__ns, __ops, __inum) __ns_common_init(to_ns_common(__ns), __ops, __inum) +#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) + #endif diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 9f21670b5824..08016f6e0e6f 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,8 +66,6 @@ static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ -#define ns_free_inum(ns) proc_free_inum((ns)->inum) - #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) #endif /* _LINUX_PROC_NS_H */ -- cgit v1.2.3 From 224ef741ce87aa6474b82e0eb76e0e8e1bafe544 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:46 +0200 Subject: ns: add reference count helpers Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 19833ac547f9..65e258e1fdc6 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -43,16 +43,24 @@ struct ns_common { int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum); void 
__ns_common_free(struct ns_common *ns); -#define to_ns_common(__ns) \ - _Generic((__ns), \ - struct cgroup_namespace *: &(__ns)->ns, \ - struct ipc_namespace *: &(__ns)->ns, \ - struct mnt_namespace *: &(__ns)->ns, \ - struct net *: &(__ns)->ns, \ - struct pid_namespace *: &(__ns)->ns, \ - struct time_namespace *: &(__ns)->ns, \ - struct user_namespace *: &(__ns)->ns, \ - struct uts_namespace *: &(__ns)->ns) +#define to_ns_common(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(__ns)->ns, \ + const struct cgroup_namespace *: &(__ns)->ns, \ + struct ipc_namespace *: &(__ns)->ns, \ + const struct ipc_namespace *: &(__ns)->ns, \ + struct mnt_namespace *: &(__ns)->ns, \ + const struct mnt_namespace *: &(__ns)->ns, \ + struct net *: &(__ns)->ns, \ + const struct net *: &(__ns)->ns, \ + struct pid_namespace *: &(__ns)->ns, \ + const struct pid_namespace *: &(__ns)->ns, \ + struct time_namespace *: &(__ns)->ns, \ + const struct time_namespace *: &(__ns)->ns, \ + struct user_namespace *: &(__ns)->ns, \ + const struct user_namespace *: &(__ns)->ns, \ + struct uts_namespace *: &(__ns)->ns, \ + const struct uts_namespace *: &(__ns)->ns) #define ns_init_inum(__ns) \ _Generic((__ns), \ @@ -83,4 +91,21 @@ void __ns_common_free(struct ns_common *ns); #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) +static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns) +{ + return refcount_dec_and_test(&ns->count); +} + +static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns) +{ + return refcount_inc_not_zero(&ns->count); +} + +#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->count) +#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->count) +#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns))) +#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns))) +#define ns_ref_put_and_lock(__ns, __lock) \ + refcount_dec_and_lock(&to_ns_common((__ns))->count, (__lock)) + #endif -- cgit v1.2.3 From 
06099e374f3ab818f0501671b21493ba2e1b94b9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:48 +0200 Subject: cgroup: port to ns_ref_*() helpers Stop accessing ns.count directly. Acked-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/cgroup_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h index c02bb76c5e32..b7dbf4d623d2 100644 --- a/include/linux/cgroup_namespace.h +++ b/include/linux/cgroup_namespace.h @@ -29,12 +29,12 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, static inline void get_cgroup_ns(struct cgroup_namespace *ns) { - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { - if (refcount_dec_and_test(&ns->ns.count)) + if (ns_ref_put(ns)) free_cgroup_ns(ns); } -- cgit v1.2.3 From d4825c99d6a738c565d5142ce37369368a4352da Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:49 +0200 Subject: ipc: port to ns_ref_*() helpers Stop accessing ns.count directly. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ipc_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 924e4754374f..21eff63f47da 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -140,14 +140,14 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags, static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); return ns; } static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) { if (ns) { - if (refcount_inc_not_zero(&ns->ns.count)) + if (ns_ref_get(ns)) return ns; } -- cgit v1.2.3 From 07897b38eadf5a370a6001790239f23036d5b970 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:50 +0200 Subject: pid: port to ns_ref_*() helpers Stop accessing ns.count directly. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/pid_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index ba0efc8c8596..5b2f29d369c4 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -62,7 +62,7 @@ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns) static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { if (ns != &init_pid_ns) - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); return ns; } -- cgit v1.2.3 From e0c173f1fa02c0b08720aa8aa0cc91c3063146ae Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:51 +0200 Subject: time: port to ns_ref_*() helpers Stop accessing ns.count directly. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/time_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index a47a4ce4183e..f3b9567cf1f4 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -44,7 +44,7 @@ extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns); static inline struct time_namespace *get_time_ns(struct time_namespace *ns) { - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); return ns; } @@ -57,7 +57,7 @@ struct page *find_timens_vvar_page(struct vm_area_struct *vma); static inline void put_time_ns(struct time_namespace *ns) { - if (refcount_dec_and_test(&ns->ns.count)) + if (ns_ref_put(ns)) free_time_ns(ns); } -- cgit v1.2.3 From 96d997ea5ad1911cc393ffdb5c928b532f2f921a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:52 +0200 Subject: user: port to ns_ref_*() helpers Stop accessing ns.count directly. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/user_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index a09056ad090e..9a9aebbf96b9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -176,7 +176,7 @@ static inline struct user_namespace *to_user_ns(struct ns_common *ns) static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); return ns; } @@ -186,7 +186,7 @@ extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { - if (ns && refcount_dec_and_test(&ns->ns.count)) + if (ns && ns_ref_put(ns)) __put_user_ns(ns); } -- cgit v1.2.3 From 2438b7d63ad866d6b2bb7b8d3455a6365d9b0fbe Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:56 +0200 Subject: uts: port to ns_ref_*() helpers Stop accessing ns.count directly. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/uts_namespace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/uts_namespace.h b/include/linux/uts_namespace.h index c2b619bb4e57..23b4f0e1b338 100644 --- a/include/linux/uts_namespace.h +++ b/include/linux/uts_namespace.h @@ -25,7 +25,7 @@ static inline struct uts_namespace *to_uts_ns(struct ns_common *ns) static inline void get_uts_ns(struct uts_namespace *ns) { - refcount_inc(&ns->ns.count); + ns_ref_inc(ns); } extern struct uts_namespace *copy_utsname(unsigned long flags, @@ -34,7 +34,7 @@ extern void free_uts_ns(struct uts_namespace *ns); static inline void put_uts_ns(struct uts_namespace *ns) { - if (refcount_dec_and_test(&ns->ns.count)) + if (ns_ref_put(ns)) free_uts_ns(ns); } -- cgit v1.2.3 From 99d33ce100cbc982647c9299cadb1277cfad503e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:57 +0200 Subject: net: port to ns_ref_*() helpers Stop accessing ns.count directly. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/net/net_namespace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fd090ceb80bf..3e7c825e5810 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -270,7 +270,7 @@ static inline struct net *to_net_ns(struct ns_common *ns) /* Try using get_net_track() instead */ static inline struct net *get_net(struct net *net) { - refcount_inc(&net->ns.count); + ns_ref_inc(net); return net; } @@ -281,7 +281,7 @@ static inline struct net *maybe_get_net(struct net *net) * exists. If the reference count is zero this * function fails and returns NULL. 
*/ - if (!refcount_inc_not_zero(&net->ns.count)) + if (!ns_ref_get(net)) net = NULL; return net; } @@ -289,7 +289,7 @@ static inline struct net *maybe_get_net(struct net *net) /* Try using put_net_track() instead */ static inline void put_net(struct net *net) { - if (refcount_dec_and_test(&net->ns.count)) + if (ns_ref_put(net)) __put_net(net); } @@ -301,7 +301,7 @@ int net_eq(const struct net *net1, const struct net *net2) static inline int check_net(const struct net *net) { - return refcount_read(&net->ns.count) != 0; + return ns_ref_read(net) != 0; } void net_drop_ns(void *); -- cgit v1.2.3 From 024596a4e2802e457a9f92af79f246fa9631f8de Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 18 Sep 2025 12:11:59 +0200 Subject: ns: rename to __ns_ref Make it easier to grep and rename to __ns_ref. Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 65e258e1fdc6..aea8528d799a 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -29,7 +29,7 @@ struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; - refcount_t count; + refcount_t __ns_ref; /* do not use directly */ union { struct { u64 ns_id; @@ -93,19 +93,19 @@ void __ns_common_free(struct ns_common *ns); static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns) { - return refcount_dec_and_test(&ns->count); + return refcount_dec_and_test(&ns->__ns_ref); } static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns) { - return refcount_inc_not_zero(&ns->count); + return refcount_inc_not_zero(&ns->__ns_ref); } -#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->count) -#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->count) +#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref)
+#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref) #define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns))) #define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns))) #define ns_ref_put_and_lock(__ns, __lock) \ - refcount_dec_and_lock(&to_ns_common((__ns))->count, (__lock)) + refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock)) #endif -- cgit v1.2.3 From daf4c2929fb792d24af0cd7bb6ca1f2949190fa4 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:34 -0700 Subject: bpf: bpf_verifier_state->cleaned flag instead of REG_LIVE_DONE Prepare for bpf_reg_state->live field removal by introducing a separate flag to track if clean_verifier_state() had been applied to the state. No functional changes. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-1-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 020de62bd09c..ac16da8b49dc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -45,7 +45,6 @@ enum bpf_reg_liveness { REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */ REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64, REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */ - REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ }; #define ITER_PREFIX "bpf_iter_" @@ -445,6 +444,7 @@ struct bpf_verifier_state { bool speculative; bool in_sleepable; + bool cleaned; /* first and last insn idx of this verifier state */ u32 first_insn_idx; -- cgit v1.2.3 From 3b20d3c120bae1e18ee11aa04531b161743db682 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:37 -0700 Subject: bpf: declare a few utility functions as internal api Namely, rename the following functions and 
add prototypes to bpf_verifier.h: - find_containing_subprog -> bpf_find_containing_subprog - insn_successors -> bpf_insn_successors - calls_callback -> bpf_calls_callback - fmt_stack_mask -> bpf_fmt_stack_mask Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-4-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ac16da8b49dc..93563564bde5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -1065,4 +1065,9 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, u32 frameno); +struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off); +int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]); +void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); +bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From efcda22aa541bbda827e54302baf9ae4fd44cdf2 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:38 -0700 Subject: bpf: compute instructions postorder per subprogram The next patch would require doing postorder traversal of individual subprograms. Facilitate this by moving env->cfg.insn_postorder computation from check_cfg() to a separate pass, as check_cfg() descends into called subprograms (and it needs to, because of merge_callee_effects() logic). env->cfg.insn_postorder is used only by compute_live_registers(), this function does not track cross subprogram dependencies, thus the change does not affect its operation.
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-5-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 93563564bde5..bd87e80f9423 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -665,6 +665,7 @@ struct bpf_subprog_info { /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ + u32 postorder_start; /* The idx to the env->cfg.insn_postorder */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; /* offsets in range [stack_depth .. fastcall_stack_off) @@ -794,7 +795,10 @@ struct bpf_verifier_env { struct { int *insn_state; int *insn_stack; - /* vector of instruction indexes sorted in post-order */ + /* + * vector of instruction indexes sorted in post-order, grouped by subprogram, + * see bpf_subprog_info->postorder_start. + */ int *insn_postorder; int cur_stack; /* current position in the insn_postorder vector */ -- cgit v1.2.3 From b3698c356ad92bcdb9920655bc9df02a2a8946f9 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:39 -0700 Subject: bpf: callchain sensitive stack liveness tracking using CFG This commit adds a flow-sensitive, context-sensitive, path-insensitive data flow analysis for live stack slots: - flow-sensitive: uses program control flow graph to compute data flow values; - context-sensitive: collects data flow values for each possible call chain in a program; - path-insensitive: does not distinguish between separate control flow graph paths reaching the same instruction. 
Compared to the current path-sensitive analysis, this approach trades some precision for not having to enumerate every path in the program. This gives a theoretical capability to run the analysis before main verification pass. See cover letter for motivation. The basic idea is as follows: - Data flow values indicate stack slots that might be read and stack slots that are definitely written. - Data flow values are collected for each (call chain, instruction number) combination in the program. - Within a subprogram, data flow values are propagated using control flow graph. - Data flow values are transferred from entry instructions of callee subprograms to call sites in caller subprograms. In other words, a tree of all possible call chains is constructed. Each node of this tree represents a subprogram. Read and write marks are collected for each instruction of each node. Live stack slots are first computed for lower level nodes. Then, information about outer stack slots that might be read or are definitely written by a subprogram is propagated one level up, to the corresponding call instructions of the upper nodes. Procedure repeats until root node is processed. In the absence of value range analysis, stack read/write marks are collected during main verification pass, and data flow computation is triggered each time verifier.c:states_equal() needs to query the information. Implementation details are documented in kernel/bpf/liveness.c. Quantitative data about verification performance changes and memory consumption is in the cover letter. 
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-6-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index bd87e80f9423..2e3bdd50e2ba 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -745,6 +745,8 @@ struct bpf_scc_info { struct bpf_scc_visit visits[]; }; +struct bpf_liveness; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -846,6 +848,7 @@ struct bpf_verifier_env { struct bpf_insn insn_buf[INSN_BUF_SIZE]; struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; struct bpf_scc_callchain callchain_buf; + struct bpf_liveness *liveness; /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; @@ -1074,4 +1077,15 @@ int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]); void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); +int bpf_stack_liveness_init(struct bpf_verifier_env *env); +void bpf_stack_liveness_free(struct bpf_verifier_env *env); +int bpf_update_live_stack(struct bpf_verifier_env *env); +int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, u64 mask); +void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, u64 mask); +int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx); +int bpf_commit_stack_write_marks(struct bpf_verifier_env *env); +int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st); +bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi); +void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env); + #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From 
ccf25a67c7e29cfa6815d193054789b45ef825ad Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:41 -0700 Subject: bpf: signal error if old liveness is more conservative than new Unlike the new algorithm, register chain based liveness tracking is fully path sensitive, and thus should be strictly more accurate. Validate the new algorithm by signaling an error whenever it considers a stack slot dead while the old algorithm considers it alive. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-8-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2e3bdd50e2ba..dec5da3a2e59 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -852,6 +852,7 @@ struct bpf_verifier_env { /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; + bool internal_error; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) -- cgit v1.2.3 From 107e169799057bc6a379ddb625cbe1e51cfc7d72 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:42 -0700 Subject: bpf: disable and remove registers chain based liveness Remove register chain based liveness tracking: - struct bpf_reg_state->{parent,live} fields are no longer needed; - REG_LIVE_WRITTEN marks are superseded by bpf_mark_stack_write() calls; - mark_reg_read() calls are superseded by bpf_mark_stack_read(); - log.c:print_liveness() is superseded by logging in liveness.c; - propagate_liveness() is superseded by bpf_update_live_stack(); - no need to establish register chains in is_state_visited() anymore; - fix a bunch of tests expecting "_w" suffixes in verifier log messages. 
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-9-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index dec5da3a2e59..c7515da8500c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -26,27 +26,6 @@ /* Patch buffer size */ #define INSN_BUF_SIZE 32 -/* Liveness marks, used for registers and spilled-regs (in stack slots). - * Read marks propagate upwards until they find a write mark; they record that - * "one of this state's descendants read this reg" (and therefore the reg is - * relevant for states_equal() checks). - * Write marks collect downwards and do not propagate; they record that "the - * straight-line code that reached this state (from its parent) wrote this reg" - * (and therefore that reads propagated from this state or its descendants - * should not propagate to its parent). - * A state with a write mark can receive read marks; it just won't propagate - * them to its parent, since the write mark is a property, not of the state, - * but of the link between it and its parent. See mark_reg_read() and - * mark_stack_slot_read() in kernel/bpf/verifier.c. - */ -enum bpf_reg_liveness { - REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ - REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */ - REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */ - REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64, - REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */ -}; - #define ITER_PREFIX "bpf_iter_" enum bpf_iter_state { @@ -211,8 +190,6 @@ struct bpf_reg_state { * allowed and has the same effect as bpf_sk_release(sk). 
*/ u32 ref_obj_id; - /* parentage chain for liveness checking */ - struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like * R1=fp-8 and R2=fp-8, but one of them points to this function stack * while another to the caller's stack. To differentiate them 'frameno' @@ -225,7 +202,6 @@ struct bpf_reg_state { * patching which only happens after main verification finished. */ s32 subreg_def; - enum bpf_reg_liveness live; /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */ bool precise; }; @@ -852,7 +828,6 @@ struct bpf_verifier_env { /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; - bool internal_error; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) -- cgit v1.2.3 From 79f047c7d968b21ff4b72bd70c4533140553c56c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 18 Sep 2025 19:18:43 -0700 Subject: bpf: table based bpf_insn_successors() Converting bpf_insn_successors() to use lookup table makes it ~1.5 times faster. Also remove unnecessary conditionals: - `idx + 1 < prog->len` is unnecessary because after check_cfg() all jump targets are guaranteed to be within a program; - `i == 0 || succ[0] != dst` is unnecessary because any client of bpf_insn_successors() can handle duplicate edges: - compute_live_registers() - compute_scc() Moving bpf_insn_successors() to liveness.c allows its inlining in liveness.c:__update_stack_liveness(). Such inlining speeds up __update_stack_liveness() by ~40%. bpf_insn_successors() is used in both verifier.c and liveness.c. perf shows such move does not negatively impact users in verifier.c, as these are executed only once before main verification pass. Unlike __update_stack_liveness() which can be triggered multiple times. 
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250918-callchain-sensitive-liveness-v3-10-c3cd27bacc60@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c7515da8500c..4c497e839526 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -1049,6 +1049,7 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st u32 frameno); struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off); +int bpf_jmp_offset(struct bpf_insn *insn); int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]); void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); -- cgit v1.2.3 From fbd401e95e569ad0307e4301012f2d8e1ec1ee98 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Sep 2025 23:10:31 +0200 Subject: ACPI: processor: idle: Redefine two functions as void Notice that acpi_processor_power_init() and acpi_processor_power_exit() don't need to return any values because their callers don't check them anyway, so redefine those functions as void. While at it, rearrange the code in acpi_processor_power_init() to reduce the indentation level, get rid of a redundant local variable in that function, and rephrase a code comment in it. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Mario Limonciello (AMD) --- include/acpi/processor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 6ee4a69412de..24fdaa3c2899 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -419,8 +419,8 @@ static inline void acpi_processor_throttling_init(void) {} /* in processor_idle.c */ extern struct cpuidle_driver acpi_idle_driver; #ifdef CONFIG_ACPI_PROCESSOR_IDLE -int acpi_processor_power_init(struct acpi_processor *pr); -int acpi_processor_power_exit(struct acpi_processor *pr); +void acpi_processor_power_init(struct acpi_processor *pr); +void acpi_processor_power_exit(struct acpi_processor *pr); int acpi_processor_power_state_has_changed(struct acpi_processor *pr); int acpi_processor_hotplug(struct acpi_processor *pr); void acpi_processor_register_idle_driver(void); -- cgit v1.2.3 From f8d2f8205be8cceef2dd3c0e68e7af3c5f83c75c Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Thu, 18 Sep 2025 08:52:02 -0700 Subject: psp: make struct sock argument const in psp_sk_get_assoc_rcu() This function does not need a mutable reference to its argument. 
Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20250918155205.2197603-2-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- include/net/psp/functions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index 91ba06733321..fb3cbe8427ea 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -124,7 +124,7 @@ psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb) return __psp_sk_rx_policy_check(skb, rcu_dereference(tw->psp_assoc)); } -static inline struct psp_assoc *psp_sk_get_assoc_rcu(struct sock *sk) +static inline struct psp_assoc *psp_sk_get_assoc_rcu(const struct sock *sk) { struct inet_timewait_sock *tw; struct psp_assoc *pas; -- cgit v1.2.3 From 803cdb6ddca3e24418226e17e4b1c1134619aca8 Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Thu, 18 Sep 2025 08:52:03 -0700 Subject: psp: fix preemptive inet_twsk() cast in psp_sk_get_assoc_rcu() It is weird to cast to a timewait_sock before checking sk_state, even if the use is after such a check. Remove the tw local variable, and use inet_twsk() directly in the timewait branch. 
Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20250918155205.2197603-3-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- include/net/psp/functions.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index fb3cbe8427ea..980de7e58f8a 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -126,7 +126,6 @@ psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb) static inline struct psp_assoc *psp_sk_get_assoc_rcu(const struct sock *sk) { - struct inet_timewait_sock *tw; struct psp_assoc *pas; int state; @@ -134,9 +133,9 @@ static inline struct psp_assoc *psp_sk_get_assoc_rcu(const struct sock *sk) if (!sk_is_inet(sk) || state & TCPF_NEW_SYN_RECV) return NULL; - tw = inet_twsk(sk); - pas = state & TCPF_TIME_WAIT ? rcu_dereference(tw->psp_assoc) : - rcu_dereference(sk->psp_assoc); + pas = state & TCPF_TIME_WAIT ? + rcu_dereference(inet_twsk(sk)->psp_assoc) : + rcu_dereference(sk->psp_assoc); return pas; } -- cgit v1.2.3 From 28bb24dadd0ed70aed43cf9af3a54c22c3ce04b2 Mon Sep 17 00:00:00 2001 From: Daniel Zahka Date: Thu, 18 Sep 2025 08:52:04 -0700 Subject: psp: don't use flags for checking sk_state Using flags to check sk_state only makes sense to check for a subset of states in parallel e.g. sk_fullsock(). We are not doing that here. Compare for individual states directly. 
Signed-off-by: Daniel Zahka Link: https://patch.msgid.link/20250918155205.2197603-4-daniel.zahka@gmail.com Signed-off-by: Jakub Kicinski --- include/net/psp/functions.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index 980de7e58f8a..ef7743664da3 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -129,11 +129,11 @@ static inline struct psp_assoc *psp_sk_get_assoc_rcu(const struct sock *sk) struct psp_assoc *pas; int state; - state = 1 << READ_ONCE(sk->sk_state); - if (!sk_is_inet(sk) || state & TCPF_NEW_SYN_RECV) + state = READ_ONCE(sk->sk_state); + if (!sk_is_inet(sk) || state == TCP_NEW_SYN_RECV) return NULL; - pas = state & TCPF_TIME_WAIT ? + pas = state == TCP_TIME_WAIT ? rcu_dereference(inet_twsk(sk)->psp_assoc) : rcu_dereference(sk->psp_assoc); return pas; -- cgit v1.2.3 From f1bf77491d5e48ab5477f585ee5fca2aa524bd15 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 18 Sep 2025 19:25:35 +0000 Subject: psp: Fix typo in kdoc for struct psp_dev_caps.assoc_drv_spc. assoc_drv_spc is the size of psp_assoc.drv_data[]. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250918192539.1587586-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/psp/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/psp/types.h b/include/net/psp/types.h index d9688e66cf09..31cee64b7c86 100644 --- a/include/net/psp/types.h +++ b/include/net/psp/types.h @@ -98,7 +98,7 @@ struct psp_dev_caps { /** * @assoc_drv_spc: size of driver-specific state in Tx assoc - * Determines the size of struct psp_assoc::drv_spc + * Determines the size of struct psp_assoc::drv_data */ u32 assoc_drv_spc; }; -- cgit v1.2.3 From 32a8d2a197c1d2d36badb401657aa193938f071c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 17 Sep 2025 16:12:01 +0100 Subject: net: stmmac: rework mac_interface and phy_interface documentation Based on new research, it has come to light that the comment that I added in a014c35556b9 ("net: stmmac: clarify difference between "interface" and "phy_interface"") is not fully correct. Update the comment to properly describe the difference between the two. All of the DTS files in the kernel tree do not mention the "mac-mode" property, which results in mac_interface and phy_interface being the same. Also, none of the platform glue drivers set mac_interface to anything but PHY_INTERFACE_MODE_NA. This means that for all the platforms known to mainline, mac_interface is either the same as phy_interface, or it is PHY_INTERFACE_MODE_NA. Thus, updating the definition for mac_interface in stmmac.h has no material effect on current uses known to mainline, but the change opens the door to cleaning up all uses. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uytpB-00000006H23-0pRi@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e284f04964bf..f14f34ec6d5e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -190,18 +190,32 @@ struct plat_stmmacenet_data { int bus_id; int phy_addr; /* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media - * ^ ^ - * mac_interface phy_interface + * ^ ^ + * mac_interface phy_interface * - * mac_interface is the MAC-side interface, which may be the same - * as phy_interface if there is no intervening PCS. If there is a - * PCS, then mac_interface describes the interface mode between the - * MAC and PCS, and phy_interface describes the interface mode - * between the PCS and PHY. + * The Synopsys dwmac core only covers the MAC and an optional + * integrated PCS. Where the integrated PCS is used with a SerDes, + * e.g. for 1000base-X or Cisco SGMII, the connection between the + * PCS and SerDes will be TBI. + * + * Where the Synopsys dwmac core has been instantiated with multiple + * interface modes, these are selected via core-external configuration + * which is sampled when the dwmac core is reset. How this is done is + * platform glue specific, but this defines the interface used from + * the Synopsys dwmac core to the rest of the SoC. + * + * Where PCS other than the optional integrated Synopsys dwmac PCS + * is used, this counts as "the rest of the SoC" in the above + * paragraph. + * + * Thus, mac_interface is of little use inside the stmmac code; + * please do not use unless there is a definite requirement, and + * make sure to gain review feedback first. */ phy_interface_t mac_interface; /* phy_interface is the PHY-side interface - the interface used by - * an attached PHY. 
+ * an attached PHY or SFP etc. This is equivalent to the interface + * that phylink uses. */ phy_interface_t phy_interface; struct stmmac_mdio_bus_data *mdio_bus_data; -- cgit v1.2.3 From 6b0ed6a3a89cd2d04980e15a44c645bebb077418 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 17 Sep 2025 16:12:47 +0100 Subject: net: stmmac: remove mac_interface mac_interface has served little purpose, and has only caused confusion. Now that we have cleaned up all platform glue drivers which should not have been using mac_interface, there are no users remaining. Remove mac_interface. This results in the special dwmac specific "mac-mode" DT property becoming redundant, and in any case, no DTS files in the kernel make use of this property. Add a warning if the property is set, and it is different from the "phy-mode". Signed-off-by: Russell King (Oracle) Acked-by: Vladimir Zapolskiy Link: https://patch.msgid.link/E1uytpv-00000006H2x-196h@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index f14f34ec6d5e..fa1318bac06c 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -190,8 +190,8 @@ struct plat_stmmacenet_data { int bus_id; int phy_addr; /* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media - * ^ ^ - * mac_interface phy_interface + * ^ + * phy_interface * * The Synopsys dwmac core only covers the MAC and an optional * integrated PCS. Where the integrated PCS is used with a SerDes, @@ -208,12 +208,7 @@ struct plat_stmmacenet_data { * is used, this counts as "the rest of the SoC" in the above * paragraph. * - * Thus, mac_interface is of little use inside the stmmac code; - * please do not use unless there is a definite requirement, and - * make sure to gain review feedback first. 
- */ - phy_interface_t mac_interface; - /* phy_interface is the PHY-side interface - the interface used by + * phy_interface is the PHY-side interface - the interface used by * an attached PHY or SFP etc. This is equivalent to the interface * that phylink uses. */ -- cgit v1.2.3 From b73b8146d7ff68e245525adb944a4c998d423d59 Mon Sep 17 00:00:00 2001 From: Alasdair McWilliam Date: Wed, 17 Sep 2025 10:55:42 +0100 Subject: rtnetlink: add needed_{head,tail}room attributes Various network interface types make use of needed_{head,tail}room values to efficiently reserve buffer space for additional encapsulation headers, such as VXLAN, Geneve, IPSec, etc. However, it is not currently possible to query these values in a generic way. Introduce ability to query the needed_{head,tail}room values of a network device via rtnetlink, such that applications that may wish to use these values can do so. For example, Cilium agent iterates over present devices based on user config (direct routing, vxlan, geneve, wireguard etc.) and in future will configure netkit in order to expose the needed_{head,tail}room into K8s pods. See b9ed315d3c4c ("netkit: Allow for configuring needed_{head,tail}room"). 
Suggested-by: Daniel Borkmann Signed-off-by: Alasdair McWilliam Reviewed-by: Daniel Borkmann Link: https://patch.msgid.link/20250917095543.14039-1-alasdair@mcwilliam.dev Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 45f56c9f95d9..3b491d96e52e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -379,6 +379,8 @@ enum { IFLA_DPLL_PIN, IFLA_MAX_PACING_OFFLOAD_HORIZON, IFLA_NETNS_IMMUTABLE, + IFLA_HEADROOM, + IFLA_TAILROOM, __IFLA_MAX }; -- cgit v1.2.3 From 1c7e4a618509476658bafba35fffb3a5cfb213b1 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 15 Sep 2025 11:19:54 +0200 Subject: net: ipv4: make udp_v4_early_demux explicitly return drop reason udp_v4_early_demux already returns drop reasons as it either returns 0 or ip_mc_validate_source, which itself returns drop reasons. Its return value is also already used as a drop reason itself. Make this explicit by making it return drop reasons. 
Signed-off-by: Antoine Tenart Reviewed-by: David Ahern Link: https://patch.msgid.link/20250915091958.15382-2-atenart@kernel.org Signed-off-by: Jakub Kicinski --- include/net/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index eecd64097f91..059a0cee5f55 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -404,7 +404,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, return __skb_recv_udp(sk, flags, &off, err); } -int udp_v4_early_demux(struct sk_buff *skb); +enum skb_drop_reason udp_v4_early_demux(struct sk_buff *skb); bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_err(struct sk_buff *, u32); int udp_abort(struct sock *sk, int err); -- cgit v1.2.3 From b34df17d588de926212527a2f2ce72bc4e330260 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 18 Sep 2025 05:25:57 -0700 Subject: net: netpoll: remove unused netpoll pointer from netpoll_info The netpoll_info structure contains a useless pointer back to its associated netpoll. This field is never used, and the assignment in __netpoll_setup() does not contemplate multiple instances, as reported by Jay[1]. Drop both the member and its initialization to simplify the structure. 
Link: https://lore.kernel.org/all/2930648.1757463506@famine/ [1] Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250918-netpoll_jv-v1-1-67d50eeb2c26@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index b5ea9882eda8..f22eec466040 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -55,7 +55,6 @@ struct netpoll_info { struct delayed_work tx_work; - struct netpoll *netpoll; struct rcu_head rcu; }; -- cgit v1.2.3 From 8be1f299041220512195e40590bb4984f297ae48 Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Thu, 11 Sep 2025 11:34:03 +0800 Subject: dt-bindings: clock: spacemit: introduce i2s pre-clock to fix i2s clock Previously, the K1 clock driver did not include the parent clocks of the I2S sysclk. Introduce pre-clock to fix I2S clock. Otherwise, the I2S clock may not work as expected. This patch adds their definitions to allow proper registration in the driver and usage in the device tree. 
Fixes: 1b72c59db0add ("clk: spacemit: Add clock support for SpacemiT K1 SoC") Acked-by: Krzysztof Kozlowski Signed-off-by: Troy Mitchell Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/spacemit,k1-syscon.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/spacemit,k1-syscon.h b/include/dt-bindings/clock/spacemit,k1-syscon.h index 505205453d7f..0f8b59d6753c 100644 --- a/include/dt-bindings/clock/spacemit,k1-syscon.h +++ b/include/dt-bindings/clock/spacemit,k1-syscon.h @@ -77,6 +77,10 @@ #define CLK_I2S_BCLK 30 #define CLK_APB 31 #define CLK_WDT_BUS 32 +#define CLK_I2S_153P6 33 +#define CLK_I2S_153P6_BASE 34 +#define CLK_I2S_SYSCLK_SRC 35 +#define CLK_I2S_BCLK_FACTOR 36 /* MPMU resets */ #define RESET_WDT 0 -- cgit v1.2.3 From 519cff1d85694cbdf33b27591740e7e37348e6b4 Mon Sep 17 00:00:00 2001 From: Troy Mitchell Date: Thu, 11 Sep 2025 11:34:05 +0800 Subject: clk: spacemit: fix i2s clock Defining i2s_bclk and i2s_sysclk as fixed-rate clocks is insufficient for real I2S use cases. Moreover, the current I2S clock configuration does not work as expected due to missing parent clocks. This patch adds the missing parent clocks, defines i2s_sysclk as a DDN clock, and i2s_bclk as a DIV clock. A special note for i2s_bclk: From the register definition, the i2s_bclk divider always implies an additional 1/2 factor. The following table shows the correspondence between index and frequency division coefficients: | index | div | |-------|-------| | 0 | 2 | | 1 | 4 | | 2 | 6 | | 3 | 8 | From a software perspective, introducing i2s_bclk_factor as the parent of i2s_bclk is sufficient to address the issue. The I2S-related clock registers can be found here [1]. 
Link: https://developer.spacemit.com/documentation?token=LCrKwWDasiJuROkVNusc2pWTnEb [1] Fixes: 1b72c59db0add ("clk: spacemit: Add clock support for SpacemiT K1 SoC") Co-developer: Jinmei Wei Suggested-by: Haylen Chu Signed-off-by: Jinmei Wei Signed-off-by: Troy Mitchell Signed-off-by: Stephen Boyd --- include/soc/spacemit/k1-syscon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/soc/spacemit/k1-syscon.h b/include/soc/spacemit/k1-syscon.h index c59bd7a38e5b..354751562c55 100644 --- a/include/soc/spacemit/k1-syscon.h +++ b/include/soc/spacemit/k1-syscon.h @@ -30,6 +30,7 @@ to_spacemit_ccu_adev(struct auxiliary_device *adev) /* MPMU register offset */ #define MPMU_POSR 0x0010 +#define MPMU_FCCR 0x0008 #define POSR_PLL1_LOCK BIT(27) #define POSR_PLL2_LOCK BIT(28) #define POSR_PLL3_LOCK BIT(29) -- cgit v1.2.3 From f75f66683ded09f7135aef2e763c245a07c8271a Mon Sep 17 00:00:00 2001 From: Dan Moulding Date: Mon, 8 Sep 2025 10:12:43 -0600 Subject: crypto: comp - Use same definition of context alloc and free ops In commit 42d9f6c77479 ("crypto: acomp - Move scomp stream allocation code into acomp"), the crypto_acomp_streams struct was made to rely on having the alloc_ctx and free_ctx operations defined in the same order as the scomp_alg struct. But in that same commit, the alloc_ctx and free_ctx members of scomp_alg may be randomized by structure layout randomization, since they are contained in a pure ops structure (containing only function pointers). If the pointers within scomp_alg are randomized, but those in crypto_acomp_streams are not, then the order may no longer match. This fixes the problem by removing the union from scomp_alg so that both crypto_acomp_streams and scomp_alg will share the same definition of alloc_ctx and free_ctx, ensuring they will always have the same layout. 
Signed-off-by: Dan Moulding Suggested-by: Herbert Xu Fixes: 42d9f6c77479 ("crypto: acomp - Move scomp stream allocation code into acomp") Signed-off-by: Herbert Xu --- include/crypto/internal/scompress.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h index 533d6c16a491..6a2c5f2e90f9 100644 --- a/include/crypto/internal/scompress.h +++ b/include/crypto/internal/scompress.h @@ -18,11 +18,8 @@ struct crypto_scomp { /** * struct scomp_alg - synchronous compression algorithm * - * @alloc_ctx: Function allocates algorithm specific context - * @free_ctx: Function frees context allocated with alloc_ctx * @compress: Function performs a compress operation * @decompress: Function performs a de-compress operation - * @base: Common crypto API algorithm data structure * @streams: Per-cpu memory for algorithm * @calg: Cmonn algorithm data structure shared with acomp */ @@ -34,13 +31,7 @@ struct scomp_alg { unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx); - union { - struct { - void *(*alloc_ctx)(void); - void (*free_ctx)(void *ctx); - }; - struct crypto_acomp_streams streams; - }; + struct crypto_acomp_streams streams; union { struct COMP_ALG_COMMON; -- cgit v1.2.3 From bee8a520eb84950193d0566ea2c2e46406a4b6ce Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 9 Sep 2025 17:50:56 +0800 Subject: rhashtable: Use rcu_dereference_all and rcu_dereference_all_check Add rcu_dereference_all and rcu_dereference_all_check so that library code such as rhashtable can be used with any RCU variant. As it stands rcu_dereference is used within rhashtable, which creates false-positive warnings if the user calls it from another RCU context, such as preempt_disable(). Use the rcu_dereference_all and rcu_dereference_all_check calls in rhashtable to suppress these warnings. 
Also replace the rcu_dereference_raw calls in the list iterators with rcu_dereference_all to uncover buggy calls. Reported-by: Menglong Dong Signed-off-by: Herbert Xu Reviewed-by: Paul E. McKenney Signed-off-by: Herbert Xu --- include/linux/rcupdate.h | 26 ++++++++++++++++++++++++++ include/linux/rhashtable.h | 14 +++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..448eb1f0cb48 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -713,6 +713,24 @@ do { \ (c) || rcu_read_lock_sched_held(), \ __rcu) +/** + * rcu_dereference_all_check() - rcu_dereference_all with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is similar to rcu_dereference_check(), but allows protection + * by all forms of vanilla RCU readers, including preemption disabled, + * bh-disabled, and interrupt-disabled regions of code. Note that "vanilla + * RCU" excludes SRCU and the various Tasks RCU flavors. Please note + * that this macro should not be backported to any Linux-kernel version + * preceding v5.0 due to changes in synchronize_rcu() semantics prior + * to that version. + */ +#define rcu_dereference_all_check(p, c) \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_any_held(), \ + __rcu) + /* * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. @@ -767,6 +785,14 @@ do { \ */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) +/** + * rcu_dereference_all() - fetch RCU-all-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. 
+ */ +#define rcu_dereference_all(p) rcu_dereference_all_check(p, 0) + /** * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism * @p: The pointer to hand off diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e740157f3cd7..05a221ce79a6 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -272,13 +272,13 @@ struct rhash_lock_head __rcu **rht_bucket_nested_insert( rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_rcu(p, ht) \ - rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) + rcu_dereference_all_check(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_bucket(p, tbl, hash) \ rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_dereference_bucket_rcu(p, tbl, hash) \ - rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + rcu_dereference_all_check(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) @@ -373,7 +373,7 @@ static inline struct rhash_head *__rht_ptr( static inline struct rhash_head *rht_ptr_rcu( struct rhash_lock_head __rcu *const *bkt) { - return __rht_ptr(rcu_dereference(*bkt), bkt); + return __rht_ptr(rcu_dereference_all(*bkt), bkt); } static inline struct rhash_head *rht_ptr( @@ -497,7 +497,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, for (({barrier(); }), \ pos = head; \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain @@ -513,7 +513,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, for (({barrier(); }), \ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) /** * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head @@ -560,7 +560,7 @@ static inline void 
rht_assign_unlock(struct bucket_table *tbl, * list returned by rhltable_lookup. */ #define rhl_for_each_rcu(pos, list) \ - for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + for (pos = list; pos; pos = rcu_dereference_all(pos->next)) /** * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type @@ -574,7 +574,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, */ #define rhl_for_each_entry_rcu(tpos, pos, list, member) \ for (pos = list; pos && rht_entry(tpos, pos, member); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) -- cgit v1.2.3 From 3d716c51e0e8791f8dd72479a3e6d5e7650ac35e Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 13 Sep 2025 18:57:51 +0800 Subject: crypto: hisilicon/qm - mask axi error before memory init After the device memory is cleared, if the software sends the doorbell operation, the hardware may trigger an AXI error when processing the doorbell. This error is caused by memory clearing and hardware access to address 0. Therefore, the AXI error is masked during this period.
Signed-off-by: Weili Qian Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- include/linux/hisi_acc_qm.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index f2254ddc327c..c4690e365ade 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -104,6 +104,8 @@ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ #define UACCE_MODE_DESC "0(default) means only register to crypto, 1 means both register to crypto and uacce" +#define QM_ECC_MBIT BIT(2) + enum qm_stop_reason { QM_NORMAL, QM_SOFT_RESET, @@ -240,19 +242,22 @@ enum acc_err_result { ACC_ERR_RECOVERED, }; -struct hisi_qm_err_info { - char *acpi_rst; - u32 msi_wr_port; +struct hisi_qm_err_mask { u32 ecc_2bits_mask; - u32 qm_shutdown_mask; - u32 dev_shutdown_mask; - u32 qm_reset_mask; - u32 dev_reset_mask; + u32 shutdown_mask; + u32 reset_mask; u32 ce; u32 nfe; u32 fe; }; +struct hisi_qm_err_info { + char *acpi_rst; + u32 msi_wr_port; + struct hisi_qm_err_mask qm_err; + struct hisi_qm_err_mask dev_err; +}; + struct hisi_qm_err_status { u32 is_qm_ecc_mbit; u32 is_dev_ecc_mbit; @@ -273,6 +278,8 @@ struct hisi_qm_err_ini { enum acc_err_result (*get_err_result)(struct hisi_qm *qm); bool (*dev_is_abnormal)(struct hisi_qm *qm); int (*set_priv_status)(struct hisi_qm *qm); + void (*disable_axi_error)(struct hisi_qm *qm); + void (*enable_axi_error)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { -- cgit v1.2.3 From 79525b51acc1c8e331ab47eb131a99f5370a76c2 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 19 Sep 2025 12:38:58 -0700 Subject: io_uring: fix nvme's 32b cqes on mixed cq The nvme uring_cmd only uses 32b CQEs. If the ring uses a mixed CQ, then we need to make sure we flag the completion as a 32b CQE. On the other hand, if nvme uring_cmd was using a dedicated 32b CQE, the posting was missing the extra memcpy because it only applied to big CQEs on a mixed CQ.
Fixes: e26dca67fde1943 ("io_uring: add support for IORING_SETUP_CQE_MIXED") Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index c8185f54fde9..02d50f08f668 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -56,8 +56,8 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, * Note: the caller should never hard code @issue_flags and is only allowed * to pass the mask provided by the core io_uring code. */ -void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2, - unsigned issue_flags); +void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2, + unsigned issue_flags, bool is_cqe32); void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, io_uring_cmd_tw_t task_work_cb, @@ -104,8 +104,8 @@ static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, { return -EOPNOTSUPP; } -static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, - u64 ret2, unsigned issue_flags) +static inline void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, + u64 ret2, unsigned issue_flags, bool is_cqe32) { } static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, @@ -159,6 +159,18 @@ static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd) return cmd_to_io_kiocb(cmd)->ctx; } +static inline void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, + u64 res2, unsigned issue_flags) +{ + return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, false); +} + +static inline void io_uring_cmd_done32(struct io_uring_cmd *ioucmd, s32 ret, + u64 res2, unsigned issue_flags) +{ + return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, true); +} + int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, void (*release)(void *), unsigned int 
index, unsigned int issue_flags); -- cgit v1.2.3 From 9e622804d57e2d08f0271200606bd1270f75126f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 25 Aug 2025 11:10:20 -0400 Subject: Bluetooth: hci_event: Fix UAF in hci_acl_create_conn_sync This fixes the following UAF in hci_acl_create_conn_sync, where a connection still pending in command submission (conn->state == BT_OPEN) may be freed; since this can also happen with the likes of hci_le_create_conn_sync, fix it as well: BUG: KASAN: slab-use-after-free in hci_acl_create_conn_sync+0x5ef/0x790 net/bluetooth/hci_sync.c:6861 Write of size 2 at addr ffff88805ffcc038 by task kworker/u11:2/9541 CPU: 1 UID: 0 PID: 9541 Comm: kworker/u11:2 Not tainted 6.16.0-rc7 #3 PREEMPT(full) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Workqueue: hci3 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xca/0x230 mm/kasan/report.c:480 kasan_report+0x118/0x150 mm/kasan/report.c:593 hci_acl_create_conn_sync+0x5ef/0x790 net/bluetooth/hci_sync.c:6861 hci_cmd_sync_work+0x210/0x3a0 net/bluetooth/hci_sync.c:332 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xae1/0x17b0 kernel/workqueue.c:3321 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3402 kthread+0x70e/0x8a0 kernel/kthread.c:464 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 home/kwqcheii/source/fuzzing/kernel/kasan/linux-6.16-rc7/arch/x86/entry/entry_64.S:245 Allocated by task 123736: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x230/0x3d0 mm/slub.c:4359 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1039 [inline]
__hci_conn_add+0x233/0x1b30 net/bluetooth/hci_conn.c:939 hci_conn_add_unset net/bluetooth/hci_conn.c:1051 [inline] hci_connect_acl+0x16c/0x4e0 net/bluetooth/hci_conn.c:1634 pair_device+0x418/0xa70 net/bluetooth/mgmt.c:3556 hci_mgmt_cmd+0x9c9/0xef0 net/bluetooth/hci_sock.c:1719 hci_sock_sendmsg+0x6ca/0xef0 net/bluetooth/hci_sock.c:1839 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:727 sock_write_iter+0x258/0x330 net/socket.c:1131 new_sync_write fs/read_write.c:593 [inline] vfs_write+0x54b/0xa90 fs/read_write.c:686 ksys_write+0x145/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 103680: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x62/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2381 [inline] slab_free mm/slub.c:4643 [inline] kfree+0x18e/0x440 mm/slub.c:4842 device_release+0x9c/0x1c0 kobject_cleanup lib/kobject.c:689 [inline] kobject_release lib/kobject.c:720 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x22b/0x480 lib/kobject.c:737 hci_conn_cleanup net/bluetooth/hci_conn.c:175 [inline] hci_conn_del+0x8ff/0xcb0 net/bluetooth/hci_conn.c:1173 hci_conn_complete_evt+0x3c7/0x1040 net/bluetooth/hci_event.c:3199 hci_event_func net/bluetooth/hci_event.c:7477 [inline] hci_event_packet+0x7e0/0x1200 net/bluetooth/hci_event.c:7531 hci_rx_work+0x46a/0xe80 net/bluetooth/hci_core.c:4070 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xae1/0x17b0 kernel/workqueue.c:3321 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3402 kthread+0x70e/0x8a0 kernel/kthread.c:464 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 
ret_from_fork_asm+0x1a/0x30 home/kwqcheii/source/fuzzing/kernel/kasan/linux-6.16-rc7/arch/x86/entry/entry_64.S:245 Last potentially related work creation: kasan_save_stack+0x3e/0x60 mm/kasan/common.c:47 kasan_record_aux_stack+0xbd/0xd0 mm/kasan/generic.c:548 insert_work+0x3d/0x330 kernel/workqueue.c:2183 __queue_work+0xbd9/0xfe0 kernel/workqueue.c:2345 queue_delayed_work_on+0x18b/0x280 kernel/workqueue.c:2561 pairing_complete+0x1e7/0x2b0 net/bluetooth/mgmt.c:3451 pairing_complete_cb+0x1ac/0x230 net/bluetooth/mgmt.c:3487 hci_connect_cfm include/net/bluetooth/hci_core.h:2064 [inline] hci_conn_failed+0x24d/0x310 net/bluetooth/hci_conn.c:1275 hci_conn_complete_evt+0x3c7/0x1040 net/bluetooth/hci_event.c:3199 hci_event_func net/bluetooth/hci_event.c:7477 [inline] hci_event_packet+0x7e0/0x1200 net/bluetooth/hci_event.c:7531 hci_rx_work+0x46a/0xe80 net/bluetooth/hci_core.c:4070 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xae1/0x17b0 kernel/workqueue.c:3321 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3402 kthread+0x70e/0x8a0 kernel/kthread.c:464 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 home/kwqcheii/source/fuzzing/kernel/kasan/linux-6.16-rc7/arch/x86/entry/entry_64.S:245 Fixes: aef2aa4fa98e ("Bluetooth: hci_event: Fix creating hci_conn object on error status") Reported-by: Junvyyang, Tencent Zhuque Lab Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6906af7a8f24..6560b32f3125 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1245,6 +1245,27 @@ static inline struct hci_conn *hci_conn_hash_lookup_ba(struct hci_dev *hdev, return NULL; } +static inline struct hci_conn *hci_conn_hash_lookup_role(struct hci_dev *hdev, + __u8 type, __u8 role, + bdaddr_t *ba) +{ + struct 
hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == type && c->role == role && !bacmp(&c->dst, ba)) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + static inline struct hci_conn *hci_conn_hash_lookup_le(struct hci_dev *hdev, bdaddr_t *ba, __u8 ba_type) -- cgit v1.2.3 From 64e7df08ed43e45aa1a8382b459b516d04d47e99 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 24 Jul 2025 10:39:05 +0200 Subject: dt-bindings: clock: mt7622: Add AFE_MRGIF clock Add the missing AFE Merge Interface clock to MT7622 to make use of it in the audio subsystem. While at it, also remove the useless CLK_AUDIO_NR_CLK definition. Signed-off-by: AngeloGioacchino Del Regno Acked-by: Rob Herring (Arm) Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/mt7622-clk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/mt7622-clk.h b/include/dt-bindings/clock/mt7622-clk.h index c12e7eab0788..a173eb132892 100644 --- a/include/dt-bindings/clock/mt7622-clk.h +++ b/include/dt-bindings/clock/mt7622-clk.h @@ -228,7 +228,7 @@ #define CLK_AUDIO_MEM_ASRC4 44 #define CLK_AUDIO_MEM_ASRC5 45 #define CLK_AUDIO_AFE_CONN 46 -#define CLK_AUDIO_NR_CLK 47 +#define CLK_AUDIO_AFE_MRGIF 47 /* SSUSBSYS */ -- cgit v1.2.3 From dd240e95f1bee671f58148dea25e3be7cb39b50d Mon Sep 17 00:00:00 2001 From: Laura Nao Date: Mon, 15 Sep 2025 17:19:29 +0200 Subject: dt-bindings: clock: mediatek: Describe MT8196 clock controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce binding documentation for system clocks, functional clocks, and PEXTP0/1 and UFS reset controllers on MediaTek MT8196. 
This binding also includes a handle to the hardware voter, a fixed-function MCU designed to aggregate votes from the application processor and other remote processors to manage clocks and power domains. The HWV on MT8196/MT6991 is incomplete and requires software to manually enable power supplies, parent clocks, and FENC, as well as write to both the HWV MMIO and the controller registers. Because of these constraints, the HWV cannot be modeled using generic clock, power domain, or interconnect APIs. Instead, a custom phandle is exceptionally used to provide direct, syscon-like register access to drivers. Reviewed-by: Nícolas F. R. A. Prado Co-developed-by: AngeloGioacchino Del Regno Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Krzysztof Kozlowski Signed-off-by: Laura Nao Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/mediatek,mt8196-clock.h | 803 +++++++++++++++++++++ include/dt-bindings/reset/mediatek,mt8196-resets.h | 26 + 2 files changed, 829 insertions(+) create mode 100644 include/dt-bindings/clock/mediatek,mt8196-clock.h create mode 100644 include/dt-bindings/reset/mediatek,mt8196-resets.h (limited to 'include') diff --git a/include/dt-bindings/clock/mediatek,mt8196-clock.h b/include/dt-bindings/clock/mediatek,mt8196-clock.h new file mode 100644 index 000000000000..ae0946ab7621 --- /dev/null +++ b/include/dt-bindings/clock/mediatek,mt8196-clock.h @@ -0,0 +1,803 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (c) 2025 MediaTek Inc. + * Guangjie Song + * Copyright (c) 2025 Collabora Ltd. 
+ * Laura Nao + */ + +#ifndef _DT_BINDINGS_CLK_MT8196_H +#define _DT_BINDINGS_CLK_MT8196_H + +/* CKSYS */ +#define CLK_TOP_AXI 0 +#define CLK_TOP_MEM_SUB 1 +#define CLK_TOP_IO_NOC 2 +#define CLK_TOP_P_AXI 3 +#define CLK_TOP_UFS_PEXTP0_AXI 4 +#define CLK_TOP_PEXTP1_USB_AXI 5 +#define CLK_TOP_P_FMEM_SUB 6 +#define CLK_TOP_PEXPT0_MEM_SUB 7 +#define CLK_TOP_PEXTP1_USB_MEM_SUB 8 +#define CLK_TOP_P_NOC 9 +#define CLK_TOP_EMI_N 10 +#define CLK_TOP_EMI_S 11 +#define CLK_TOP_AP2CONN_HOST 12 +#define CLK_TOP_ATB 13 +#define CLK_TOP_CIRQ 14 +#define CLK_TOP_PBUS_156M 15 +#define CLK_TOP_EFUSE 16 +#define CLK_TOP_MCL3GIC 17 +#define CLK_TOP_MCINFRA 18 +#define CLK_TOP_DSP 19 +#define CLK_TOP_MFG_REF 20 +#define CLK_TOP_MFG_EB 21 +#define CLK_TOP_UART 22 +#define CLK_TOP_SPI0_BCLK 23 +#define CLK_TOP_SPI1_BCLK 24 +#define CLK_TOP_SPI2_BCLK 25 +#define CLK_TOP_SPI3_BCLK 26 +#define CLK_TOP_SPI4_BCLK 27 +#define CLK_TOP_SPI5_BCLK 28 +#define CLK_TOP_SPI6_BCLK 29 +#define CLK_TOP_SPI7_BCLK 30 +#define CLK_TOP_MSDC30_1 31 +#define CLK_TOP_MSDC30_2 32 +#define CLK_TOP_DISP_PWM 33 +#define CLK_TOP_USB_TOP_1P 34 +#define CLK_TOP_USB_XHCI_1P 35 +#define CLK_TOP_USB_FMCNT_P1 36 +#define CLK_TOP_I2C_P 37 +#define CLK_TOP_I2C_EAST 38 +#define CLK_TOP_I2C_WEST 39 +#define CLK_TOP_I2C_NORTH 40 +#define CLK_TOP_AES_UFSFDE 41 +#define CLK_TOP_UFS 42 +#define CLK_TOP_AUD_1 43 +#define CLK_TOP_AUD_2 44 +#define CLK_TOP_ADSP 45 +#define CLK_TOP_ADSP_UARTHUB_B 46 +#define CLK_TOP_DPMAIF_MAIN 47 +#define CLK_TOP_PWM 48 +#define CLK_TOP_MCUPM 49 +#define CLK_TOP_IPSEAST 50 +#define CLK_TOP_TL 51 +#define CLK_TOP_TL_P1 52 +#define CLK_TOP_TL_P2 53 +#define CLK_TOP_EMI_INTERFACE_546 54 +#define CLK_TOP_SDF 55 +#define CLK_TOP_UARTHUB_BCLK 56 +#define CLK_TOP_DPSW_CMP_26M 57 +#define CLK_TOP_SMAP 58 +#define CLK_TOP_SSR_PKA 59 +#define CLK_TOP_SSR_DMA 60 +#define CLK_TOP_SSR_KDF 61 +#define CLK_TOP_SSR_RNG 62 +#define CLK_TOP_SPU0 63 +#define CLK_TOP_SPU1 64 +#define CLK_TOP_DXCC 65 +#define 
CLK_TOP_APLL_I2SIN0 66 +#define CLK_TOP_APLL_I2SIN1 67 +#define CLK_TOP_APLL_I2SIN2 68 +#define CLK_TOP_APLL_I2SIN3 69 +#define CLK_TOP_APLL_I2SIN4 70 +#define CLK_TOP_APLL_I2SIN6 71 +#define CLK_TOP_APLL_I2SOUT0 72 +#define CLK_TOP_APLL_I2SOUT1 73 +#define CLK_TOP_APLL_I2SOUT2 74 +#define CLK_TOP_APLL_I2SOUT3 75 +#define CLK_TOP_APLL_I2SOUT4 76 +#define CLK_TOP_APLL_I2SOUT6 77 +#define CLK_TOP_APLL_FMI2S 78 +#define CLK_TOP_APLL_TDMOUT 79 +#define CLK_TOP_APLL12_DIV_TDMOUT_M 80 +#define CLK_TOP_APLL12_DIV_TDMOUT_B 81 +#define CLK_TOP_MAINPLL_D3 82 +#define CLK_TOP_MAINPLL_D4 83 +#define CLK_TOP_MAINPLL_D4_D2 84 +#define CLK_TOP_MAINPLL_D4_D4 85 +#define CLK_TOP_MAINPLL_D4_D8 86 +#define CLK_TOP_MAINPLL_D5 87 +#define CLK_TOP_MAINPLL_D5_D2 88 +#define CLK_TOP_MAINPLL_D5_D4 89 +#define CLK_TOP_MAINPLL_D5_D8 90 +#define CLK_TOP_MAINPLL_D6 91 +#define CLK_TOP_MAINPLL_D6_D2 92 +#define CLK_TOP_MAINPLL_D7 93 +#define CLK_TOP_MAINPLL_D7_D2 94 +#define CLK_TOP_MAINPLL_D7_D4 95 +#define CLK_TOP_MAINPLL_D7_D8 96 +#define CLK_TOP_MAINPLL_D9 97 +#define CLK_TOP_UNIVPLL_D4 98 +#define CLK_TOP_UNIVPLL_D4_D2 99 +#define CLK_TOP_UNIVPLL_D4_D4 100 +#define CLK_TOP_UNIVPLL_D4_D8 101 +#define CLK_TOP_UNIVPLL_D5 102 +#define CLK_TOP_UNIVPLL_D5_D2 103 +#define CLK_TOP_UNIVPLL_D5_D4 104 +#define CLK_TOP_UNIVPLL_D6 105 +#define CLK_TOP_UNIVPLL_D6_D2 106 +#define CLK_TOP_UNIVPLL_D6_D4 107 +#define CLK_TOP_UNIVPLL_D6_D8 108 +#define CLK_TOP_UNIVPLL_D6_D16 109 +#define CLK_TOP_UNIVPLL_192M 110 +#define CLK_TOP_UNIVPLL_192M_D4 111 +#define CLK_TOP_UNIVPLL_192M_D8 112 +#define CLK_TOP_UNIVPLL_192M_D16 113 +#define CLK_TOP_UNIVPLL_192M_D32 114 +#define CLK_TOP_UNIVPLL_192M_D10 115 +#define CLK_TOP_TVDPLL1_D2 116 +#define CLK_TOP_MSDCPLL_D2 117 +#define CLK_TOP_OSC_D2 118 +#define CLK_TOP_OSC_D3 119 +#define CLK_TOP_OSC_D4 120 +#define CLK_TOP_OSC_D5 121 +#define CLK_TOP_OSC_D7 122 +#define CLK_TOP_OSC_D8 123 +#define CLK_TOP_OSC_D10 124 +#define CLK_TOP_OSC_D14 125 +#define CLK_TOP_OSC_D20 
126 +#define CLK_TOP_OSC_D32 127 +#define CLK_TOP_OSC_D40 128 +#define CLK_TOP_SFLASH 129 + +/* APMIXEDSYS */ +#define CLK_APMIXED_MAINPLL 0 +#define CLK_APMIXED_UNIVPLL 1 +#define CLK_APMIXED_MSDCPLL 2 +#define CLK_APMIXED_ADSPPLL 3 +#define CLK_APMIXED_EMIPLL 4 +#define CLK_APMIXED_EMIPLL2 5 +#define CLK_APMIXED_NET1PLL 6 +#define CLK_APMIXED_SGMIIPLL 7 + +/* CKSYS_GP2 */ +#define CLK_TOP2_SENINF0 0 +#define CLK_TOP2_SENINF1 1 +#define CLK_TOP2_SENINF2 2 +#define CLK_TOP2_SENINF3 3 +#define CLK_TOP2_SENINF4 4 +#define CLK_TOP2_SENINF5 5 +#define CLK_TOP2_IMG1 6 +#define CLK_TOP2_IPE 7 +#define CLK_TOP2_CAM 8 +#define CLK_TOP2_CAMTM 9 +#define CLK_TOP2_DPE 10 +#define CLK_TOP2_VDEC 11 +#define CLK_TOP2_CCUSYS 12 +#define CLK_TOP2_CCUTM 13 +#define CLK_TOP2_VENC 14 +#define CLK_TOP2_DP1 15 +#define CLK_TOP2_DP0 16 +#define CLK_TOP2_DISP 17 +#define CLK_TOP2_MDP 18 +#define CLK_TOP2_MMINFRA 19 +#define CLK_TOP2_MMINFRA_SNOC 20 +#define CLK_TOP2_MMUP 21 +#define CLK_TOP2_MMINFRA_AO 22 +#define CLK_TOP2_MAINPLL2_D2 23 +#define CLK_TOP2_MAINPLL2_D3 24 +#define CLK_TOP2_MAINPLL2_D4 25 +#define CLK_TOP2_MAINPLL2_D4_D2 26 +#define CLK_TOP2_MAINPLL2_D4_D4 27 +#define CLK_TOP2_MAINPLL2_D5 28 +#define CLK_TOP2_MAINPLL2_D5_D2 29 +#define CLK_TOP2_MAINPLL2_D6 30 +#define CLK_TOP2_MAINPLL2_D6_D2 31 +#define CLK_TOP2_MAINPLL2_D7 32 +#define CLK_TOP2_MAINPLL2_D7_D2 33 +#define CLK_TOP2_MAINPLL2_D9 34 +#define CLK_TOP2_UNIVPLL2_D3 35 +#define CLK_TOP2_UNIVPLL2_D4 36 +#define CLK_TOP2_UNIVPLL2_D4_D2 37 +#define CLK_TOP2_UNIVPLL2_D5 38 +#define CLK_TOP2_UNIVPLL2_D5_D2 39 +#define CLK_TOP2_UNIVPLL2_D6 40 +#define CLK_TOP2_UNIVPLL2_D6_D2 41 +#define CLK_TOP2_UNIVPLL2_D6_D4 42 +#define CLK_TOP2_UNIVPLL2_D7 43 +#define CLK_TOP2_IMGPLL_D2 44 +#define CLK_TOP2_IMGPLL_D4 45 +#define CLK_TOP2_IMGPLL_D5 46 +#define CLK_TOP2_IMGPLL_D5_D2 47 +#define CLK_TOP2_MMPLL2_D3 48 +#define CLK_TOP2_MMPLL2_D4 49 +#define CLK_TOP2_MMPLL2_D4_D2 50 +#define CLK_TOP2_MMPLL2_D5 51 +#define 
CLK_TOP2_MMPLL2_D5_D2 52 +#define CLK_TOP2_MMPLL2_D6 53 +#define CLK_TOP2_MMPLL2_D6_D2 54 +#define CLK_TOP2_MMPLL2_D7 55 +#define CLK_TOP2_MMPLL2_D9 56 +#define CLK_TOP2_TVDPLL1_D4 57 +#define CLK_TOP2_TVDPLL1_D8 58 +#define CLK_TOP2_TVDPLL1_D16 59 +#define CLK_TOP2_TVDPLL2_D2 60 +#define CLK_TOP2_TVDPLL2_D4 61 +#define CLK_TOP2_TVDPLL2_D8 62 +#define CLK_TOP2_TVDPLL2_D16 63 +#define CLK_TOP2_DVO 64 +#define CLK_TOP2_DVO_FAVT 65 +#define CLK_TOP2_TVDPLL3_D2 66 +#define CLK_TOP2_TVDPLL3_D4 67 +#define CLK_TOP2_TVDPLL3_D8 68 +#define CLK_TOP2_TVDPLL3_D16 69 + +/* APMIXEDSYS_GP2 */ +#define CLK_APMIXED2_MAINPLL2 0 +#define CLK_APMIXED2_UNIVPLL2 1 +#define CLK_APMIXED2_MMPLL2 2 +#define CLK_APMIXED2_IMGPLL 3 +#define CLK_APMIXED2_TVDPLL1 4 +#define CLK_APMIXED2_TVDPLL2 5 +#define CLK_APMIXED2_TVDPLL3 6 + +/* IMP_IIC_WRAP_E */ +#define CLK_IMPE_I2C5 0 + +/* IMP_IIC_WRAP_W */ +#define CLK_IMPW_I2C0 0 +#define CLK_IMPW_I2C3 1 +#define CLK_IMPW_I2C6 2 +#define CLK_IMPW_I2C10 3 + +/* IMP_IIC_WRAP_N */ +#define CLK_IMPN_I2C1 0 +#define CLK_IMPN_I2C2 1 +#define CLK_IMPN_I2C4 2 +#define CLK_IMPN_I2C7 3 +#define CLK_IMPN_I2C8 4 +#define CLK_IMPN_I2C9 5 + +/* IMP_IIC_WRAP_C */ +#define CLK_IMPC_I2C11 0 +#define CLK_IMPC_I2C12 1 +#define CLK_IMPC_I2C13 2 +#define CLK_IMPC_I2C14 3 + +/* PERICFG_AO */ +#define CLK_PERI_AO_UART0_BCLK 0 +#define CLK_PERI_AO_UART1_BCLK 1 +#define CLK_PERI_AO_UART2_BCLK 2 +#define CLK_PERI_AO_UART3_BCLK 3 +#define CLK_PERI_AO_UART4_BCLK 4 +#define CLK_PERI_AO_UART5_BCLK 5 +#define CLK_PERI_AO_PWM_X16W_HCLK 6 +#define CLK_PERI_AO_PWM_X16W_BCLK 7 +#define CLK_PERI_AO_PWM_PWM_BCLK0 8 +#define CLK_PERI_AO_PWM_PWM_BCLK1 9 +#define CLK_PERI_AO_PWM_PWM_BCLK2 10 +#define CLK_PERI_AO_PWM_PWM_BCLK3 11 +#define CLK_PERI_AO_SPI0_BCLK 12 +#define CLK_PERI_AO_SPI1_BCLK 13 +#define CLK_PERI_AO_SPI2_BCLK 14 +#define CLK_PERI_AO_SPI3_BCLK 15 +#define CLK_PERI_AO_SPI4_BCLK 16 +#define CLK_PERI_AO_SPI5_BCLK 17 +#define CLK_PERI_AO_SPI6_BCLK 18 +#define 
CLK_PERI_AO_SPI7_BCLK 19 +#define CLK_PERI_AO_AP_DMA_X32W_BCLK 20 +#define CLK_PERI_AO_MSDC1_MSDC_SRC 21 +#define CLK_PERI_AO_MSDC1_HCLK 22 +#define CLK_PERI_AO_MSDC1_AXI 23 +#define CLK_PERI_AO_MSDC1_HCLK_WRAP 24 +#define CLK_PERI_AO_MSDC2_MSDC_SRC 25 +#define CLK_PERI_AO_MSDC2_HCLK 26 +#define CLK_PERI_AO_MSDC2_AXI 27 +#define CLK_PERI_AO_MSDC2_HCLK_WRAP 28 +#define CLK_PERI_AO_FLASHIF_FLASH 29 +#define CLK_PERI_AO_FLASHIF_27M 30 +#define CLK_PERI_AO_FLASHIF_DRAM 31 +#define CLK_PERI_AO_FLASHIF_AXI 32 +#define CLK_PERI_AO_FLASHIF_BCLK 33 + +/* UFSCFG_AO */ +#define CLK_UFSAO_UNIPRO_TX_SYM 0 +#define CLK_UFSAO_UNIPRO_RX_SYM0 1 +#define CLK_UFSAO_UNIPRO_RX_SYM1 2 +#define CLK_UFSAO_UNIPRO_SYS 3 +#define CLK_UFSAO_UNIPRO_SAP 4 +#define CLK_UFSAO_PHY_SAP 5 +#define CLK_UFSAO_UFSHCI_UFS 6 +#define CLK_UFSAO_UFSHCI_AES 7 + +/* PEXTP0CFG_AO */ +#define CLK_PEXT_PEXTP_MAC_P0_TL 0 +#define CLK_PEXT_PEXTP_MAC_P0_REF 1 +#define CLK_PEXT_PEXTP_PHY_P0_MCU_BUS 2 +#define CLK_PEXT_PEXTP_PHY_P0_PEXTP_REF 3 +#define CLK_PEXT_PEXTP_MAC_P0_AXI_250 4 +#define CLK_PEXT_PEXTP_MAC_P0_AHB_APB 5 +#define CLK_PEXT_PEXTP_MAC_P0_PL_P 6 +#define CLK_PEXT_PEXTP_VLP_AO_P0_LP 7 + +/* PEXTP1CFG_AO */ +#define CLK_PEXT1_PEXTP_MAC_P1_TL 0 +#define CLK_PEXT1_PEXTP_MAC_P1_REF 1 +#define CLK_PEXT1_PEXTP_MAC_P2_TL 2 +#define CLK_PEXT1_PEXTP_MAC_P2_REF 3 +#define CLK_PEXT1_PEXTP_PHY_P1_MCU_BUS 4 +#define CLK_PEXT1_PEXTP_PHY_P1_PEXTP_REF 5 +#define CLK_PEXT1_PEXTP_PHY_P2_MCU_BUS 6 +#define CLK_PEXT1_PEXTP_PHY_P2_PEXTP_REF 7 +#define CLK_PEXT1_PEXTP_MAC_P1_AXI_250 8 +#define CLK_PEXT1_PEXTP_MAC_P1_AHB_APB 9 +#define CLK_PEXT1_PEXTP_MAC_P1_PL_P 10 +#define CLK_PEXT1_PEXTP_MAC_P2_AXI_250 11 +#define CLK_PEXT1_PEXTP_MAC_P2_AHB_APB 12 +#define CLK_PEXT1_PEXTP_MAC_P2_PL_P 13 +#define CLK_PEXT1_PEXTP_VLP_AO_P1_LP 14 +#define CLK_PEXT1_PEXTP_VLP_AO_P2_LP 15 + +/* VLP_CKSYS */ +#define CLK_VLP_APLL1 0 +#define CLK_VLP_APLL2 1 +#define CLK_VLP_SCP 2 +#define CLK_VLP_SCP_SPI 3 +#define CLK_VLP_SCP_IIC 4 +#define 
CLK_VLP_SCP_IIC_HS 5 +#define CLK_VLP_PWRAP_ULPOSC 6 +#define CLK_VLP_SPMI_M_TIA_32K 7 +#define CLK_VLP_APXGPT_26M_B 8 +#define CLK_VLP_DPSW 9 +#define CLK_VLP_DPSW_CENTRAL 10 +#define CLK_VLP_SPMI_M_MST 11 +#define CLK_VLP_DVFSRC 12 +#define CLK_VLP_PWM_VLP 13 +#define CLK_VLP_AXI_VLP 14 +#define CLK_VLP_SYSTIMER_26M 15 +#define CLK_VLP_SSPM 16 +#define CLK_VLP_SRCK 17 +#define CLK_VLP_CAMTG0 18 +#define CLK_VLP_CAMTG1 19 +#define CLK_VLP_CAMTG2 20 +#define CLK_VLP_CAMTG3 21 +#define CLK_VLP_CAMTG4 22 +#define CLK_VLP_CAMTG5 23 +#define CLK_VLP_CAMTG6 24 +#define CLK_VLP_CAMTG7 25 +#define CLK_VLP_SSPM_26M 26 +#define CLK_VLP_ULPOSC_SSPM 27 +#define CLK_VLP_VLP_PBUS_26M 28 +#define CLK_VLP_DEBUG_ERR_FLAG 29 +#define CLK_VLP_DPMSRDMA 30 +#define CLK_VLP_VLP_PBUS_156M 31 +#define CLK_VLP_SPM 32 +#define CLK_VLP_MMINFRA 33 +#define CLK_VLP_USB_TOP 34 +#define CLK_VLP_USB_XHCI 35 +#define CLK_VLP_NOC_VLP 36 +#define CLK_VLP_AUDIO_H 37 +#define CLK_VLP_AUD_ENGEN1 38 +#define CLK_VLP_AUD_ENGEN2 39 +#define CLK_VLP_AUD_INTBUS 40 +#define CLK_VLP_SPVLP_26M 41 +#define CLK_VLP_SPU0_VLP 42 +#define CLK_VLP_SPU1_VLP 43 +#define CLK_VLP_CLK26M 44 +#define CLK_VLP_APLL1_D4 45 +#define CLK_VLP_APLL1_D8 46 +#define CLK_VLP_APLL2_D4 47 +#define CLK_VLP_APLL2_D8 48 + +/* DISPSYS_CONFIG */ +#define CLK_MM_CONFIG 0 +#define CLK_MM_DISP_MUTEX0 1 +#define CLK_MM_DISP_AAL0 2 +#define CLK_MM_DISP_AAL1 3 +#define CLK_MM_DISP_C3D0 4 +#define CLK_MM_DISP_C3D1 5 +#define CLK_MM_DISP_C3D2 6 +#define CLK_MM_DISP_C3D3 7 +#define CLK_MM_DISP_CCORR0 8 +#define CLK_MM_DISP_CCORR1 9 +#define CLK_MM_DISP_CCORR2 10 +#define CLK_MM_DISP_CCORR3 11 +#define CLK_MM_DISP_CHIST0 12 +#define CLK_MM_DISP_CHIST1 13 +#define CLK_MM_DISP_COLOR0 14 +#define CLK_MM_DISP_COLOR1 15 +#define CLK_MM_DISP_DITHER0 16 +#define CLK_MM_DISP_DITHER1 17 +#define CLK_MM_DISP_DLI_ASYNC0 18 +#define CLK_MM_DISP_DLI_ASYNC1 19 +#define CLK_MM_DISP_DLI_ASYNC2 20 +#define CLK_MM_DISP_DLI_ASYNC3 21 +#define CLK_MM_DISP_DLI_ASYNC4 
22 +#define CLK_MM_DISP_DLI_ASYNC5 23 +#define CLK_MM_DISP_DLI_ASYNC6 24 +#define CLK_MM_DISP_DLI_ASYNC7 25 +#define CLK_MM_DISP_DLI_ASYNC8 26 +#define CLK_MM_DISP_DLI_ASYNC9 27 +#define CLK_MM_DISP_DLI_ASYNC10 28 +#define CLK_MM_DISP_DLI_ASYNC11 29 +#define CLK_MM_DISP_DLI_ASYNC12 30 +#define CLK_MM_DISP_DLI_ASYNC13 31 +#define CLK_MM_DISP_DLI_ASYNC14 32 +#define CLK_MM_DISP_DLI_ASYNC15 33 +#define CLK_MM_DISP_DLO_ASYNC0 34 +#define CLK_MM_DISP_DLO_ASYNC1 35 +#define CLK_MM_DISP_DLO_ASYNC2 36 +#define CLK_MM_DISP_DLO_ASYNC3 37 +#define CLK_MM_DISP_DLO_ASYNC4 38 +#define CLK_MM_DISP_DLO_ASYNC5 39 +#define CLK_MM_DISP_DLO_ASYNC6 40 +#define CLK_MM_DISP_DLO_ASYNC7 41 +#define CLK_MM_DISP_DLO_ASYNC8 42 +#define CLK_MM_DISP_GAMMA0 43 +#define CLK_MM_DISP_GAMMA1 44 +#define CLK_MM_MDP_AAL0 45 +#define CLK_MM_MDP_AAL1 46 +#define CLK_MM_MDP_RDMA0 47 +#define CLK_MM_DISP_POSTMASK0 48 +#define CLK_MM_DISP_POSTMASK1 49 +#define CLK_MM_MDP_RSZ0 50 +#define CLK_MM_MDP_RSZ1 51 +#define CLK_MM_DISP_SPR0 52 +#define CLK_MM_DISP_TDSHP0 53 +#define CLK_MM_DISP_TDSHP1 54 +#define CLK_MM_DISP_WDMA0 55 +#define CLK_MM_DISP_Y2R0 56 +#define CLK_MM_SMI_SUB_COMM0 57 +#define CLK_MM_DISP_FAKE_ENG0 58 + +/* DISPSYS1_CONFIG */ +#define CLK_MM1_DISPSYS1_CONFIG 0 +#define CLK_MM1_DISPSYS1_S_CONFIG 1 +#define CLK_MM1_DISP_MUTEX0 2 +#define CLK_MM1_DISP_DLI_ASYNC20 3 +#define CLK_MM1_DISP_DLI_ASYNC21 4 +#define CLK_MM1_DISP_DLI_ASYNC22 5 +#define CLK_MM1_DISP_DLI_ASYNC23 6 +#define CLK_MM1_DISP_DLI_ASYNC24 7 +#define CLK_MM1_DISP_DLI_ASYNC25 8 +#define CLK_MM1_DISP_DLI_ASYNC26 9 +#define CLK_MM1_DISP_DLI_ASYNC27 10 +#define CLK_MM1_DISP_DLI_ASYNC28 11 +#define CLK_MM1_DISP_RELAY0 12 +#define CLK_MM1_DISP_RELAY1 13 +#define CLK_MM1_DISP_RELAY2 14 +#define CLK_MM1_DISP_RELAY3 15 +#define CLK_MM1_DISP_DP_INTF0 16 +#define CLK_MM1_DISP_DP_INTF1 17 +#define CLK_MM1_DISP_DSC_WRAP0 18 +#define CLK_MM1_DISP_DSC_WRAP1 19 +#define CLK_MM1_DISP_DSC_WRAP2 20 +#define CLK_MM1_DISP_DSC_WRAP3 21 +#define 
CLK_MM1_DISP_DSI0 22 +#define CLK_MM1_DISP_DSI1 23 +#define CLK_MM1_DISP_DSI2 24 +#define CLK_MM1_DISP_DVO0 25 +#define CLK_MM1_DISP_GDMA0 26 +#define CLK_MM1_DISP_MERGE0 27 +#define CLK_MM1_DISP_MERGE1 28 +#define CLK_MM1_DISP_MERGE2 29 +#define CLK_MM1_DISP_ODDMR0 30 +#define CLK_MM1_DISP_POSTALIGN0 31 +#define CLK_MM1_DISP_DITHER2 32 +#define CLK_MM1_DISP_R2Y0 33 +#define CLK_MM1_DISP_SPLITTER0 34 +#define CLK_MM1_DISP_SPLITTER1 35 +#define CLK_MM1_DISP_SPLITTER2 36 +#define CLK_MM1_DISP_SPLITTER3 37 +#define CLK_MM1_DISP_VDCM0 38 +#define CLK_MM1_DISP_WDMA1 39 +#define CLK_MM1_DISP_WDMA2 40 +#define CLK_MM1_DISP_WDMA3 41 +#define CLK_MM1_DISP_WDMA4 42 +#define CLK_MM1_MDP_RDMA1 43 +#define CLK_MM1_SMI_LARB0 44 +#define CLK_MM1_MOD1 45 +#define CLK_MM1_MOD2 46 +#define CLK_MM1_MOD3 47 +#define CLK_MM1_MOD4 48 +#define CLK_MM1_MOD5 49 +#define CLK_MM1_MOD6 50 +#define CLK_MM1_CG0 51 +#define CLK_MM1_CG1 52 +#define CLK_MM1_CG2 53 +#define CLK_MM1_CG3 54 +#define CLK_MM1_CG4 55 +#define CLK_MM1_CG5 56 +#define CLK_MM1_CG6 57 +#define CLK_MM1_CG7 58 +#define CLK_MM1_F26M 59 + +/* OVLSYS_CONFIG */ +#define CLK_OVLSYS_CONFIG 0 +#define CLK_OVL_FAKE_ENG0 1 +#define CLK_OVL_FAKE_ENG1 2 +#define CLK_OVL_MUTEX0 3 +#define CLK_OVL_EXDMA0 4 +#define CLK_OVL_EXDMA1 5 +#define CLK_OVL_EXDMA2 6 +#define CLK_OVL_EXDMA3 7 +#define CLK_OVL_EXDMA4 8 +#define CLK_OVL_EXDMA5 9 +#define CLK_OVL_EXDMA6 10 +#define CLK_OVL_EXDMA7 11 +#define CLK_OVL_EXDMA8 12 +#define CLK_OVL_EXDMA9 13 +#define CLK_OVL_BLENDER0 14 +#define CLK_OVL_BLENDER1 15 +#define CLK_OVL_BLENDER2 16 +#define CLK_OVL_BLENDER3 17 +#define CLK_OVL_BLENDER4 18 +#define CLK_OVL_BLENDER5 19 +#define CLK_OVL_BLENDER6 20 +#define CLK_OVL_BLENDER7 21 +#define CLK_OVL_BLENDER8 22 +#define CLK_OVL_BLENDER9 23 +#define CLK_OVL_OUTPROC0 24 +#define CLK_OVL_OUTPROC1 25 +#define CLK_OVL_OUTPROC2 26 +#define CLK_OVL_OUTPROC3 27 +#define CLK_OVL_OUTPROC4 28 +#define CLK_OVL_OUTPROC5 29 +#define CLK_OVL_MDP_RSZ0 30 +#define 
CLK_OVL_MDP_RSZ1 31 +#define CLK_OVL_DISP_WDMA0 32 +#define CLK_OVL_DISP_WDMA1 33 +#define CLK_OVL_UFBC_WDMA0 34 +#define CLK_OVL_MDP_RDMA0 35 +#define CLK_OVL_MDP_RDMA1 36 +#define CLK_OVL_BWM0 37 +#define CLK_OVL_DLI0 38 +#define CLK_OVL_DLI1 39 +#define CLK_OVL_DLI2 40 +#define CLK_OVL_DLI3 41 +#define CLK_OVL_DLI4 42 +#define CLK_OVL_DLI5 43 +#define CLK_OVL_DLI6 44 +#define CLK_OVL_DLI7 45 +#define CLK_OVL_DLI8 46 +#define CLK_OVL_DLO0 47 +#define CLK_OVL_DLO1 48 +#define CLK_OVL_DLO2 49 +#define CLK_OVL_DLO3 50 +#define CLK_OVL_DLO4 51 +#define CLK_OVL_DLO5 52 +#define CLK_OVL_DLO6 53 +#define CLK_OVL_DLO7 54 +#define CLK_OVL_DLO8 55 +#define CLK_OVL_DLO9 56 +#define CLK_OVL_DLO10 57 +#define CLK_OVL_DLO11 58 +#define CLK_OVL_DLO12 59 +#define CLK_OVLSYS_RELAY0 60 +#define CLK_OVL_INLINEROT0 61 +#define CLK_OVL_SMI 62 +#define CLK_OVL_SMI_SMI 63 + + +/* OVLSYS1_CONFIG */ +#define CLK_OVL1_OVLSYS_CONFIG 0 +#define CLK_OVL1_OVL_FAKE_ENG0 1 +#define CLK_OVL1_OVL_FAKE_ENG1 2 +#define CLK_OVL1_OVL_MUTEX0 3 +#define CLK_OVL1_OVL_EXDMA0 4 +#define CLK_OVL1_OVL_EXDMA1 5 +#define CLK_OVL1_OVL_EXDMA2 6 +#define CLK_OVL1_OVL_EXDMA3 7 +#define CLK_OVL1_OVL_EXDMA4 8 +#define CLK_OVL1_OVL_EXDMA5 9 +#define CLK_OVL1_OVL_EXDMA6 10 +#define CLK_OVL1_OVL_EXDMA7 11 +#define CLK_OVL1_OVL_EXDMA8 12 +#define CLK_OVL1_OVL_EXDMA9 13 +#define CLK_OVL1_OVL_BLENDER0 14 +#define CLK_OVL1_OVL_BLENDER1 15 +#define CLK_OVL1_OVL_BLENDER2 16 +#define CLK_OVL1_OVL_BLENDER3 17 +#define CLK_OVL1_OVL_BLENDER4 18 +#define CLK_OVL1_OVL_BLENDER5 19 +#define CLK_OVL1_OVL_BLENDER6 20 +#define CLK_OVL1_OVL_BLENDER7 21 +#define CLK_OVL1_OVL_BLENDER8 22 +#define CLK_OVL1_OVL_BLENDER9 23 +#define CLK_OVL1_OVL_OUTPROC0 24 +#define CLK_OVL1_OVL_OUTPROC1 25 +#define CLK_OVL1_OVL_OUTPROC2 26 +#define CLK_OVL1_OVL_OUTPROC3 27 +#define CLK_OVL1_OVL_OUTPROC4 28 +#define CLK_OVL1_OVL_OUTPROC5 29 +#define CLK_OVL1_OVL_MDP_RSZ0 30 +#define CLK_OVL1_OVL_MDP_RSZ1 31 +#define CLK_OVL1_OVL_DISP_WDMA0 32 +#define 
CLK_OVL1_OVL_DISP_WDMA1 33 +#define CLK_OVL1_OVL_UFBC_WDMA0 34 +#define CLK_OVL1_OVL_MDP_RDMA0 35 +#define CLK_OVL1_OVL_MDP_RDMA1 36 +#define CLK_OVL1_OVL_BWM0 37 +#define CLK_OVL1_DLI0 38 +#define CLK_OVL1_DLI1 39 +#define CLK_OVL1_DLI2 40 +#define CLK_OVL1_DLI3 41 +#define CLK_OVL1_DLI4 42 +#define CLK_OVL1_DLI5 43 +#define CLK_OVL1_DLI6 44 +#define CLK_OVL1_DLI7 45 +#define CLK_OVL1_DLI8 46 +#define CLK_OVL1_DLO0 47 +#define CLK_OVL1_DLO1 48 +#define CLK_OVL1_DLO2 49 +#define CLK_OVL1_DLO3 50 +#define CLK_OVL1_DLO4 51 +#define CLK_OVL1_DLO5 52 +#define CLK_OVL1_DLO6 53 +#define CLK_OVL1_DLO7 54 +#define CLK_OVL1_DLO8 55 +#define CLK_OVL1_DLO9 56 +#define CLK_OVL1_DLO10 57 +#define CLK_OVL1_DLO11 58 +#define CLK_OVL1_DLO12 59 +#define CLK_OVL1_OVLSYS_RELAY0 60 +#define CLK_OVL1_OVL_INLINEROT0 61 +#define CLK_OVL1_SMI 62 + + +/* VDEC_SOC_GCON_BASE */ +#define CLK_VDE1_LARB1_CKEN 0 +#define CLK_VDE1_LAT_CKEN 1 +#define CLK_VDE1_LAT_ACTIVE 2 +#define CLK_VDE1_LAT_CKEN_ENG 3 +#define CLK_VDE1_VDEC_CKEN 4 +#define CLK_VDE1_VDEC_ACTIVE 5 +#define CLK_VDE1_VDEC_CKEN_ENG 6 +#define CLK_VDE1_VDEC_SOC_APTV_EN 7 +#define CLK_VDE1_VDEC_SOC_APTV_TOP_EN 8 +#define CLK_VDE1_VDEC_SOC_IPS_EN 9 + +/* VDEC_GCON_BASE */ +#define CLK_VDE2_LARB1_CKEN 0 +#define CLK_VDE2_LAT_CKEN 1 +#define CLK_VDE2_LAT_ACTIVE 2 +#define CLK_VDE2_LAT_CKEN_ENG 3 +#define CLK_VDE2_VDEC_CKEN 4 +#define CLK_VDE2_VDEC_ACTIVE 5 +#define CLK_VDE2_VDEC_CKEN_ENG 6 + +/* VENC_GCON */ +#define CLK_VEN1_CKE0_LARB 0 +#define CLK_VEN1_CKE1_VENC 1 +#define CLK_VEN1_CKE2_JPGENC 2 +#define CLK_VEN1_CKE3_JPGDEC 3 +#define CLK_VEN1_CKE4_JPGDEC_C1 4 +#define CLK_VEN1_CKE5_GALS 5 +#define CLK_VEN1_CKE29_VENC_ADAB_CTRL 6 +#define CLK_VEN1_CKE29_VENC_XPC_CTRL 7 +#define CLK_VEN1_CKE6_GALS_SRAM 8 +#define CLK_VEN1_RES_FLAT 9 + +/* VENC_GCON_CORE1 */ +#define CLK_VEN2_CKE0_LARB 0 +#define CLK_VEN2_CKE1_VENC 1 +#define CLK_VEN2_CKE2_JPGENC 2 +#define CLK_VEN2_CKE3_JPGDEC 3 +#define CLK_VEN2_CKE5_GALS 4 +#define 
CLK_VEN2_CKE29_VENC_XPC_CTRL 5 +#define CLK_VEN2_CKE6_GALS_SRAM 6 +#define CLK_VEN2_RES_FLAT 7 + +/* VENC_GCON_CORE2 */ +#define CLK_VEN_C2_CKE0_LARB 0 +#define CLK_VEN_C2_CKE1_VENC 1 +#define CLK_VEN_C2_CKE5_GALS 2 +#define CLK_VEN_C2_CKE29_VENC_XPC_CTRL 3 +#define CLK_VEN_C2_CKE6_GALS_SRAM 4 +#define CLK_VEN_C2_RES_FLAT 5 + +/* MDPSYS_CONFIG */ +#define CLK_MDP_MDP_MUTEX0 0 +#define CLK_MDP_SMI0 1 +#define CLK_MDP_SMI0_SMI 2 +#define CLK_MDP_APB_BUS 3 +#define CLK_MDP_MDP_RDMA0 4 +#define CLK_MDP_MDP_RDMA1 5 +#define CLK_MDP_MDP_RDMA2 6 +#define CLK_MDP_MDP_BIRSZ0 7 +#define CLK_MDP_MDP_HDR0 8 +#define CLK_MDP_MDP_AAL0 9 +#define CLK_MDP_MDP_RSZ0 10 +#define CLK_MDP_MDP_RSZ2 11 +#define CLK_MDP_MDP_TDSHP0 12 +#define CLK_MDP_MDP_COLOR0 13 +#define CLK_MDP_MDP_WROT0 14 +#define CLK_MDP_MDP_WROT1 15 +#define CLK_MDP_MDP_WROT2 16 +#define CLK_MDP_MDP_FAKE_ENG0 17 +#define CLK_MDP_APB_DB 18 +#define CLK_MDP_MDP_DLI_ASYNC0 19 +#define CLK_MDP_MDP_DLI_ASYNC1 20 +#define CLK_MDP_MDP_DLO_ASYNC0 21 +#define CLK_MDP_MDP_DLO_ASYNC1 22 +#define CLK_MDP_MDP_DLI_ASYNC2 23 +#define CLK_MDP_MDP_DLO_ASYNC2 24 +#define CLK_MDP_MDP_DLO_ASYNC3 25 +#define CLK_MDP_IMG_DL_ASYNC0 26 +#define CLK_MDP_MDP_RROT0 27 +#define CLK_MDP_MDP_MERGE0 28 +#define CLK_MDP_MDP_C3D0 29 +#define CLK_MDP_MDP_FG0 30 +#define CLK_MDP_MDP_CLA2 31 +#define CLK_MDP_MDP_DLO_ASYNC4 32 +#define CLK_MDP_VPP_RSZ0 33 +#define CLK_MDP_VPP_RSZ1 34 +#define CLK_MDP_MDP_DLO_ASYNC5 35 +#define CLK_MDP_IMG0 36 +#define CLK_MDP_F26M 37 +#define CLK_MDP_IMG_DL_RELAY0 38 +#define CLK_MDP_IMG_DL_RELAY1 39 + +/* MDPSYS1_CONFIG */ +#define CLK_MDP1_MDP_MUTEX0 0 +#define CLK_MDP1_SMI0 1 +#define CLK_MDP1_SMI0_SMI 2 +#define CLK_MDP1_APB_BUS 3 +#define CLK_MDP1_MDP_RDMA0 4 +#define CLK_MDP1_MDP_RDMA1 5 +#define CLK_MDP1_MDP_RDMA2 6 +#define CLK_MDP1_MDP_BIRSZ0 7 +#define CLK_MDP1_MDP_HDR0 8 +#define CLK_MDP1_MDP_AAL0 9 +#define CLK_MDP1_MDP_RSZ0 10 +#define CLK_MDP1_MDP_RSZ2 11 +#define CLK_MDP1_MDP_TDSHP0 12 +#define 
CLK_MDP1_MDP_COLOR0 13 +#define CLK_MDP1_MDP_WROT0 14 +#define CLK_MDP1_MDP_WROT1 15 +#define CLK_MDP1_MDP_WROT2 16 +#define CLK_MDP1_MDP_FAKE_ENG0 17 +#define CLK_MDP1_APB_DB 18 +#define CLK_MDP1_MDP_DLI_ASYNC0 19 +#define CLK_MDP1_MDP_DLI_ASYNC1 20 +#define CLK_MDP1_MDP_DLO_ASYNC0 21 +#define CLK_MDP1_MDP_DLO_ASYNC1 22 +#define CLK_MDP1_MDP_DLI_ASYNC2 23 +#define CLK_MDP1_MDP_DLO_ASYNC2 24 +#define CLK_MDP1_MDP_DLO_ASYNC3 25 +#define CLK_MDP1_IMG_DL_ASYNC0 26 +#define CLK_MDP1_MDP_RROT0 27 +#define CLK_MDP1_MDP_MERGE0 28 +#define CLK_MDP1_MDP_C3D0 29 +#define CLK_MDP1_MDP_FG0 30 +#define CLK_MDP1_MDP_CLA2 31 +#define CLK_MDP1_MDP_DLO_ASYNC4 32 +#define CLK_MDP1_VPP_RSZ0 33 +#define CLK_MDP1_VPP_RSZ1 34 +#define CLK_MDP1_MDP_DLO_ASYNC5 35 +#define CLK_MDP1_IMG0 36 +#define CLK_MDP1_F26M 37 +#define CLK_MDP1_IMG_DL_RELAY0 38 +#define CLK_MDP1_IMG_DL_RELAY1 39 + +/* DISP_VDISP_AO_CONFIG */ +#define CLK_MM_V_DISP_VDISP_AO_CONFIG 0 +#define CLK_MM_V_DISP_DPC 1 +#define CLK_MM_V_SMI_SUB_SOMM0 2 + +/* MFGPLL_PLL_CTRL */ +#define CLK_MFG_AO_MFGPLL 0 + +/* MFGPLL_SC0_PLL_CTRL */ +#define CLK_MFGSC0_AO_MFGPLL_SC0 0 + +/* MFGPLL_SC1_PLL_CTRL */ +#define CLK_MFGSC1_AO_MFGPLL_SC1 0 + +/* CCIPLL_PLL_CTRL */ +#define CLK_CCIPLL 0 + +/* ARMPLL_LL_PLL_CTRL */ +#define CLK_CPLL_ARMPLL_LL 0 + +/* ARMPLL_BL_PLL_CTRL */ +#define CLK_CPBL_ARMPLL_BL 0 + +/* ARMPLL_B_PLL_CTRL */ +#define CLK_CPB_ARMPLL_B 0 + +/* PTPPLL_PLL_CTRL */ +#define CLK_PTPPLL 0 + +#endif /* _DT_BINDINGS_CLK_MT8196_H */ diff --git a/include/dt-bindings/reset/mediatek,mt8196-resets.h b/include/dt-bindings/reset/mediatek,mt8196-resets.h new file mode 100644 index 000000000000..46ced0850d91 --- /dev/null +++ b/include/dt-bindings/reset/mediatek,mt8196-resets.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2025 Collabora Ltd. 
+ * Author: AngeloGioacchino Del Regno + */ + +#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8196 +#define _DT_BINDINGS_RESET_CONTROLLER_MT8196 + +/* PEXTP0 resets */ +#define MT8196_PEXTP0_RST0_PCIE0_MAC 0 +#define MT8196_PEXTP0_RST0_PCIE0_PHY 1 + +/* PEXTP1 resets */ +#define MT8196_PEXTP1_RST0_PCIE1_MAC 0 +#define MT8196_PEXTP1_RST0_PCIE1_PHY 1 +#define MT8196_PEXTP1_RST0_PCIE2_MAC 2 +#define MT8196_PEXTP1_RST0_PCIE2_PHY 3 + +/* UFS resets */ +#define MT8196_UFSAO_RST0_UFS_MPHY 0 +#define MT8196_UFSAO_RST1_UFS_UNIPRO 1 +#define MT8196_UFSAO_RST1_UFS_CRYPTO 2 +#define MT8196_UFSAO_RST1_UFSHCI 3 + +#endif /* _DT_BINDINGS_RESET_CONTROLLER_MT8196 */ -- cgit v1.2.3 From 49f6c8b74d9a20c0dd16bd75c93b981a3d420a37 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Wed, 25 Jun 2025 11:07:24 +0200 Subject: dt-bindings: stm32: add STM32MP21 clocks and reset bindings Adds clock and reset binding entries for STM32MP21 SoC family. Signed-off-by: Nicolas Le Bayon Reviewed-by: Conor Dooley Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/st,stm32mp21-rcc.h | 426 +++++++++++++++++++++++++++ include/dt-bindings/reset/st,stm32mp21-rcc.h | 138 +++++++++ 2 files changed, 564 insertions(+) create mode 100644 include/dt-bindings/clock/st,stm32mp21-rcc.h create mode 100644 include/dt-bindings/reset/st,stm32mp21-rcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/st,stm32mp21-rcc.h b/include/dt-bindings/clock/st,stm32mp21-rcc.h new file mode 100644 index 000000000000..054b785f2796 --- /dev/null +++ b/include/dt-bindings/clock/st,stm32mp21-rcc.h @@ -0,0 +1,426 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) STMicroelectronics 2025 - All Rights Reserved + * Author: Gabriel Fernandez + */ + +#ifndef _DT_BINDINGS_STM32MP21_CLKS_H_ +#define _DT_BINDINGS_STM32MP21_CLKS_H_ + +/* INTERNAL/EXTERNAL OSCILLATORS */ +#define HSI_CK 0 +#define HSE_CK 1 +#define MSI_CK 2 +#define LSI_CK 3 +#define LSE_CK 4 
+#define I2S_CK 5 +#define RTC_CK 6 +#define SPDIF_CK_SYMB 7 + +/* PLL CLOCKS */ +#define PLL1_CK 8 +#define PLL2_CK 9 +#define PLL4_CK 10 +#define PLL5_CK 11 +#define PLL6_CK 12 +#define PLL7_CK 13 +#define PLL8_CK 14 + +#define CK_CPU1 15 + +/* APB DIV CLOCKS */ +#define CK_ICN_APB1 16 +#define CK_ICN_APB2 17 +#define CK_ICN_APB3 18 +#define CK_ICN_APB4 19 +#define CK_ICN_APB5 20 +#define CK_ICN_APBDBG 21 + +/* GLOBAL TIMER */ +#define TIMG1_CK 22 +#define TIMG2_CK 23 + +/* FLEXGEN CLOCKS */ +#define CK_ICN_HS_MCU 24 +#define CK_ICN_SDMMC 25 +#define CK_ICN_DDR 26 +#define CK_ICN_DISPLAY 27 +#define CK_ICN_HSL 28 +#define CK_ICN_NIC 29 +#define CK_ICN_VID 30 +#define CK_FLEXGEN_07 31 +#define CK_FLEXGEN_08 32 +#define CK_FLEXGEN_09 33 +#define CK_FLEXGEN_10 34 +#define CK_FLEXGEN_11 35 +#define CK_FLEXGEN_12 36 +#define CK_FLEXGEN_13 37 +#define CK_FLEXGEN_14 38 +#define CK_FLEXGEN_15 39 +#define CK_FLEXGEN_16 40 +#define CK_FLEXGEN_17 41 +#define CK_FLEXGEN_18 42 +#define CK_FLEXGEN_19 43 +#define CK_FLEXGEN_20 44 +#define CK_FLEXGEN_21 45 +#define CK_FLEXGEN_22 46 +#define CK_FLEXGEN_23 47 +#define CK_FLEXGEN_24 48 +#define CK_FLEXGEN_25 49 +#define CK_FLEXGEN_26 50 +#define CK_FLEXGEN_27 51 +#define CK_FLEXGEN_28 52 +#define CK_FLEXGEN_29 53 +#define CK_FLEXGEN_30 54 +#define CK_FLEXGEN_31 55 +#define CK_FLEXGEN_32 56 +#define CK_FLEXGEN_33 57 +#define CK_FLEXGEN_34 58 +#define CK_FLEXGEN_35 59 +#define CK_FLEXGEN_36 60 +#define CK_FLEXGEN_37 61 +#define CK_FLEXGEN_38 62 +#define CK_FLEXGEN_39 63 +#define CK_FLEXGEN_40 64 +#define CK_FLEXGEN_41 65 +#define CK_FLEXGEN_42 66 +#define CK_FLEXGEN_43 67 +#define CK_FLEXGEN_44 68 +#define CK_FLEXGEN_45 69 +#define CK_FLEXGEN_46 70 +#define CK_FLEXGEN_47 71 +#define CK_FLEXGEN_48 72 +#define CK_FLEXGEN_49 73 +#define CK_FLEXGEN_50 74 +#define CK_FLEXGEN_51 75 +#define CK_FLEXGEN_52 76 +#define CK_FLEXGEN_53 77 +#define CK_FLEXGEN_54 78 +#define CK_FLEXGEN_55 79 +#define CK_FLEXGEN_56 80 +#define CK_FLEXGEN_57 81 
+#define CK_FLEXGEN_58 82 +#define CK_FLEXGEN_59 83 +#define CK_FLEXGEN_60 84 +#define CK_FLEXGEN_61 85 +#define CK_FLEXGEN_62 86 +#define CK_FLEXGEN_63 87 + +/* LOW SPEED MCU CLOCK */ +#define CK_ICN_LS_MCU 88 + +#define CK_BUS_STM 89 +#define CK_BUS_FMC 90 +#define CK_BUS_ETH1 91 +#define CK_BUS_ETH2 92 +#define CK_BUS_DDRPHYC 93 +#define CK_BUS_SYSCPU1 94 +#define CK_BUS_HPDMA1 95 +#define CK_BUS_HPDMA2 96 +#define CK_BUS_HPDMA3 97 +#define CK_BUS_ADC1 98 +#define CK_BUS_ADC2 99 +#define CK_BUS_IPCC1 100 +#define CK_BUS_DCMIPSSI 101 +#define CK_BUS_CRC 102 +#define CK_BUS_MDF1 103 +#define CK_BUS_BKPSRAM 104 +#define CK_BUS_HASH1 105 +#define CK_BUS_HASH2 106 +#define CK_BUS_RNG1 107 +#define CK_BUS_RNG2 108 +#define CK_BUS_CRYP1 109 +#define CK_BUS_CRYP2 110 +#define CK_BUS_SAES 111 +#define CK_BUS_PKA 112 +#define CK_BUS_GPIOA 113 +#define CK_BUS_GPIOB 114 +#define CK_BUS_GPIOC 115 +#define CK_BUS_GPIOD 116 +#define CK_BUS_GPIOE 117 +#define CK_BUS_GPIOF 118 +#define CK_BUS_GPIOG 119 +#define CK_BUS_GPIOH 120 +#define CK_BUS_GPIOI 121 +#define CK_BUS_GPIOZ 122 +#define CK_BUS_RTC 124 +#define CK_BUS_LPUART1 125 +#define CK_BUS_LPTIM3 126 +#define CK_BUS_LPTIM4 127 +#define CK_BUS_LPTIM5 128 +#define CK_BUS_TIM2 129 +#define CK_BUS_TIM3 130 +#define CK_BUS_TIM4 131 +#define CK_BUS_TIM5 132 +#define CK_BUS_TIM6 133 +#define CK_BUS_TIM7 134 +#define CK_BUS_TIM10 135 +#define CK_BUS_TIM11 136 +#define CK_BUS_TIM12 137 +#define CK_BUS_TIM13 138 +#define CK_BUS_TIM14 139 +#define CK_BUS_LPTIM1 140 +#define CK_BUS_LPTIM2 141 +#define CK_BUS_SPI2 142 +#define CK_BUS_SPI3 143 +#define CK_BUS_SPDIFRX 144 +#define CK_BUS_USART2 145 +#define CK_BUS_USART3 146 +#define CK_BUS_UART4 147 +#define CK_BUS_UART5 148 +#define CK_BUS_I2C1 149 +#define CK_BUS_I2C2 150 +#define CK_BUS_I2C3 151 +#define CK_BUS_I3C1 152 +#define CK_BUS_I3C2 153 +#define CK_BUS_I3C3 154 +#define CK_BUS_TIM1 155 +#define CK_BUS_TIM8 156 +#define CK_BUS_TIM15 157 +#define CK_BUS_TIM16 158 +#define 
CK_BUS_TIM17 159 +#define CK_BUS_SAI1 160 +#define CK_BUS_SAI2 161 +#define CK_BUS_SAI3 162 +#define CK_BUS_SAI4 163 +#define CK_BUS_USART1 164 +#define CK_BUS_USART6 165 +#define CK_BUS_UART7 166 +#define CK_BUS_FDCAN 167 +#define CK_BUS_SPI1 168 +#define CK_BUS_SPI4 169 +#define CK_BUS_SPI5 170 +#define CK_BUS_SPI6 171 +#define CK_BUS_BSEC 172 +#define CK_BUS_IWDG1 173 +#define CK_BUS_IWDG2 174 +#define CK_BUS_IWDG3 175 +#define CK_BUS_IWDG4 176 +#define CK_BUS_WWDG1 177 +#define CK_BUS_VREF 178 +#define CK_BUS_DTS 179 +#define CK_BUS_SERC 180 +#define CK_BUS_HDP 181 +#define CK_BUS_DDRPERFM 182 +#define CK_BUS_OTG 183 +#define CK_BUS_LTDC 184 +#define CK_BUS_CSI 185 +#define CK_BUS_DCMIPP 186 +#define CK_BUS_DDRC 187 +#define CK_BUS_DDRCFG 188 +#define CK_BUS_STGEN 189 +#define CK_SYSDBG 190 +#define CK_KER_TIM2 191 +#define CK_KER_TIM3 192 +#define CK_KER_TIM4 193 +#define CK_KER_TIM5 194 +#define CK_KER_TIM6 195 +#define CK_KER_TIM7 196 +#define CK_KER_TIM10 197 +#define CK_KER_TIM11 198 +#define CK_KER_TIM12 199 +#define CK_KER_TIM13 200 +#define CK_KER_TIM14 201 +#define CK_KER_TIM1 202 +#define CK_KER_TIM8 203 +#define CK_KER_TIM15 204 +#define CK_KER_TIM16 205 +#define CK_KER_TIM17 206 +#define CK_BUS_SYSRAM 207 +#define CK_BUS_RETRAM 208 +#define CK_BUS_OSPI1 209 +#define CK_BUS_OTFD1 210 +#define CK_BUS_SRAM1 211 +#define CK_BUS_SDMMC1 212 +#define CK_BUS_SDMMC2 213 +#define CK_BUS_SDMMC3 214 +#define CK_BUS_DDR 215 +#define CK_BUS_RISAF4 216 +#define CK_BUS_USBHOHCI 217 +#define CK_BUS_USBHEHCI 218 +#define CK_KER_LPTIM1 219 +#define CK_KER_LPTIM2 220 +#define CK_KER_USART2 221 +#define CK_KER_UART4 222 +#define CK_KER_USART3 223 +#define CK_KER_UART5 224 +#define CK_KER_SPI2 225 +#define CK_KER_SPI3 226 +#define CK_KER_SPDIFRX 227 +#define CK_KER_I2C1 228 +#define CK_KER_I2C2 229 +#define CK_KER_I3C1 230 +#define CK_KER_I3C2 231 +#define CK_KER_I2C3 232 +#define CK_KER_I3C3 233 +#define CK_KER_SPI1 234 +#define CK_KER_SPI4 235 +#define CK_KER_SPI5 236 
+#define CK_KER_SPI6 237 +#define CK_KER_USART1 238 +#define CK_KER_USART6 239 +#define CK_KER_UART7 240 +#define CK_KER_MDF1 241 +#define CK_KER_SAI1 242 +#define CK_KER_SAI2 243 +#define CK_KER_SAI3 244 +#define CK_KER_SAI4 245 +#define CK_KER_FDCAN 246 +#define CK_KER_CSI 247 +#define CK_KER_CSITXESC 248 +#define CK_KER_CSIPHY 249 +#define CK_KER_STGEN 250 +#define CK_KER_USB2PHY2EN 251 +#define CK_KER_LPUART1 252 +#define CK_KER_LPTIM3 253 +#define CK_KER_LPTIM4 254 +#define CK_KER_LPTIM5 255 +#define CK_KER_TSDBG 256 +#define CK_KER_TPIU 257 +#define CK_BUS_ETR 258 +#define CK_BUS_SYSATB 259 +#define CK_KER_ADC1 260 +#define CK_KER_ADC2 261 +#define CK_KER_OSPI1 262 +#define CK_KER_FMC 263 +#define CK_KER_SDMMC1 264 +#define CK_KER_SDMMC2 265 +#define CK_KER_SDMMC3 266 +#define CK_KER_ETH1 267 +#define CK_KER_ETH2 268 +#define CK_KER_ETH1PTP 269 +#define CK_KER_ETH2PTP 270 +#define CK_KER_USB2PHY1 271 +#define CK_KER_USB2PHY2 272 +#define CK_MCO1 273 +#define CK_MCO2 274 +#define CK_KER_DTS 275 +#define CK_ETH1_RX 276 +#define CK_ETH1_TX 277 +#define CK_ETH1_MAC 278 +#define CK_ETH2_RX 279 +#define CK_ETH2_TX 280 +#define CK_ETH2_MAC 281 +#define CK_ETH1_STP 282 +#define CK_ETH2_STP 283 +#define CK_KER_LTDC 284 +#define HSE_DIV2_CK 285 +#define CK_DBGMCU 286 +#define CK_DAP 287 +#define CK_KER_ETR 288 +#define CK_KER_STM 289 + +#define CK_SCMI_ICN_HS_MCU 0 +#define CK_SCMI_ICN_SDMMC 1 +#define CK_SCMI_ICN_DDR 2 +#define CK_SCMI_ICN_DISPLAY 3 +#define CK_SCMI_ICN_HSL 4 +#define CK_SCMI_ICN_NIC 5 +#define CK_SCMI_FLEXGEN_07 7 +#define CK_SCMI_FLEXGEN_08 8 +#define CK_SCMI_FLEXGEN_09 9 +#define CK_SCMI_FLEXGEN_10 10 +#define CK_SCMI_FLEXGEN_11 11 +#define CK_SCMI_FLEXGEN_12 12 +#define CK_SCMI_FLEXGEN_13 13 +#define CK_SCMI_FLEXGEN_14 14 +#define CK_SCMI_FLEXGEN_15 15 +#define CK_SCMI_FLEXGEN_16 16 +#define CK_SCMI_FLEXGEN_17 17 +#define CK_SCMI_FLEXGEN_18 18 +#define CK_SCMI_FLEXGEN_19 19 +#define CK_SCMI_FLEXGEN_20 20 +#define CK_SCMI_FLEXGEN_21 21 +#define 
CK_SCMI_FLEXGEN_22 22 +#define CK_SCMI_FLEXGEN_23 23 +#define CK_SCMI_FLEXGEN_24 24 +#define CK_SCMI_FLEXGEN_25 25 +#define CK_SCMI_FLEXGEN_26 26 +#define CK_SCMI_FLEXGEN_27 27 +#define CK_SCMI_FLEXGEN_28 28 +#define CK_SCMI_FLEXGEN_29 29 +#define CK_SCMI_FLEXGEN_30 30 +#define CK_SCMI_FLEXGEN_31 31 +#define CK_SCMI_FLEXGEN_32 32 +#define CK_SCMI_FLEXGEN_33 33 +#define CK_SCMI_FLEXGEN_34 34 +#define CK_SCMI_FLEXGEN_35 35 +#define CK_SCMI_FLEXGEN_36 36 +#define CK_SCMI_FLEXGEN_37 37 +#define CK_SCMI_FLEXGEN_38 38 +#define CK_SCMI_FLEXGEN_39 39 +#define CK_SCMI_FLEXGEN_40 40 +#define CK_SCMI_FLEXGEN_41 41 +#define CK_SCMI_FLEXGEN_42 42 +#define CK_SCMI_FLEXGEN_43 43 +#define CK_SCMI_FLEXGEN_44 44 +#define CK_SCMI_FLEXGEN_45 45 +#define CK_SCMI_FLEXGEN_46 46 +#define CK_SCMI_FLEXGEN_47 47 +#define CK_SCMI_FLEXGEN_48 48 +#define CK_SCMI_FLEXGEN_49 49 +#define CK_SCMI_FLEXGEN_50 50 +#define CK_SCMI_FLEXGEN_51 51 +#define CK_SCMI_FLEXGEN_52 52 +#define CK_SCMI_FLEXGEN_53 53 +#define CK_SCMI_FLEXGEN_54 54 +#define CK_SCMI_FLEXGEN_55 55 +#define CK_SCMI_FLEXGEN_56 56 +#define CK_SCMI_FLEXGEN_57 57 +#define CK_SCMI_FLEXGEN_58 58 +#define CK_SCMI_FLEXGEN_59 59 +#define CK_SCMI_FLEXGEN_60 60 +#define CK_SCMI_FLEXGEN_61 61 +#define CK_SCMI_FLEXGEN_62 62 +#define CK_SCMI_FLEXGEN_63 63 +#define CK_SCMI_ICN_LS_MCU 64 +#define CK_SCMI_HSE 65 +#define CK_SCMI_LSE 66 +#define CK_SCMI_HSI 67 +#define CK_SCMI_LSI 68 +#define CK_SCMI_MSI 69 +#define CK_SCMI_HSE_DIV2 70 +#define CK_SCMI_CPU1 71 +#define CK_SCMI_SYSCPU1 72 +#define CK_SCMI_PLL2 73 +#define CK_SCMI_RTC 74 +#define CK_SCMI_RTCCK 75 +#define CK_SCMI_ICN_APB1 76 +#define CK_SCMI_ICN_APB2 77 +#define CK_SCMI_ICN_APB3 78 +#define CK_SCMI_ICN_APB4 79 +#define CK_SCMI_ICN_APB5 80 +#define CK_SCMI_ICN_APBDBG 81 +#define CK_SCMI_TIMG1 82 +#define CK_SCMI_TIMG2 83 +#define CK_SCMI_BKPSRAM 84 +#define CK_SCMI_BSEC 85 +#define CK_SCMI_BUS_ETR 86 +#define CK_SCMI_FMC 87 +#define CK_SCMI_GPIOA 88 +#define CK_SCMI_GPIOB 89 +#define 
CK_SCMI_GPIOC 90 +#define CK_SCMI_GPIOD 91 +#define CK_SCMI_GPIOE 92 +#define CK_SCMI_GPIOF 93 +#define CK_SCMI_GPIOG 94 +#define CK_SCMI_GPIOH 95 +#define CK_SCMI_GPIOI 96 +#define CK_SCMI_GPIOZ 97 +#define CK_SCMI_HPDMA1 98 +#define CK_SCMI_HPDMA2 99 +#define CK_SCMI_HPDMA3 100 +#define CK_SCMI_IPCC1 101 +#define CK_SCMI_RETRAM 102 +#define CK_SCMI_SRAM1 103 +#define CK_SCMI_SYSRAM 104 +#define CK_SCMI_OSPI1 105 +#define CK_SCMI_TPIU 106 +#define CK_SCMI_SYSDBG 107 +#define CK_SCMI_SYSATB 108 +#define CK_SCMI_TSDBG 109 +#define CK_SCMI_BUS_STM 110 +#define CK_SCMI_KER_STM 111 +#define CK_SCMI_KER_ETR 112 + +#endif /* _DT_BINDINGS_STM32MP21_CLKS_H_ */ diff --git a/include/dt-bindings/reset/st,stm32mp21-rcc.h b/include/dt-bindings/reset/st,stm32mp21-rcc.h new file mode 100644 index 000000000000..6463bd73d025 --- /dev/null +++ b/include/dt-bindings/reset/st,stm32mp21-rcc.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright (C) STMicroelectronics 2025 - All Rights Reserved + * Author: Gabriel Fernandez + */ + +#ifndef _DT_BINDINGS_STM32MP21_RESET_H_ +#define _DT_BINDINGS_STM32MP21_RESET_H_ + +#define TIM1_R 0 +#define TIM2_R 1 +#define TIM3_R 2 +#define TIM4_R 3 +#define TIM5_R 4 +#define TIM6_R 5 +#define TIM7_R 6 +#define TIM8_R 7 +#define TIM10_R 8 +#define TIM11_R 9 +#define TIM12_R 10 +#define TIM13_R 11 +#define TIM14_R 12 +#define TIM15_R 13 +#define TIM16_R 14 +#define TIM17_R 15 +#define LPTIM1_R 16 +#define LPTIM2_R 17 +#define LPTIM3_R 18 +#define LPTIM4_R 19 +#define LPTIM5_R 20 +#define SPI1_R 21 +#define SPI2_R 22 +#define SPI3_R 23 +#define SPI4_R 24 +#define SPI5_R 25 +#define SPI6_R 26 +#define SPDIFRX_R 27 +#define USART1_R 28 +#define USART2_R 29 +#define USART3_R 30 +#define UART4_R 31 +#define UART5_R 32 +#define USART6_R 33 +#define UART7_R 34 +#define LPUART1_R 35 +#define I2C1_R 36 +#define I2C2_R 37 +#define I2C3_R 38 +#define SAI1_R 39 +#define SAI2_R 40 +#define SAI3_R 41 +#define SAI4_R 42 
+#define MDF1_R 43 +#define FDCAN_R 44 +#define HDP_R 45 +#define ADC1_R 46 +#define ADC2_R 47 +#define ETH1_R 48 +#define ETH2_R 49 +#define USBH_R 50 +#define USB2PHY1_R 51 +#define USB2PHY2_R 52 +#define SDMMC1_R 53 +#define SDMMC1DLL_R 54 +#define SDMMC2_R 55 +#define SDMMC2DLL_R 56 +#define SDMMC3_R 57 +#define SDMMC3DLL_R 58 +#define LTDC_R 59 +#define CSI_R 60 +#define DCMIPP_R 61 +#define DCMIPSSI_R 62 +#define WWDG1_R 63 +#define VREF_R 64 +#define DTS_R 65 +#define CRC_R 66 +#define SERC_R 67 +#define I3C1_R 68 +#define I3C2_R 69 +#define I3C3_R 70 +#define IWDG2_KER_R 71 +#define IWDG4_KER_R 72 +#define RNG1_R 73 +#define RNG2_R 74 +#define PKA_R 75 +#define SAES_R 76 +#define HASH1_R 77 +#define HASH2_R 78 +#define CRYP1_R 79 +#define CRYP2_R 80 +#define OSPI1_R 81 +#define OSPI1DLL_R 82 +#define OTG_R 83 +#define FMC_R 84 +#define DBG_R 85 +#define GPIOA_R 86 +#define GPIOB_R 87 +#define GPIOC_R 88 +#define GPIOD_R 89 +#define GPIOE_R 90 +#define GPIOF_R 91 +#define GPIOG_R 92 +#define GPIOH_R 93 +#define GPIOI_R 94 +#define GPIOZ_R 95 +#define HPDMA1_R 96 +#define HPDMA2_R 97 +#define HPDMA3_R 98 +#define IPCC1_R 99 +#define C2_HOLDBOOT_R 100 +#define C1_HOLDBOOT_R 101 +#define C1_R 102 +#define C1P1POR_R 103 +#define C1P1_R 104 +#define C2_R 105 +#define SYS_R 106 +#define VSW_R 107 +#define C1MS_R 108 +#define DDRCP_R 109 +#define DDRCAPB_R 110 +#define DDRPHYCAPB_R 111 +#define DDRCFG_R 112 +#define DDR_R 113 +#define DDRPERFM_R 114 +#define IWDG1_SYS_R 116 +#define IWDG2_SYS_R 117 +#define IWDG3_SYS_R 118 +#define IWDG4_SYS_R 119 + +#define RST_SCMI_C1_R 0 +#define RST_SCMI_C2_R 1 +#define RST_SCMI_C1_HOLDBOOT_R 2 +#define RST_SCMI_C2_HOLDBOOT_R 3 +#define RST_SCMI_FMC 4 +#define RST_SCMI_OSPI1 5 +#define RST_SCMI_OSPI1DLL 6 + +#endif /* _DT_BINDINGS_STM32MP21_RESET_H_ */ -- cgit v1.2.3 From 793e6b74806eea39da26f2fb7e4b640608d9598d Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 19 Sep 2025 14:26:42 +0000 Subject: dt-bindings: clock: loongson2: 
Add Loongson-2K0300 compatible Document the clock controller shipped in Loongson-2K0300 SoC, which generates various clock signals for SoC peripherals. Differing from previous generations of SoCs, LS2K0300 requires a 120MHz external clock input. Signed-off-by: Yao Zi Reviewed-by: Krzysztof Kozlowski Reviewed-by: Yanteng Si Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/loongson,ls2k-clk.h | 36 +++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/loongson,ls2k-clk.h b/include/dt-bindings/clock/loongson,ls2k-clk.h index 4279ba595f1e..8cbb86b2cf1e 100644 --- a/include/dt-bindings/clock/loongson,ls2k-clk.h +++ b/include/dt-bindings/clock/loongson,ls2k-clk.h @@ -43,4 +43,40 @@ #define LOONGSON2_I2S_CLK 33 #define LOONGSON2_MISC_CLK 34 +#define LS2K0300_CLK_STABLE 0 +#define LS2K0300_NODE_PLL 1 +#define LS2K0300_DDR_PLL 2 +#define LS2K0300_PIX_PLL 3 +#define LS2K0300_CLK_THSENS 4 +#define LS2K0300_CLK_NODE_DIV 5 +#define LS2K0300_CLK_NODE_PLL_GATE 6 +#define LS2K0300_CLK_NODE_SCALE 7 +#define LS2K0300_CLK_NODE_GATE 8 +#define LS2K0300_CLK_GMAC_DIV 9 +#define LS2K0300_CLK_GMAC_GATE 10 +#define LS2K0300_CLK_I2S_DIV 11 +#define LS2K0300_CLK_I2S_SCALE 12 +#define LS2K0300_CLK_I2S_GATE 13 +#define LS2K0300_CLK_DDR_DIV 14 +#define LS2K0300_CLK_DDR_GATE 15 +#define LS2K0300_CLK_NET_DIV 16 +#define LS2K0300_CLK_NET_GATE 17 +#define LS2K0300_CLK_DEV_DIV 18 +#define LS2K0300_CLK_DEV_GATE 19 +#define LS2K0300_CLK_PIX_DIV 20 +#define LS2K0300_CLK_PIX_PLL_GATE 21 +#define LS2K0300_CLK_PIX_SCALE 22 +#define LS2K0300_CLK_PIX_GATE 23 +#define LS2K0300_CLK_GMACBP_DIV 24 +#define LS2K0300_CLK_GMACBP_GATE 25 +#define LS2K0300_CLK_USB_SCALE 26 +#define LS2K0300_CLK_USB_GATE 27 +#define LS2K0300_CLK_APB_SCALE 28 +#define LS2K0300_CLK_APB_GATE 29 +#define LS2K0300_CLK_BOOT_SCALE 30 +#define LS2K0300_CLK_BOOT_GATE 31 +#define LS2K0300_CLK_SDIO_SCALE 32 +#define LS2K0300_CLK_SDIO_GATE 33 +#define 
LS2K0300_CLK_GMAC_IN 34 + #endif -- cgit v1.2.3 From 8ea304cff08f0605b425a3fb494ad47175214ea3 Mon Sep 17 00:00:00 2001 From: Ryan Chen Date: Wed, 17 Sep 2025 10:05:37 +0800 Subject: dt-bindings: clock: ast2700: modify soc0/1 clock define -add SOC0_CLK_AHBMUX: add SOC0_CLK_AHBMUX for ahb clock source divide. mpll-> ahb_mux -> div_table -> clk_ahb hpll-> -new add clock: SOC0_CLK_MPHYSRC: UFS MPHY clock source. SOC0_CLK_U2PHY_REFCLKSRC: USB2.0 phy clock reference source. SOC1_CLK_I3C: I3C clock source. Signed-off-by: Ryan Chen Acked-by: Krzysztof Kozlowski Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/aspeed,ast2700-scu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/aspeed,ast2700-scu.h b/include/dt-bindings/clock/aspeed,ast2700-scu.h index 63021af3caf5..bacf712e8e04 100644 --- a/include/dt-bindings/clock/aspeed,ast2700-scu.h +++ b/include/dt-bindings/clock/aspeed,ast2700-scu.h @@ -68,6 +68,9 @@ #define SCU0_CLK_GATE_UFSCLK 53 #define SCU0_CLK_GATE_EMMCCLK 54 #define SCU0_CLK_GATE_RVAS1CLK 55 +#define SCU0_CLK_U2PHY_REFCLKSRC 56 +#define SCU0_CLK_AHBMUX 57 +#define SCU0_CLK_MPHYSRC 58 /* SOC1 clk */ #define SCU1_CLKIN 0 @@ -159,5 +162,6 @@ #define SCU1_CLK_GATE_PORTCUSB2CLK 84 #define SCU1_CLK_GATE_PORTDUSB2CLK 85 #define SCU1_CLK_GATE_LTPI1TXCLK 86 +#define SCU1_CLK_I3C 87 #endif -- cgit v1.2.3 From 1e338f4d99e6814ede16bad1db1cc463aad8032c Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Sun, 10 Aug 2025 17:57:45 +0500 Subject: kasan: introduce ARCH_DEFER_KASAN and unify static key across modes Patch series "kasan: unify kasan_enabled() and remove arch-specific implementations", v6. This patch series addresses the fragmentation in KASAN initialization across architectures by introducing a unified approach that eliminates duplicate static keys and arch-specific kasan_arch_is_ready() implementations. 
The core issue is that different architectures have inconsistent approaches to KASAN readiness tracking: - PowerPC, LoongArch, and UML arch, each implement their own kasan_arch_is_ready() - Only HW_TAGS mode had a unified static key (kasan_flag_enabled) - Generic and SW_TAGS modes relied on arch-specific solutions or always-on behavior This patch (of 2): Introduce CONFIG_ARCH_DEFER_KASAN to identify architectures [1] that need to defer KASAN initialization until shadow memory is properly set up, and unify the static key infrastructure across all KASAN modes. [1] PowerPC, UML, LoongArch select ARCH_DEFER_KASAN. The core issue is that different architectures have inconsistent approaches to KASAN readiness tracking: - PowerPC, LoongArch, and UML arch, each implement their own kasan_arch_is_ready() - Only HW_TAGS mode had a unified static key (kasan_flag_enabled) - Generic and SW_TAGS modes relied on arch-specific solutions or always-on behavior This patch addresses the fragmentation in KASAN initialization across architectures by introducing a unified approach that eliminates duplicate static keys and arch-specific kasan_arch_is_ready() implementations. Let's replace kasan_arch_is_ready() with existing kasan_enabled() check, which examines the static key being enabled if arch selects ARCH_DEFER_KASAN or has HW_TAGS mode support. For other arch, kasan_enabled() checks the enablement during compile time. Now KASAN users can use a single kasan_enabled() check everywhere. 
Link: https://lkml.kernel.org/r/20250810125746.1105476-1-snovitoll@gmail.com Link: https://lkml.kernel.org/r/20250810125746.1105476-2-snovitoll@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217049 Signed-off-by: Sabyrzhan Tasbolatov Reviewed-by: Christophe Leroy Reviewed-by: Ritesh Harjani (IBM) #powerpc Cc: Alexander Gordeev Cc: Alexander Potapenko Cc: Alexandre Ghiti Cc: Alexandre Ghiti Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Baoquan He Cc: David Gow Cc: Dmitriy Vyukov Cc: Heiko Carstens Cc: Huacai Chen Cc: Marco Elver Cc: Qing Zhang Cc: Sabyrzhan Tasbolatov Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- include/linux/kasan-enabled.h | 32 +++++++++++++++++++++++--------- include/linux/kasan.h | 6 ++++++ 2 files changed, 29 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h index 6f612d69ea0c..9eca967d8526 100644 --- a/include/linux/kasan-enabled.h +++ b/include/linux/kasan-enabled.h @@ -4,32 +4,46 @@ #include -#ifdef CONFIG_KASAN_HW_TAGS - +#if defined(CONFIG_ARCH_DEFER_KASAN) || defined(CONFIG_KASAN_HW_TAGS) +/* + * Global runtime flag for KASAN modes that need runtime control. + * Used by ARCH_DEFER_KASAN architectures and HW_TAGS mode. + */ DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); +/* + * Runtime control for shadow memory initialization or HW_TAGS mode. + * Uses static key for architectures that need deferred KASAN or HW_TAGS. + */ static __always_inline bool kasan_enabled(void) { return static_branch_likely(&kasan_flag_enabled); } -static inline bool kasan_hw_tags_enabled(void) +static inline void kasan_enable(void) { - return kasan_enabled(); + static_branch_enable(&kasan_flag_enabled); } - -#else /* CONFIG_KASAN_HW_TAGS */ - -static inline bool kasan_enabled(void) +#else +/* For architectures that can enable KASAN early, use compile-time check. 
*/ +static __always_inline bool kasan_enabled(void) { return IS_ENABLED(CONFIG_KASAN); } +static inline void kasan_enable(void) {} +#endif /* CONFIG_ARCH_DEFER_KASAN || CONFIG_KASAN_HW_TAGS */ + +#ifdef CONFIG_KASAN_HW_TAGS +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} +#else static inline bool kasan_hw_tags_enabled(void) { return false; } - #endif /* CONFIG_KASAN_HW_TAGS */ #endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fe5ce9215821..b509a8d36949 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -543,6 +543,12 @@ void kasan_report_async(void); #endif /* CONFIG_KASAN_HW_TAGS */ +#ifdef CONFIG_KASAN_GENERIC +void __init kasan_init_generic(void); +#else +static inline void kasan_init_generic(void) { } +#endif + #ifdef CONFIG_KASAN_SW_TAGS void __init kasan_init_sw_tags(void); #else -- cgit v1.2.3 From 2ccd9fecd9163f168761d4398564c81554f636ef Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 29 Aug 2025 17:15:27 +0100 Subject: mm: remove unused zpool layer With zswap using zsmalloc directly, there are no more in-tree users of this code. Remove it. With zpool gone, zsmalloc is now always a simple dependency and no longer something the user needs to configure. Hide CONFIG_ZSMALLOC from the user and have zswap and zram pull it in as needed. 
Link: https://lkml.kernel.org/r/20250829162212.208258-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: SeongJae Park Acked-by: Yosry Ahmed Cc: Chengming Zhou Cc: Nhat Pham Cc: Vitaly Wool Signed-off-by: Andrew Morton --- include/linux/zpool.h | 86 --------------------------------------------------- 1 file changed, 86 deletions(-) delete mode 100644 include/linux/zpool.h (limited to 'include') diff --git a/include/linux/zpool.h b/include/linux/zpool.h deleted file mode 100644 index 369ef068fad8..000000000000 --- a/include/linux/zpool.h +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * zpool memory storage api - * - * Copyright (C) 2014 Dan Streetman - * - * This is a common frontend for the zswap compressed memory storage - * implementations. - */ - -#ifndef _ZPOOL_H_ -#define _ZPOOL_H_ - -struct zpool; - -bool zpool_has_pool(char *type); - -struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp); - -const char *zpool_get_type(struct zpool *pool); - -void zpool_destroy_pool(struct zpool *pool); - -int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, - unsigned long *handle, const int nid); - -void zpool_free(struct zpool *pool, unsigned long handle); - -void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, - void *local_copy); - -void zpool_obj_read_end(struct zpool *zpool, unsigned long handle, - void *handle_mem); - -void zpool_obj_write(struct zpool *zpool, unsigned long handle, - void *handle_mem, size_t mem_len); - -u64 zpool_get_total_pages(struct zpool *pool); - - -/** - * struct zpool_driver - driver implementation for zpool - * @type: name of the driver. - * @list: entry in the list of zpool drivers. - * @create: create a new pool. - * @destroy: destroy a pool. - * @malloc: allocate mem from a pool. - * @free: free mem from a pool. - * @sleep_mapped: whether zpool driver can sleep during map. - * @map: map a handle. - * @unmap: unmap a handle. 
- * @total_size: get total size of a pool. - * - * This is created by a zpool implementation and registered - * with zpool. - */ -struct zpool_driver { - char *type; - struct module *owner; - atomic_t refcount; - struct list_head list; - - void *(*create)(const char *name, gfp_t gfp); - void (*destroy)(void *pool); - - int (*malloc)(void *pool, size_t size, gfp_t gfp, - unsigned long *handle, const int nid); - void (*free)(void *pool, unsigned long handle); - - void *(*obj_read_begin)(void *pool, unsigned long handle, - void *local_copy); - void (*obj_read_end)(void *pool, unsigned long handle, - void *handle_mem); - void (*obj_write)(void *pool, unsigned long handle, - void *handle_mem, size_t mem_len); - - u64 (*total_pages)(void *pool); -}; - -void zpool_register_driver(struct zpool_driver *driver); - -int zpool_unregister_driver(struct zpool_driver *driver); - -bool zpool_can_sleep_mapped(struct zpool *pool); - -#endif -- cgit v1.2.3 From 0bf2edf041dcb0b304a8dbda8c699771d5a245d2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:27 +0200 Subject: mm/page_alloc: reject unreasonable folio/compound page sizes in alloc_contig_range_noprof() Let's reject them early, which in turn makes folio_alloc_gigantic() reject them properly. To avoid converting from order to nr_pages, let's just add MAX_FOLIO_ORDER and calculate MAX_FOLIO_NR_PAGES based on that. While at it, let's just make the order a "const unsigned order". Link: https://lkml.kernel.org/r/20250901150359.867252-7-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Acked-by: SeongJae Park Reviewed-by: Wei Yang Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 00c8a54127d3..77737cbf2216 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2055,11 +2055,13 @@ static inline long folio_nr_pages(const struct folio *folio) /* Only hugetlbfs can allocate folios larger than MAX_ORDER */ #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -#define MAX_FOLIO_NR_PAGES (1UL << PUD_ORDER) +#define MAX_FOLIO_ORDER PUD_ORDER #else -#define MAX_FOLIO_NR_PAGES MAX_ORDER_NR_PAGES +#define MAX_FOLIO_ORDER MAX_PAGE_ORDER #endif +#define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER) + /* * compound_nr() returns the number of pages in this potentially compound * page. compound_nr() can be called on a tail page, and is defined to -- cgit v1.2.3 From 4751c39eee0c3fcc742aa7d7242ce2b78faa3606 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:32 +0200 Subject: mm: limit folio/compound page sizes in problematic kernel configs Let's limit the maximum folio size in problematic kernel config where the memmap is allocated per memory section (SPARSEMEM without SPARSEMEM_VMEMMAP) to a single memory section. Currently, only a single architecture supports ARCH_HAS_GIGANTIC_PAGE but not SPARSEMEM_VMEMMAP: sh. Fortunately, the biggest hugetlb size sh supports is 64 MiB (HUGETLB_PAGE_SIZE_64MB) and the section size is at least 64 MiB (SECTION_SIZE_BITS == 26), so their use case is not degraded. As folios and memory sections are naturally aligned to their order-2 size in memory, consequently a single folio can no longer span multiple memory sections on these problematic kernel configs. nth_page() is no longer required when operating within a single compound page / folio.
Link: https://lkml.kernel.org/r/20250901150359.867252-12-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Acked-by: Mike Rapoport (Microsoft) Reviewed-by: Wei Yang Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 77737cbf2216..2dee79fa2efc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2053,11 +2053,25 @@ static inline long folio_nr_pages(const struct folio *folio) return folio_large_nr_pages(folio); } -/* Only hugetlbfs can allocate folios larger than MAX_ORDER */ -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -#define MAX_FOLIO_ORDER PUD_ORDER -#else +#if !defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) +/* + * We don't expect any folios that exceed buddy sizes (and consequently + * memory sections). + */ #define MAX_FOLIO_ORDER MAX_PAGE_ORDER +#elif defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +/* + * Only pages within a single memory section are guaranteed to be + * contiguous. By limiting folios to a single memory section, all folio + * pages are guaranteed to be contiguous. + */ +#define MAX_FOLIO_ORDER PFN_SECTION_SHIFT +#else +/* + * There is no real limit on the folio size. We limit them to the maximum we + * currently expect (e.g., hugetlb, dax). + */ +#define MAX_FOLIO_ORDER PUD_ORDER #endif #define MAX_FOLIO_NR_PAGES (1UL << MAX_FOLIO_ORDER) -- cgit v1.2.3 From 73b3294b1152e94c1971a735b8db8c7503fd97a1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:33 +0200 Subject: mm: simplify folio_page() and folio_page_idx() Now that a single folio/compound page can no longer span memory sections in problematic kernel configurations, we can stop using nth_page() in folio_page() and folio_page_idx(). 
While at it, turn both macros into static inline functions and add kernel doc for folio_page_idx(). Link: https://lkml.kernel.org/r/20250901150359.867252-13-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Zi Yan Reviewed-by: Wei Yang Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mm.h | 16 ++++++++++++++-- include/linux/page-flags.h | 5 ++++- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2dee79fa2efc..f6880e3225c5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -210,10 +210,8 @@ extern unsigned long sysctl_admin_reserve_kbytes; #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) -#define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) #else #define nth_page(page,n) ((page) + (n)) -#define folio_page_idx(folio, p) ((p) - &(folio)->page) #endif /* to align the pointer to the (next) page boundary */ @@ -225,6 +223,20 @@ extern unsigned long sysctl_admin_reserve_kbytes; /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) +/** + * folio_page_idx - Return the number of a page in a folio. + * @folio: The folio. + * @page: The folio page. + * + * This function expects that the page is actually part of the folio. + * The returned number is relative to the start of the folio. 
+ */ +static inline unsigned long folio_page_idx(const struct folio *folio, + const struct page *page) +{ + return page - &folio->page; +} + static inline struct folio *lru_to_folio(struct list_head *head) { return list_entry((head)->prev, struct folio, lru); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index d53a86e68c89..a88b61eec3f8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -316,7 +316,10 @@ static __always_inline unsigned long _compound_head(const struct page *page) * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. */ -#define folio_page(folio, n) nth_page(&(folio)->page, n) +static inline struct page *folio_page(struct folio *folio, unsigned long n) +{ + return &folio->page + n; +} static __always_inline int PageTail(const struct page *page) { -- cgit v1.2.3 From 6972706f95926838f9bd3ec2b2393c034bdb85ba Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:43 +0200 Subject: mm/cma: refuse handing out non-contiguous page ranges Let's disallow handing out PFN ranges with non-contiguous pages, so we can remove the nth-page usage in __cma_alloc(), and so any callers don't have to worry about that either when wanting to blindly iterate pages. This is really only a problem in configs with SPARSEMEM but without SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some cases. Will this cause harm? Probably not, because it's mostly 32bit that does not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could look into allocating the memmap for the memory sections spanned by a single CMA region in one go from memblock. 
[david@redhat.com: we can have NOMMU configs with SPARSEMEM enabled] Link: https://lkml.kernel.org/r/6ec933b1-b3f7-41c0-95d8-e518bb87375e@redhat.com Link: https://lkml.kernel.org/r/20250901150359.867252-23-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Alexandru Elisei Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index f6880e3225c5..2ca1eb2db63e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -209,9 +209,15 @@ extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +bool page_range_contiguous(const struct page *page, unsigned long nr_pages); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #else #define nth_page(page,n) ((page) + (n)) +static inline bool page_range_contiguous(const struct page *page, + unsigned long nr_pages) +{ + return true; +} #endif /* to align the pointer to the (next) page boundary */ -- cgit v1.2.3 From 80e7bb74d4ff24725f0ddb1c72d8de45a3d975f6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:45 +0200 Subject: scatterlist: disallow non-contigous page ranges in a single SG entry The expectation is that there is currently no user that would pass in non-contiguous page ranges: no allocator, not even CMA, will hand these out. The only problematic part would be if someone would provide a range obtained directly from memblock, or manually merge problematic ranges. If we find such cases, we should fix them to create separate SG entries. Let's check in sg_set_page() that this is really the case. No need to check in sg_set_folio(), as pages in a folio are guaranteed to be contiguous.
As sg_set_page() gets inlined into modules, we have to export the page_range_contiguous() helper -- use EXPORT_SYMBOL, there is nothing special about this helper such that we would want to enforce GPL-only modules. We can now drop the nth_page() usage in sg_page_iter_page(). Link: https://lkml.kernel.org/r/20250901150359.867252-25-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Marek Szyprowski Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 6f8a4965f9b9..29f6ceb98d74 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -158,6 +158,7 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) static inline void sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, unsigned int offset) { + VM_WARN_ON_ONCE(!page_range_contiguous(page, ALIGN(len + offset, PAGE_SIZE) / PAGE_SIZE)); sg_assign_page(sg, page); sg->offset = offset; sg->length = len; @@ -600,7 +601,7 @@ void __sg_page_iter_start(struct sg_page_iter *piter, */ static inline struct page *sg_page_iter_page(struct sg_page_iter *piter) { - return nth_page(sg_page(piter->sg), piter->sg_pgoffset); + return sg_page(piter->sg) + piter->sg_pgoffset; } /** -- cgit v1.2.3 From ce00897b94bc5c62fab962625efcf1ab824d3688 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:54 +0200 Subject: crypto: remove nth_page() usage within SG entry It's no longer required to use nth_page() when iterating pages within a single SG entry, so let's drop the nth_page() usage. Link: https://lkml.kernel.org/r/20250901150359.867252-34-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Acked-by: Herbert Xu Cc: "David S. 
Miller" Signed-off-by: Andrew Morton --- include/crypto/scatterwalk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 15ab743f68c8..83d14376ff2b 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -159,7 +159,7 @@ static inline void scatterwalk_map(struct scatter_walk *walk) if (IS_ENABLED(CONFIG_HIGHMEM)) { struct page *page; - page = nth_page(base_page, offset >> PAGE_SHIFT); + page = base_page + (offset >> PAGE_SHIFT); offset = offset_in_page(offset); addr = kmap_local_page(page) + offset; } else { @@ -259,7 +259,7 @@ static inline void scatterwalk_done_dst(struct scatter_walk *walk, end += (offset_in_page(offset) + offset_in_page(nbytes) + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = start; i < end; i++) - flush_dcache_page(nth_page(base_page, i)); + flush_dcache_page(base_page + i); } scatterwalk_advance(walk, nbytes); } -- cgit v1.2.3 From d5170ce4d71b3843613ee1840bca50ad71c3671e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:57 +0200 Subject: block: update comment of "struct bio_vec" regarding nth_page() Ever since commit 858c708d9efb ("block: move the bi_size update out of __bio_try_merge_page"), page_is_mergeable() no longer exists, and the logic in bvec_try_merge_page() is now a simple page pointer comparison. Link: https://lkml.kernel.org/r/20250901150359.867252-37-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/bvec.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 0a80e1f9aa20..3fc0efa0825b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -22,11 +22,8 @@ struct page; * @bv_len: Number of bytes in the address range. * @bv_offset: Start of the address range relative to the start of @bv_page. 
* - * The following holds for a bvec if n * PAGE_SIZE < bv_offset + bv_len: - * - * nth_page(@bv_page, n) == @bv_page + n - * - * This holds because page_is_mergeable() checks the above property. + * All pages within a bio_vec starting from @bv_page are contiguous and + * can simply be iterated (see bvec_advance()). */ struct bio_vec { struct page *bv_page; -- cgit v1.2.3 From 84efbefa26df36a845a9210ee962aa6866f99bb7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Sep 2025 17:03:58 +0200 Subject: mm: remove nth_page() Now that all users are gone, let's remove it. Link: https://lkml.kernel.org/r/20250901150359.867252-38-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ca1eb2db63e..b26ca8b2162d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -210,9 +210,7 @@ extern unsigned long sysctl_admin_reserve_kbytes; #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) bool page_range_contiguous(const struct page *page, unsigned long nr_pages); -#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #else -#define nth_page(page,n) ((page) + (n)) static inline bool page_range_contiguous(const struct page *page, unsigned long nr_pages) { -- cgit v1.2.3 From 4a25f995bd59843a898b531bb3e472d710ef9439 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 10 Sep 2025 21:39:56 +0800 Subject: mm: hugetlb: directly pass order when allocate a hugetlb folio Use order instead of struct hstate to remove huge_page_order() call from all hugetlb folio allocation, also order_is_gigantic() is added to check whether it is a gigantic order. 
Link: https://lkml.kernel.org/r/20250910133958.301467-4-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Oscar Salvador Reviewed-by: Sidhartha Kumar Reviewed-by: Jane Chu Reviewed-by: Zi Yan Cc: Brendan Jackman Cc: David Hildenbrand Cc: Johannes Weiner Cc: Muchun Song Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 526d27e88b3b..8e63e46b8e1f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -788,9 +788,14 @@ static inline unsigned huge_page_shift(struct hstate *h) return h->order + PAGE_SHIFT; } +static inline bool order_is_gigantic(unsigned int order) +{ + return order > MAX_PAGE_ORDER; +} + static inline bool hstate_is_gigantic(struct hstate *h) { - return huge_page_order(h) > MAX_PAGE_ORDER; + return order_is_gigantic(huge_page_order(h)); } static inline unsigned int pages_per_huge_page(const struct hstate *h) -- cgit v1.2.3 From 8eccb066f28747e966bda716cb90dbca13b78032 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:10 +0200 Subject: mm: constify shmem related test functions for improved const-correctness Patch series "mm: establish const-correctness for pointer parameters", v6. This series is to improved const-correctness in the low-level memory-management subsystem, which provides a basis for further constification further up the call stack (e.g. filesystems). I started this work when I tried to constify the Ceph filesystem code, but found that to be impossible because many "mm" functions accept non-const pointers, even though they modify nothing. This patch (of 12): We select certain test functions which either invoke each other, functions that are already const-ified, or no further functions. 
It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. Link: https://lkml.kernel.org/r/20250901205021.3573313-1-max.kellermann@ionos.com Link: https://lkml.kernel.org/r/20250901205021.3573313-2-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- include/linux/shmem_fs.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b26ca8b2162d..45a47b555499 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -995,11 +995,11 @@ static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) * The vma_is_shmem is not inline because it is used only by slow * paths in userfault. 
*/ -bool vma_is_shmem(struct vm_area_struct *vma); -bool vma_is_anon_shmem(struct vm_area_struct *vma); +bool vma_is_shmem(const struct vm_area_struct *vma); +bool vma_is_anon_shmem(const struct vm_area_struct *vma); #else -static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } -static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; } +static inline bool vma_is_shmem(const struct vm_area_struct *vma) { return false; } +static inline bool vma_is_anon_shmem(const struct vm_area_struct *vma) { return false; } #endif int vma_is_stack_for_current(struct vm_area_struct *vma); diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 6d0f9c599ff7..0e47465ef0fd 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -99,9 +99,9 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts); #ifdef CONFIG_SHMEM -bool shmem_mapping(struct address_space *mapping); +bool shmem_mapping(const struct address_space *mapping); #else -static inline bool shmem_mapping(struct address_space *mapping) +static inline bool shmem_mapping(const struct address_space *mapping) { return false; } -- cgit v1.2.3 From 7c3e97ac0d75306d9d03de575c9878f8fd9efe3b Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:11 +0200 Subject: mm: constify pagemap related test/getter functions For improved const-correctness. We select certain test functions which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. 
Link: https://lkml.kernel.org/r/20250901205021.3573313-3-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 55 +++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f0dfdfb13cd9..0d66a252b06f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -140,7 +140,7 @@ static inline int inode_drain_writes(struct inode *inode) return filemap_write_and_wait(inode->i_mapping); } -static inline bool mapping_empty(struct address_space *mapping) +static inline bool mapping_empty(const struct address_space *mapping) { return xa_empty(&mapping->i_pages); } @@ -166,7 +166,7 @@ static inline bool mapping_empty(struct address_space *mapping) * refcount and the referenced bit, which will be elevated or set in * the process of adding new cache pages to an inode. 
*/ -static inline bool mapping_shrinkable(struct address_space *mapping) +static inline bool mapping_shrinkable(const struct address_space *mapping) { void *head; @@ -267,7 +267,7 @@ static inline void mapping_clear_unevictable(struct address_space *mapping) clear_bit(AS_UNEVICTABLE, &mapping->flags); } -static inline bool mapping_unevictable(struct address_space *mapping) +static inline bool mapping_unevictable(const struct address_space *mapping) { return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags); } @@ -277,7 +277,7 @@ static inline void mapping_set_exiting(struct address_space *mapping) set_bit(AS_EXITING, &mapping->flags); } -static inline int mapping_exiting(struct address_space *mapping) +static inline int mapping_exiting(const struct address_space *mapping) { return test_bit(AS_EXITING, &mapping->flags); } @@ -287,7 +287,7 @@ static inline void mapping_set_no_writeback_tags(struct address_space *mapping) set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } -static inline int mapping_use_writeback_tags(struct address_space *mapping) +static inline int mapping_use_writeback_tags(const struct address_space *mapping) { return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } @@ -333,7 +333,7 @@ static inline void mapping_set_inaccessible(struct address_space *mapping) set_bit(AS_INACCESSIBLE, &mapping->flags); } -static inline bool mapping_inaccessible(struct address_space *mapping) +static inline bool mapping_inaccessible(const struct address_space *mapping) { return test_bit(AS_INACCESSIBLE, &mapping->flags); } @@ -343,18 +343,18 @@ static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_ set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_space *mapping) +static inline bool mapping_writeback_may_deadlock_on_reclaim(const struct address_space *mapping) { return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); } -static 
inline gfp_t mapping_gfp_mask(struct address_space * mapping) +static inline gfp_t mapping_gfp_mask(const struct address_space *mapping) { return mapping->gfp_mask; } /* Restricts the given gfp_mask to what the mapping allows. */ -static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, +static inline gfp_t mapping_gfp_constraint(const struct address_space *mapping, gfp_t gfp_mask) { return mapping_gfp_mask(mapping) & gfp_mask; @@ -477,7 +477,7 @@ mapping_min_folio_order(const struct address_space *mapping) } static inline unsigned long -mapping_min_folio_nrpages(struct address_space *mapping) +mapping_min_folio_nrpages(const struct address_space *mapping) { return 1UL << mapping_min_folio_order(mapping); } @@ -491,7 +491,7 @@ mapping_min_folio_nrpages(struct address_space *mapping) * new folio to the page cache and need to know what index to give it, * call this function. */ -static inline pgoff_t mapping_align_index(struct address_space *mapping, +static inline pgoff_t mapping_align_index(const struct address_space *mapping, pgoff_t index) { return round_down(index, mapping_min_folio_nrpages(mapping)); @@ -501,7 +501,7 @@ static inline pgoff_t mapping_align_index(struct address_space *mapping, * Large folio support currently depends on THP. These dependencies are * being worked on but are not yet fixed. 
*/ -static inline bool mapping_large_folio_support(struct address_space *mapping) +static inline bool mapping_large_folio_support(const struct address_space *mapping) { /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ VM_WARN_ONCE((unsigned long)mapping & FOLIO_MAPPING_ANON, @@ -516,7 +516,7 @@ static inline size_t mapping_max_folio_size(const struct address_space *mapping) return PAGE_SIZE << mapping_max_folio_order(mapping); } -static inline int filemap_nr_thps(struct address_space *mapping) +static inline int filemap_nr_thps(const struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS return atomic_read(&mapping->nr_thps); @@ -930,7 +930,7 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, * * Return: The index of the folio which follows this folio in the file. */ -static inline pgoff_t folio_next_index(struct folio *folio) +static inline pgoff_t folio_next_index(const struct folio *folio) { return folio->index + folio_nr_pages(folio); } @@ -959,7 +959,7 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) * e.g., shmem did not move this folio to the swap cache. * Return: true or false. */ -static inline bool folio_contains(struct folio *folio, pgoff_t index) +static inline bool folio_contains(const struct folio *folio, pgoff_t index) { VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio); return index - folio->index < folio_nr_pages(folio); @@ -1036,13 +1036,13 @@ static inline loff_t page_offset(struct page *page) /* * Get the offset in PAGE_SIZE (even for hugetlb folios). 
*/ -static inline pgoff_t folio_pgoff(struct folio *folio) +static inline pgoff_t folio_pgoff(const struct folio *folio) { return folio->index; } -static inline pgoff_t linear_page_index(struct vm_area_struct *vma, - unsigned long address) +static inline pgoff_t linear_page_index(const struct vm_area_struct *vma, + const unsigned long address) { pgoff_t pgoff; pgoff = (address - vma->vm_start) >> PAGE_SHIFT; @@ -1462,7 +1462,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, * readahead_pos - The byte offset into the file of this readahead request. * @rac: The readahead request. */ -static inline loff_t readahead_pos(struct readahead_control *rac) +static inline loff_t readahead_pos(const struct readahead_control *rac) { return (loff_t)rac->_index * PAGE_SIZE; } @@ -1471,7 +1471,7 @@ static inline loff_t readahead_pos(struct readahead_control *rac) * readahead_length - The number of bytes in this readahead request. * @rac: The readahead request. */ -static inline size_t readahead_length(struct readahead_control *rac) +static inline size_t readahead_length(const struct readahead_control *rac) { return rac->_nr_pages * PAGE_SIZE; } @@ -1480,7 +1480,7 @@ static inline size_t readahead_length(struct readahead_control *rac) * readahead_index - The index of the first page in this readahead request. * @rac: The readahead request. */ -static inline pgoff_t readahead_index(struct readahead_control *rac) +static inline pgoff_t readahead_index(const struct readahead_control *rac) { return rac->_index; } @@ -1489,7 +1489,7 @@ static inline pgoff_t readahead_index(struct readahead_control *rac) * readahead_count - The number of pages in this readahead request. * @rac: The readahead request. 
*/ -static inline unsigned int readahead_count(struct readahead_control *rac) +static inline unsigned int readahead_count(const struct readahead_control *rac) { return rac->_nr_pages; } @@ -1498,12 +1498,12 @@ static inline unsigned int readahead_count(struct readahead_control *rac) * readahead_batch_length - The number of bytes in the current batch. * @rac: The readahead request. */ -static inline size_t readahead_batch_length(struct readahead_control *rac) +static inline size_t readahead_batch_length(const struct readahead_control *rac) { return rac->_batch_count * PAGE_SIZE; } -static inline unsigned long dir_pages(struct inode *inode) +static inline unsigned long dir_pages(const struct inode *inode) { return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -1517,8 +1517,8 @@ static inline unsigned long dir_pages(struct inode *inode) * Return: the number of bytes in the folio up to EOF, * or -EFAULT if the folio was truncated. */ -static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, - struct inode *inode) +static inline ssize_t folio_mkwrite_check_truncate(const struct folio *folio, + const struct inode *inode) { loff_t size = i_size_read(inode); pgoff_t index = size >> PAGE_SHIFT; @@ -1549,7 +1549,8 @@ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, * Return: The number of filesystem blocks covered by this folio. */ static inline -unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) +unsigned int i_blocks_per_folio(const struct inode *inode, + const struct folio *folio) { return folio_size(folio) >> inode->i_blkbits; } -- cgit v1.2.3 From 959b0886256b6896b44633e0e07c5464169087c1 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:12 +0200 Subject: mm: constify zone related test/getter functions For improved const-correctness. We select certain test functions which either invoke each other, functions that are already const-ified, or no further functions. 
It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. Link: https://lkml.kernel.org/r/20250901205021.3573313-4-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russell King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f3272ef5131b..6c4eae96160d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1104,7 +1104,7 @@ static inline unsigned long promo_wmark_pages(const struct zone *z) return wmark_pages(z, WMARK_PROMO); } -static inline unsigned long zone_managed_pages(struct zone *zone) +static inline unsigned long zone_managed_pages(const struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); } @@ -1128,12 +1128,12 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone); } -static inline bool 
zone_is_initialized(struct zone *zone) +static inline bool zone_is_initialized(const struct zone *zone) { return zone->initialized; } -static inline bool zone_is_empty(struct zone *zone) +static inline bool zone_is_empty(const struct zone *zone) { return zone->spanned_pages == 0; } @@ -1273,7 +1273,7 @@ static inline bool folio_is_zone_movable(const struct folio *folio) * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty * intersection with the given zone */ -static inline bool zone_intersects(struct zone *zone, +static inline bool zone_intersects(const struct zone *zone, unsigned long start_pfn, unsigned long nr_pages) { if (zone_is_empty(zone)) @@ -1581,12 +1581,12 @@ static inline int local_memory_node(int node_id) { return node_id; }; #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) #ifdef CONFIG_ZONE_DEVICE -static inline bool zone_is_zone_device(struct zone *zone) +static inline bool zone_is_zone_device(const struct zone *zone) { return zone_idx(zone) == ZONE_DEVICE; } #else -static inline bool zone_is_zone_device(struct zone *zone) +static inline bool zone_is_zone_device(const struct zone *zone) { return false; } @@ -1598,19 +1598,19 @@ static inline bool zone_is_zone_device(struct zone *zone) * populated_zone(). If the whole zone is reserved then we can easily * end up with populated_zone() && !managed_zone(). 
*/ -static inline bool managed_zone(struct zone *zone) +static inline bool managed_zone(const struct zone *zone) { return zone_managed_pages(zone); } /* Returns true if a zone has memory */ -static inline bool populated_zone(struct zone *zone) +static inline bool populated_zone(const struct zone *zone) { return zone->present_pages; } #ifdef CONFIG_NUMA -static inline int zone_to_nid(struct zone *zone) +static inline int zone_to_nid(const struct zone *zone) { return zone->node; } @@ -1620,7 +1620,7 @@ static inline void zone_set_nid(struct zone *zone, int nid) zone->node = nid; } #else -static inline int zone_to_nid(struct zone *zone) +static inline int zone_to_nid(const struct zone *zone) { return 0; } @@ -1647,7 +1647,7 @@ static inline int is_highmem_idx(enum zone_type idx) * @zone: pointer to struct zone variable * Return: 1 for a highmem zone, 0 otherwise */ -static inline int is_highmem(struct zone *zone) +static inline int is_highmem(const struct zone *zone) { return is_highmem_idx(zone_idx(zone)); } @@ -1713,12 +1713,12 @@ static inline struct zone *zonelist_zone(struct zoneref *zoneref) return zoneref->zone; } -static inline int zonelist_zone_idx(struct zoneref *zoneref) +static inline int zonelist_zone_idx(const struct zoneref *zoneref) { return zoneref->zone_idx; } -static inline int zonelist_node_idx(struct zoneref *zoneref) +static inline int zonelist_node_idx(const struct zoneref *zoneref) { return zone_to_nid(zoneref->zone); } @@ -2021,7 +2021,7 @@ static inline struct page *__section_mem_map_addr(struct mem_section *section) return (struct page *)map; } -static inline int present_section(struct mem_section *section) +static inline int present_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } @@ -2031,12 +2031,12 @@ static inline int present_section_nr(unsigned long nr) return present_section(__nr_to_section(nr)); } -static inline int valid_section(struct mem_section *section) 
+static inline int valid_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } -static inline int early_section(struct mem_section *section) +static inline int early_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_EARLY)); } @@ -2046,27 +2046,27 @@ static inline int valid_section_nr(unsigned long nr) return valid_section(__nr_to_section(nr)); } -static inline int online_section(struct mem_section *section) +static inline int online_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_ONLINE)); } #ifdef CONFIG_ZONE_DEVICE -static inline int online_device_section(struct mem_section *section) +static inline int online_device_section(const struct mem_section *section) { unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE; return section && ((section->section_mem_map & flags) == flags); } #else -static inline int online_device_section(struct mem_section *section) +static inline int online_device_section(const struct mem_section *section) { return 0; } #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT -static inline int preinited_vmemmap_section(struct mem_section *section) +static inline int preinited_vmemmap_section(const struct mem_section *section) { return (section && (section->section_mem_map & SECTION_IS_VMEMMAP_PREINIT)); @@ -2076,7 +2076,7 @@ void sparse_vmemmap_init_nid_early(int nid); void sparse_vmemmap_init_nid_late(int nid); #else -static inline int preinited_vmemmap_section(struct mem_section *section) +static inline int preinited_vmemmap_section(const struct mem_section *section) { return 0; } -- cgit v1.2.3 From b119fb0927738f150cbd179d23d08057dccd75c1 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:13 +0200 Subject: fs: constify mapping related test functions for improved const-correctness We select certain test functions which either invoke each 
other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. Link: https://lkml.kernel.org/r/20250901205021.3573313-5-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russell King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..0783c5d05d3f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -537,7 +537,7 @@ struct address_space { /* * Returns true if any of the pages in the mapping are marked with the tag. */ -static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag) +static inline bool mapping_tagged(const struct address_space *mapping, xa_mark_t tag) { return xa_marked(&mapping->i_pages, tag); } @@ -585,7 +585,7 @@ static inline void i_mmap_assert_write_locked(struct address_space *mapping) /* * Might pages of this file be mapped into userspace? 
*/ -static inline int mapping_mapped(struct address_space *mapping) +static inline int mapping_mapped(const struct address_space *mapping) { return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } @@ -599,7 +599,7 @@ static inline int mapping_mapped(struct address_space *mapping) * If i_mmap_writable is negative, no new writable mappings are allowed. You * can only deny writable mappings, if none exists right now. */ -static inline int mapping_writably_mapped(struct address_space *mapping) +static inline int mapping_writably_mapped(const struct address_space *mapping) { return atomic_read(&mapping->i_mmap_writable) > 0; } -- cgit v1.2.3 From 4680092f8ccb4406e771a6b1a2c0243ebd40bab7 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:14 +0200 Subject: mm: constify process_shares_mm() for improved const-correctness This function only reads from the pointer arguments. Local (loop) variables are also annotated with `const` to clarify that these will not be written to. Link: https://lkml.kernel.org/r/20250901205021.3573313-6-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russell King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Thomas Huth Cc: Vasily Gorbik Cc: Vishal Moola (Oracle) Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 45a47b555499..b3b63058e1a3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3872,7 +3872,7 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) } #endif /* __HAVE_ARCH_GATE_AREA */ -extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); +bool process_shares_mm(const struct task_struct *p, const struct mm_struct *mm); void drop_slab(void); -- cgit v1.2.3 From 0bf25cfc9e795ab302ee23550fdeebd2aeedf800 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:15 +0200 Subject: mm, s390: constify mapping related test/getter functions For improved const-correctness. We select certain test functions which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. (Even though seemingly unrelated, this also constifies the pointer parameter of mmap_is_legacy() in arch/s390/mm/mmap.c because a copy of the function exists in mm/util.c.) 
Link: https://lkml.kernel.org/r/20250901205021.3573313-7-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russell King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- include/linux/pagemap.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index b3b63058e1a3..221e98bb7689 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1002,7 +1002,7 @@ static inline bool vma_is_shmem(const struct vm_area_struct *vma) { return false static inline bool vma_is_anon_shmem(const struct vm_area_struct *vma) { return false; } #endif -int vma_is_stack_for_current(struct vm_area_struct *vma); +int vma_is_stack_for_current(const struct vm_area_struct *vma); /* flush_tlb_range() takes a vma, not a mm, and can care about flags */ #define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } @@ -2617,7 +2617,7 @@ void folio_add_pin(struct folio *folio); int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc); int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, - struct 
task_struct *task, bool bypass_rlim); + const struct task_struct *task, bool bypass_rlim); struct kvec; struct page *get_dump_page(unsigned long addr, int *locked); @@ -3380,7 +3380,7 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); avc; avc = anon_vma_interval_tree_iter_next(avc, start, last)) /* mmap.c */ -extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); +extern int __vm_enough_memory(const struct mm_struct *mm, long pages, int cap_sys_admin); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void exit_mmap(struct mm_struct *); bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0d66a252b06f..aec4a11565bc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -545,7 +545,7 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) #endif } -struct address_space *folio_mapping(struct folio *); +struct address_space *folio_mapping(const struct folio *folio); /** * folio_flush_mapping - Find the file mapping this folio belongs to. -- cgit v1.2.3 From a955cca37288fe37cc1cde8d291e02717c8a7409 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:17 +0200 Subject: mm: constify arch_pick_mmap_layout() for improved const-correctness This function only reads from the rlimit pointer (but writes to the mm_struct pointer which is kept without `const`). All callees are already const-ified or (internal functions) are being constified by this patch. 
Link: https://lkml.kernel.org/r/20250901205021.3573313-9-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russell King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/sched/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 2201da0afecc..0232d983b715 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -178,7 +178,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm) #endif extern void arch_pick_mmap_layout(struct mm_struct *mm, - struct rlimit *rlim_stack); + const struct rlimit *rlim_stack); unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, @@ -211,7 +211,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long flags, vm_flags_t vm_flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, - struct rlimit *rlim_stack) {} + const struct rlimit *rlim_stack) {} #endif static inline bool in_vfork(struct task_struct *tsk) -- cgit v1.2.3 From 89bf840b84bb53393436426cd4acd80604bd26fd Mon Sep 17 
00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:18 +0200 Subject: mm: constify ptdesc_pmd_pts_count() and folio_get_private() These functions from mm_types.h are trivial getters that should never write to the given pointers. Link: https://lkml.kernel.org/r/20250901205021.3573313-10-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russell King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d934a3a5b443..275e8060d918 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -632,7 +632,7 @@ static inline void ptdesc_pmd_pts_dec(struct ptdesc *ptdesc) atomic_dec(&ptdesc->pt_share_count); } -static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc) +static inline int ptdesc_pmd_pts_count(const struct ptdesc *ptdesc) { return atomic_read(&ptdesc->pt_share_count); } @@ -660,7 +660,7 @@ static inline void set_page_private(struct page *page, unsigned long private) page->private = private; } -static inline void 
*folio_get_private(struct folio *folio) +static inline void *folio_get_private(const struct folio *folio) { return folio->private; } -- cgit v1.2.3 From f346a9473a2fbbab785d1733d475160f1fc54e5a Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:19 +0200 Subject: mm: constify various inline functions for improved const-correctness We select certain test functions plus folio_migrate_refs() from mm_inline.h which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. One exception is the function folio_migrate_refs() which does write to the "new" folio pointer; there, only the "old" folio pointer is being constified; only its "flags" field is read, but nothing written. Link: https://lkml.kernel.org/r/20250901205021.3573313-11-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russell King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Thomas Huth Cc: Vasily Gorbik Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 150302b4a905..d6c1011b38f2 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -25,7 +25,7 @@ * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise * ram or swap backed folio. */ -static inline int folio_is_file_lru(struct folio *folio) +static inline int folio_is_file_lru(const struct folio *folio) { return !folio_test_swapbacked(folio); } @@ -84,7 +84,7 @@ static __always_inline void __folio_clear_lru_flags(struct folio *folio) * Return: The LRU list a folio should be on, as an index * into the array of LRU lists. */ -static __always_inline enum lru_list folio_lru_list(struct folio *folio) +static __always_inline enum lru_list folio_lru_list(const struct folio *folio) { enum lru_list lru; @@ -141,7 +141,7 @@ static inline int lru_tier_from_refs(int refs, bool workingset) return workingset ? 
MAX_NR_TIERS - 1 : order_base_2(refs); } -static inline int folio_lru_refs(struct folio *folio) +static inline int folio_lru_refs(const struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags.f); @@ -154,14 +154,14 @@ static inline int folio_lru_refs(struct folio *folio) return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1; } -static inline int folio_lru_gen(struct folio *folio) +static inline int folio_lru_gen(const struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags.f); return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; } -static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +static inline bool lru_gen_is_active(const struct lruvec *lruvec, int gen) { unsigned long max_seq = lruvec->lrugen.max_seq; @@ -217,12 +217,13 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); } -static inline unsigned long lru_gen_folio_seq(struct lruvec *lruvec, struct folio *folio, +static inline unsigned long lru_gen_folio_seq(const struct lruvec *lruvec, + const struct folio *folio, bool reclaiming) { int gen; int type = folio_is_file_lru(folio); - struct lru_gen_folio *lrugen = &lruvec->lrugen; + const struct lru_gen_folio *lrugen = &lruvec->lrugen; /* * +-----------------------------------+-----------------------------------+ @@ -302,7 +303,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return true; } -static inline void folio_migrate_refs(struct folio *new, struct folio *old) +static inline void folio_migrate_refs(struct folio *new, const struct folio *old) { unsigned long refs = READ_ONCE(old->flags.f) & LRU_REFS_MASK; @@ -330,7 +331,7 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return false; } -static inline void folio_migrate_refs(struct folio *new, struct folio *old) +static inline void folio_migrate_refs(struct folio *new, const 
struct folio *old) { } @@ -508,7 +509,7 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm) atomic_dec(&mm->tlb_flush_pending); } -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) +static inline bool mm_tlb_flush_pending(const struct mm_struct *mm) { /* * Must be called after having acquired the PTL; orders against that @@ -521,7 +522,7 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending); } -static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +static inline bool mm_tlb_flush_nested(const struct mm_struct *mm) { /* * Similar to mm_tlb_flush_pending(), we must have acquired the PTL @@ -605,7 +606,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, return false; } -static inline bool vma_has_recency(struct vm_area_struct *vma) +static inline bool vma_has_recency(const struct vm_area_struct *vma) { if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) return false; -- cgit v1.2.3 From da0045587d59d4ffd7710fa45cea51e5a48453a4 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:20 +0200 Subject: mm: constify assert/test functions in mm.h For improved const-correctness. We select certain assert and test functions which either invoke each other, functions that are already const-ified, or no further functions. It is therefore relatively trivial to const-ify them, which provides a basis for further const-ification further up the call stack. Link: https://lkml.kernel.org/r/20250901205021.3573313-12-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. 
Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Vishal Moola (Oracle) Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/mm.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 221e98bb7689..a6bfa46937a8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -719,7 +719,7 @@ static inline void release_fault_lock(struct vm_fault *vmf) mmap_read_unlock(vmf->vma->vm_mm); } -static inline void assert_fault_locked(struct vm_fault *vmf) +static inline void assert_fault_locked(const struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) vma_assert_locked(vmf->vma); @@ -732,7 +732,7 @@ static inline void release_fault_lock(struct vm_fault *vmf) mmap_read_unlock(vmf->vma->vm_mm); } -static inline void assert_fault_locked(struct vm_fault *vmf) +static inline void assert_fault_locked(const struct vm_fault *vmf) { mmap_assert_locked(vmf->vma->vm_mm); } @@ -875,7 +875,7 @@ static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) vma->vm_end >= vma->vm_mm->start_stack; } -static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) +static inline bool vma_is_temporary_stack(const struct vm_area_struct *vma) { int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); @@ -889,7 +889,7 @@ static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) return false; } -static inline bool vma_is_foreign(struct vm_area_struct *vma) +static inline bool vma_is_foreign(const struct 
vm_area_struct *vma) { if (!current->mm) return true; @@ -900,7 +900,7 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) return false; } -static inline bool vma_is_accessible(struct vm_area_struct *vma) +static inline bool vma_is_accessible(const struct vm_area_struct *vma) { return vma->vm_flags & VM_ACCESS_FLAGS; } @@ -911,7 +911,7 @@ static inline bool is_shared_maywrite(vm_flags_t vm_flags) (VM_SHARED | VM_MAYWRITE); } -static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) +static inline bool vma_is_shared_maywrite(const struct vm_area_struct *vma) { return is_shared_maywrite(vma->vm_flags); } @@ -1855,7 +1855,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) } #ifdef CONFIG_MMU -static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +static inline pte_t mk_pte(const struct page *page, pgprot_t pgprot) { return pfn_pte(page_to_pfn(page), pgprot); } @@ -1870,7 +1870,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. */ -static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) +static inline pte_t folio_mk_pte(const struct folio *folio, pgprot_t pgprot) { return pfn_pte(folio_pfn(folio), pgprot); } @@ -1886,7 +1886,7 @@ static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. */ -static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) +static inline pmd_t folio_mk_pmd(const struct folio *folio, pgprot_t pgprot) { return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot)); } @@ -1902,7 +1902,7 @@ static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) * * Return: A page table entry suitable for mapping this folio. 
*/ -static inline pud_t folio_mk_pud(struct folio *folio, pgprot_t pgprot) +static inline pud_t folio_mk_pud(const struct folio *folio, pgprot_t pgprot) { return pud_mkhuge(pfn_pud(folio_pfn(folio), pgprot)); } @@ -3520,7 +3520,7 @@ struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) return mtree_load(&mm->mm_mt, addr); } -static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) +static inline unsigned long stack_guard_start_gap(const struct vm_area_struct *vma) { if (vma->vm_flags & VM_GROWSDOWN) return stack_guard_gap; @@ -3532,7 +3532,7 @@ static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma) return 0; } -static inline unsigned long vm_start_gap(struct vm_area_struct *vma) +static inline unsigned long vm_start_gap(const struct vm_area_struct *vma) { unsigned long gap = stack_guard_start_gap(vma); unsigned long vm_start = vma->vm_start; @@ -3543,7 +3543,7 @@ static inline unsigned long vm_start_gap(struct vm_area_struct *vma) return vm_start; } -static inline unsigned long vm_end_gap(struct vm_area_struct *vma) +static inline unsigned long vm_end_gap(const struct vm_area_struct *vma) { unsigned long vm_end = vma->vm_end; @@ -3555,7 +3555,7 @@ static inline unsigned long vm_end_gap(struct vm_area_struct *vma) return vm_end; } -static inline unsigned long vma_pages(struct vm_area_struct *vma) +static inline unsigned long vma_pages(const struct vm_area_struct *vma) { return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } @@ -3572,7 +3572,7 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, return vma; } -static inline bool range_in_vma(struct vm_area_struct *vma, +static inline bool range_in_vma(const struct vm_area_struct *vma, unsigned long start, unsigned long end) { return (vma && vma->vm_start <= start && end <= vma->vm_end); @@ -3688,7 +3688,7 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) * Indicates whether GUP can follow a PROT_NONE 
mapped page, or whether * a (NUMA hinting) fault is required. */ -static inline bool gup_can_follow_protnone(struct vm_area_struct *vma, +static inline bool gup_can_follow_protnone(const struct vm_area_struct *vma, unsigned int flags) { /* @@ -3818,7 +3818,7 @@ static inline bool debug_guardpage_enabled(void) return static_branch_unlikely(&_debug_guardpage_enabled); } -static inline bool page_is_guard(struct page *page) +static inline bool page_is_guard(const struct page *page) { if (!debug_guardpage_enabled()) return false; @@ -3849,7 +3849,7 @@ static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {} static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {} static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool debug_guardpage_enabled(void) { return false; } -static inline bool page_is_guard(struct page *page) { return false; } +static inline bool page_is_guard(const struct page *page) { return false; } static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, @@ -3931,7 +3931,7 @@ void vmemmap_free(unsigned long start, unsigned long end, #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap) { /* number of pfns from base where pfn_to_page() is valid */ if (altmap) @@ -3945,7 +3945,7 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap, altmap->alloc -= nr_pfns; } #else -static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +static inline unsigned long vmem_altmap_offset(const struct vmem_altmap *altmap) { return 0; } -- cgit v1.2.3 From a847b17009ec271514b269c90320a3893cd9b667 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 1 Sep 2025 22:50:21 +0200 Subject: mm: constify 
highmem related functions for improved const-correctness Lots of functions in mm/highmem.c do not write to the given pointers and do not call functions that take non-const pointers and can therefore be constified. This includes functions like kunmap() which might be implemented in a way that writes to the pointer (e.g. to update reference counters or mapping fields), but currently are not. kmap() on the other hand cannot be made const because it calls set_page_address() which is non-const in some architectures/configurations. [akpm@linux-foundation.org: "fix" folio_page() build failure] Link: https://lkml.kernel.org/r/20250901205021.3573313-13-max.kellermann@ionos.com Signed-off-by: Max Kellermann Reviewed-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Shakeel Butt Cc: Alexander Gordeev Cc: Al Viro Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Baolin Wang Cc: Borislav Betkov Cc: Christian Borntraeger Cc: Christian Brauner Cc: Christian Zankel Cc: David Rientjes Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "H. 
Peter Anvin" Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Bottomley Cc: Jan Kara Cc: Jocelyn Falempe Cc: Liam Howlett Cc: Mark Brown Cc: Matthew Wilcox (Oracle) Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Hocko Cc: "Nysal Jan K.A" Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Russel King Cc: Suren Baghdasaryan Cc: Sven Schnelle Cc: Thomas Gleinxer Cc: Thomas Huth Cc: Vasily Gorbik Cc: Vishal Moola (Oracle) Cc: Wei Xu Cc: Yuanchu Xie Signed-off-by: Andrew Morton --- include/linux/highmem-internal.h | 36 ++++++++++++++++++------------------ include/linux/highmem.h | 8 ++++---- include/linux/page-flags.h | 4 ++-- 3 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 36053c3d6d64..0574c21ca45d 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -7,7 +7,7 @@ */ #ifdef CONFIG_KMAP_LOCAL void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot); -void *__kmap_local_page_prot(struct page *page, pgprot_t prot); +void *__kmap_local_page_prot(const struct page *page, pgprot_t prot); void kunmap_local_indexed(const void *vaddr); void kmap_local_fork(struct task_struct *tsk); void __kmap_local_sched_out(void); @@ -33,7 +33,7 @@ static inline void kmap_flush_tlb(unsigned long addr) { } #endif void *kmap_high(struct page *page); -void kunmap_high(struct page *page); +void kunmap_high(const struct page *page); void __kmap_flush_unused(void); struct page *__kmap_to_page(void *addr); @@ -50,7 +50,7 @@ static inline void *kmap(struct page *page) return addr; } -static inline void kunmap(struct page *page) +static inline void kunmap(const struct page *page) { might_sleep(); if (!PageHighMem(page)) @@ -68,12 +68,12 @@ static inline void kmap_flush_unused(void) __kmap_flush_unused(); } -static inline void *kmap_local_page(struct page *page) +static inline void *kmap_local_page(const struct page *page) { return __kmap_local_page_prot(page, 
kmap_prot); } -static inline void *kmap_local_page_try_from_panic(struct page *page) +static inline void *kmap_local_page_try_from_panic(const struct page *page) { if (!PageHighMem(page)) return page_address(page); @@ -81,13 +81,13 @@ static inline void *kmap_local_page_try_from_panic(struct page *page) return NULL; } -static inline void *kmap_local_folio(struct folio *folio, size_t offset) +static inline void *kmap_local_folio(const struct folio *folio, size_t offset) { - struct page *page = folio_page(folio, offset / PAGE_SIZE); + const struct page *page = folio_page(folio, offset / PAGE_SIZE); return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE; } -static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +static inline void *kmap_local_page_prot(const struct page *page, pgprot_t prot) { return __kmap_local_page_prot(page, prot); } @@ -102,7 +102,7 @@ static inline void __kunmap_local(const void *vaddr) kunmap_local_indexed(vaddr); } -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic_prot(const struct page *page, pgprot_t prot) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); @@ -113,7 +113,7 @@ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) return __kmap_local_page_prot(page, prot); } -static inline void *kmap_atomic(struct page *page) +static inline void *kmap_atomic(const struct page *page) { return kmap_atomic_prot(page, kmap_prot); } @@ -173,32 +173,32 @@ static inline void *kmap(struct page *page) return page_address(page); } -static inline void kunmap_high(struct page *page) { } +static inline void kunmap_high(const struct page *page) { } static inline void kmap_flush_unused(void) { } -static inline void kunmap(struct page *page) +static inline void kunmap(const struct page *page) { #ifdef ARCH_HAS_FLUSH_ON_KUNMAP kunmap_flush_on_unmap(page_address(page)); #endif } -static inline void *kmap_local_page(struct page *page) +static inline void 
*kmap_local_page(const struct page *page) { return page_address(page); } -static inline void *kmap_local_page_try_from_panic(struct page *page) +static inline void *kmap_local_page_try_from_panic(const struct page *page) { return page_address(page); } -static inline void *kmap_local_folio(struct folio *folio, size_t offset) +static inline void *kmap_local_folio(const struct folio *folio, size_t offset) { return folio_address(folio) + offset; } -static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +static inline void *kmap_local_page_prot(const struct page *page, pgprot_t prot) { return kmap_local_page(page); } @@ -215,7 +215,7 @@ static inline void __kunmap_local(const void *addr) #endif } -static inline void *kmap_atomic(struct page *page) +static inline void *kmap_atomic(const struct page *page) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) migrate_disable(); @@ -225,7 +225,7 @@ static inline void *kmap_atomic(struct page *page) return page_address(page); } -static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) +static inline void *kmap_atomic_prot(const struct page *page, pgprot_t prot) { return kmap_atomic(page); } diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 6234f316468c..105cc4c00cc3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -43,7 +43,7 @@ static inline void *kmap(struct page *page); * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of * pages in the low memory area. */ -static inline void kunmap(struct page *page); +static inline void kunmap(const struct page *page); /** * kmap_to_page - Get the page for a kmap'ed address @@ -93,7 +93,7 @@ static inline void kmap_flush_unused(void); * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_page() can rely on this side effect. 
*/ -static inline void *kmap_local_page(struct page *page); +static inline void *kmap_local_page(const struct page *page); /** * kmap_local_folio - Map a page in this folio for temporary usage @@ -129,7 +129,7 @@ static inline void *kmap_local_page(struct page *page); * Context: Can be invoked from any context. * Return: The virtual address of @offset. */ -static inline void *kmap_local_folio(struct folio *folio, size_t offset); +static inline void *kmap_local_folio(const struct folio *folio, size_t offset); /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! @@ -176,7 +176,7 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset); * kunmap_atomic(vaddr2); * kunmap_atomic(vaddr1); */ -static inline void *kmap_atomic(struct page *page); +static inline void *kmap_atomic(const struct page *page); /* Highmem related interfaces for management code */ static inline unsigned long nr_free_highpages(void); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a88b61eec3f8..568011930e35 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -316,9 +316,9 @@ static __always_inline unsigned long _compound_head(const struct page *page) * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. 
*/ -static inline struct page *folio_page(struct folio *folio, unsigned long n) +static inline struct page *folio_page(const struct folio *folio, unsigned long n) { - return &folio->page + n; + return (struct page *)(&folio->page + n); } static __always_inline int PageTail(const struct page *page) -- cgit v1.2.3 From 9fd53c8122271d9fe8b687f50a9bdf5588d41d0b Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Fri, 11 Jul 2025 13:55:09 +0800 Subject: mm/filemap: align last_index to folio size On XFS systems with pagesize=4K, blocksize=16K, and CONFIG_TRANSPARENT_HUGEPAGE enabled, We observed the following readahead behaviors: # echo 3 > /proc/sys/vm/drop_caches # dd if=test of=/dev/null bs=64k count=1 # ./tools/mm/page-types -r -L -f /mnt/xfs/test foffset offset flags 0 136d4c __RU_l_________H______t_________________F_1 1 136d4d __RU_l__________T_____t_________________F_1 2 136d4e __RU_l__________T_____t_________________F_1 3 136d4f __RU_l__________T_____t_________________F_1 ... c 136bb8 __RU_l_________H______t_________________F_1 d 136bb9 __RU_l__________T_____t_________________F_1 e 136bba __RU_l__________T_____t_________________F_1 f 136bbb __RU_l__________T_____t_________________F_1 <-- first read 10 13c2cc ___U_l_________H______t______________I__F_1 <-- readahead flag 11 13c2cd ___U_l__________T_____t______________I__F_1 12 13c2ce ___U_l__________T_____t______________I__F_1 13 13c2cf ___U_l__________T_____t______________I__F_1 ... 1c 1405d4 ___U_l_________H______t_________________F_1 1d 1405d5 ___U_l__________T_____t_________________F_1 1e 1405d6 ___U_l__________T_____t_________________F_1 1f 1405d7 ___U_l__________T_____t_________________F_1 [ra_size = 32, req_count = 16, async_size = 16] # echo 3 > /proc/sys/vm/drop_caches # dd if=test of=/dev/null bs=60k count=1 # ./page-types -r -L -f /mnt/xfs/test foffset offset flags 0 136048 __RU_l_________H______t_________________F_1 ... 
c 110a40 __RU_l_________H______t_________________F_1 d 110a41 __RU_l__________T_____t_________________F_1 e 110a42 __RU_l__________T_____t_________________F_1 <-- first read f 110a43 __RU_l__________T_____t_________________F_1 <-- first readahead flag 10 13e7a8 ___U_l_________H______t_________________F_1 ... 20 137a00 ___U_l_________H______t_______P______I__F_1 <-- second readahead flag (20 - 2f) 21 137a01 ___U_l__________T_____t_______P______I__F_1 ... 3f 10d4af ___U_l__________T_____t_______P_________F_1 [first readahead: ra_size = 32, req_count = 15, async_size = 17] When reading 64k data (same for 61-63k range, where last_index is page-aligned in filemap_get_pages()), 128k readahead is triggered via page_cache_sync_ra() and the PG_readahead flag is set on the next folio (the one containing 0x10 page). When reading 60k data, 128k readahead is also triggered via page_cache_sync_ra(). However, in this case the readahead flag is set on the 0xf page. Although the requested read size (req_count) is 60k, the actual read will be aligned to folio size (64k), which triggers the readahead flag and initiates asynchronous readahead via page_cache_async_ra(). This results in two readahead operations totaling 256k. The root cause is that when the requested size is smaller than the actual read size (due to folio alignment), it triggers asynchronous readahead. By changing last_index alignment from page size to folio size, we ensure the requested size matches the actual read size, preventing the case where a single read operation triggers two readahead operations. After applying the patch: # echo 3 > /proc/sys/vm/drop_caches # dd if=test of=/dev/null bs=60k count=1 # ./page-types -r -L -f /mnt/xfs/test foffset offset flags 0 136d4c __RU_l_________H______t_________________F_1 1 136d4d __RU_l__________T_____t_________________F_1 2 136d4e __RU_l__________T_____t_________________F_1 3 136d4f __RU_l__________T_____t_________________F_1 ... 
c 136bb8 __RU_l_________H______t_________________F_1 d 136bb9 __RU_l__________T_____t_________________F_1 e 136bba __RU_l__________T_____t_________________F_1 <-- first read f 136bbb __RU_l__________T_____t_________________F_1 10 13c2cc ___U_l_________H______t______________I__F_1 <-- readahead flag 11 13c2cd ___U_l__________T_____t______________I__F_1 12 13c2ce ___U_l__________T_____t______________I__F_1 13 13c2cf ___U_l__________T_____t______________I__F_1 ... 1c 1405d4 ___U_l_________H______t_________________F_1 1d 1405d5 ___U_l__________T_____t_________________F_1 1e 1405d6 ___U_l__________T_____t_________________F_1 1f 1405d7 ___U_l__________T_____t_________________F_1 [ra_size = 32, req_count = 16, async_size = 16] The same phenomenon will occur when reading from 49k to 64k. Set the readahead flag to the next folio. Because the minimum order of folio in address_space equals the block size (at least in xfs and bcachefs that already support bs > ps), having request_count aligned to block size will not cause overread. 
[klarasmodin@gmail.com: fix overflow on 32-bit] Link: https://lkml.kernel.org/r/yru7qf5gvyzccq5ohhpylvxug5lr5tf54omspbjh4sm6pcdb2r@fpjgj2pxw7va [akpm@linux-foundation.org: update it for Max's constification efforts] Link: https://lkml.kernel.org/r/20250711055509.91587-1-youling.tang@linux.dev Co-developed-by: Chi Zhiling Signed-off-by: Chi Zhiling Signed-off-by: Youling Tang Signed-off-by: Klara Modin Reviewed-by: Ryan Roberts Reviewed-by: Jan Kara Cc: Matthew Wilcox (Oracle) Cc: Youling Tang Cc: David Hildenbrand Cc: Klara Modin Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index aec4a11565bc..185644e288ea 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -482,6 +482,12 @@ mapping_min_folio_nrpages(const struct address_space *mapping) return 1UL << mapping_min_folio_order(mapping); } +static inline unsigned long +mapping_min_folio_nrbytes(const struct address_space *mapping) +{ + return mapping_min_folio_nrpages(mapping) << PAGE_SHIFT; +} + /** * mapping_align_index() - Align index for this mapping. * @mapping: The address_space. -- cgit v1.2.3 From 94326d3130b5e78a35265bbf7822148372b39231 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Sep 2025 20:10:39 +0100 Subject: mm: remove mlock_count from struct page All users now use folio->mlock_count so we can remove this element of struct page. Move the useful comments over to struct folio. 
Link: https://lkml.kernel.org/r/20250903191041.1630338-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 275e8060d918..ff2b4e13215f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -94,14 +94,6 @@ struct page { union { struct list_head lru; - /* Or, for the Unevictable "LRU list" slot */ - struct { - /* Always even, to negate PageTail */ - void *__filler; - /* Count page's or folio's mlocks */ - unsigned int mlock_count; - }; - /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; @@ -391,7 +383,9 @@ struct folio { union { struct list_head lru; /* private: avoid cluttering the output */ + /* For the Unevictable "LRU list" slot */ struct { + /* Avoid compound_head */ void *__filler; /* public: */ unsigned int mlock_count; -- cgit v1.2.3 From da939ef4c494246bc2102ecb628bbcc71d650410 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 2 Sep 2025 08:35:11 +0000 Subject: rust: maple_tree: add MapleTree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Add Rust abstraction for Maple Trees", v3. This will be used in the Tyr driver [1] to allocate from the GPU's VA space that is not owned by userspace, but by the kernel, for kernel GPU mappings. Danilo tells me that in nouveau, the maple tree is used for keeping track of "VM regions" on top of GPUVM, and that he will most likely end up doing the same in the Rust Nova driver as well. These abstractions intentionally do not expose any way to make use of external locking. You are required to use the internal spinlock. For now, we do not support loads that only utilize rcu for protection. This contains some parts taken from Andrew Ballance's RFC [2] from April. 
However, it has also been reworked significantly compared to that RFC taking the use-cases in Tyr into account. This patch (of 3): The maple tree will be used in the Tyr driver to allocate and keep track of GPU allocations created internally (i.e. not by userspace). It will likely also be used in the Nova driver eventually. This adds the simplest methods for addition and removal that do not require any special care with respect to concurrency. This implementation is based on the RFC by Andrew but with significant changes to simplify the implementation. [ojeda@kernel.org: fix intra-doc links] Link: https://lkml.kernel.org/r/20250910140212.997771-1-ojeda@kernel.org Link: https://lkml.kernel.org/r/20250902-maple-tree-v3-0-fb5c8958fb1e@google.com Link: https://lkml.kernel.org/r/20250902-maple-tree-v3-1-fb5c8958fb1e@google.com Link: https://lore.kernel.org/r/20250627-tyr-v1-1-cb5f4c6ced46@collabora.com [1] Link: https://lore.kernel.org/r/20250405060154.1550858-1-andrewjballance@gmail.com [2] Co-developed-by: Andrew Ballance Signed-off-by: Andrew Ballance Signed-off-by: Alice Ryhl Reviewed-by: Danilo Krummrich Cc: Andreas Hindborg Cc: Björn Roy Baron Cc: Boqun Feng Cc: Daniel Almeida Cc: Gary Guo Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Miguel Ojeda Cc: Trevor Gross Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 41e633264e51..05730171d201 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -481,6 +481,9 @@ struct ma_wr_state { #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) +/* + * When changing MA_STATE, remember to also change rust/kernel/maple_tree.rs + */ #define MA_STATE(name, mt, first, end) \ struct ma_state name = { \ .tree = mt, \ -- cgit v1.2.3 From a488ba3124c82d704963fcd760fe653df1987b13 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Fri, 5 Sep
2025 17:00:12 +0200 Subject: huge_memory: return -EINVAL in folio split functions when THP is disabled split_huge_page_to_list_[to_order](), split_huge_page() and try_folio_split() return 0 on success and error codes on failure. When THP is disabled, these functions return 0 indicating success even though an error code should be returned as it is not possible to split a folio when THP is disabled. Make all these functions return -EINVAL to indicate failure instead of 0. As large folios depend on CONFIG_THP, issue a warning, as these functions should not be called without a large folio. Link: https://lkml.kernel.org/r/20250905150012.93714-1-kernel@pankajraghav.com Signed-off-by: Pankaj Raghav Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509051753.riCeG7LC-lkp@intel.com/ Acked-by: David Hildenbrand Acked-by: Zi Yan Acked-by: Kiryl Shutsemau Reviewed-by: Lorenzo Stoakes Reviewed-by: Barry Song Reviewed-by: Anshuman Khandual Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 29ef70022da1..f327d62fc985 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -588,22 +588,26 @@ static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order) { - return 0; + VM_WARN_ON_ONCE_PAGE(1, page); + return -EINVAL; } static inline int split_huge_page(struct page *page) { - return 0; + VM_WARN_ON_ONCE_PAGE(1, page); + return -EINVAL; } static inline int split_folio_to_list(struct folio *folio, struct list_head *list) { - return 0; + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; } static inline int try_folio_split(struct folio *folio, struct page *page, struct list_head *list) { - return 0; + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; } static inline void deferred_split_folio(struct folio *folio, bool
partially_mapped) {} -- cgit v1.2.3 From 614d850efda98e4455e4f2b55e64864f68a4e370 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 4 Sep 2025 08:59:26 +1000 Subject: mm/memremap: remove unused get_dev_pagemap() parameter GUP no longer uses get_dev_pagemap(). As it was the only user of the get_dev_pagemap() pgmap caching feature it can be removed. Link: https://lkml.kernel.org/r/20250903225926.34702-2-apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: David Hildenbrand Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Cc: John Hubbard Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/memremap.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index aa1b6aa877a0..e5951ba12a28 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -211,8 +211,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); -struct dev_pagemap *get_dev_pagemap(unsigned long pfn, - struct dev_pagemap *pgmap); +struct dev_pagemap *get_dev_pagemap(unsigned long pfn); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); @@ -234,8 +233,7 @@ static inline void devm_memunmap_pages(struct device *dev, { } -static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, - struct dev_pagemap *pgmap) +static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn) { return NULL; } -- cgit v1.2.3 From 4522aed4fffbbd18ab3581d733d0572d45780d07 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 17 Sep 2025 00:00:51 +0800 Subject: mm, swap: rename and move some swap cluster definition and helpers No feature change, move cluster related definitions and helpers to mm/swap.h, also 
tidy up and add a "swap_" prefix for cluster lock/unlock helpers, so they can be used outside of swap files. And while at it, add kerneldoc. Link: https://lkml.kernel.org/r/20250916160100.31545-7-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: Baolin Wang Reviewed-by: Barry Song Acked-by: Chris Li Acked-by: David Hildenbrand Suggested-by: Chris Li Acked-by: Nhat Pham Cc: Baoquan He Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kemeng Shi Cc: kernel test robot Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Yosry Ahmed Cc: Zi Yan Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/swap.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index a2bb20841616..78cc48a65512 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -235,40 +235,6 @@ enum { /* Special value in each swap_map continuation */ #define SWAP_CONT_MAX 0x7f /* Max count */ -/* - * We use this to track usage of a cluster. A cluster is a block of swap disk - * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All - * free clusters are organized into a list. We fetch an entry from the list to - * get a free cluster. - * - * The flags field determines if a cluster is free. This is - * protected by cluster lock. - */ -struct swap_cluster_info { - spinlock_t lock; /* - * Protect swap_cluster_info fields - * other than list, and swap_info_struct->swap_map - * elements corresponding to the swap cluster. - */ - u16 count; - u8 flags; - u8 order; - struct list_head list; -}; - -/* All on-list cluster must have a non-zero flag. 
*/ -enum swap_cluster_flags { - CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ - CLUSTER_FLAG_FREE, - CLUSTER_FLAG_NONFULL, - CLUSTER_FLAG_FRAG, - /* Clusters with flags above are allocatable */ - CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, - CLUSTER_FLAG_FULL, - CLUSTER_FLAG_DISCARD, - CLUSTER_FLAG_MAX, -}; - /* * The first page in the swap file is the swap header, which is always marked * bad to prevent it from being allocated as an entry. This also prevents the -- cgit v1.2.3 From 0fcf8ef4fdab8e5c91d1bce39c7fe6565974ffad Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 17 Sep 2025 00:00:52 +0800 Subject: mm, swap: tidy up swap device and cluster info helpers swp_swap_info is the most commonly used helper for retrieving swap info. It has an internal check that may lead to a NULL return value, but almost none of its caller checks the return value, making the internal check pointless. In fact, most of these callers already ensured the entry is valid and never expect a NULL value. Tidy this up and improve the function names. If the caller can make sure the swap entry/type is valid and the device is pinned, use the new introduced __swap_entry_to_info/__swap_type_to_info instead. They have more debug sanity checks and lower overhead as they are inlined. Callers that may expect a NULL value should use swap_entry_to_info/swap_type_to_info instead. No feature change. The rearranged codes should have had no effect, or they should have been hitting NULL de-ref bugs already. Only some new sanity checks are added so potential issues may show up in debug build. The new helpers will be frequently used with swap table later when working with swap cache folios. A locked swap cache folio ensures the entries are valid and stable so these helpers are very helpful. 
Link: https://lkml.kernel.org/r/20250916160100.31545-8-ryncsn@gmail.com Signed-off-by: Kairui Song Acked-by: Chris Li Reviewed-by: Barry Song Acked-by: David Hildenbrand Suggested-by: Chris Li Cc: Baolin Wang Cc: Baoquan He Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kemeng Shi Cc: kernel test robot Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Cc: Zi Yan Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/swap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 78cc48a65512..762f8db0e811 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -479,7 +479,6 @@ extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); -struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); @@ -492,11 +491,6 @@ static inline void put_swap_device(struct swap_info_struct *si) } #else /* CONFIG_SWAP */ -static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) -{ - return NULL; -} - static inline struct swap_info_struct *get_swap_device(swp_entry_t entry) { return NULL; -- cgit v1.2.3 From 8578e0c00dcf0c58fbc32d4904ecaf8e802a6590 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 17 Sep 2025 00:00:56 +0800 Subject: mm, swap: use the swap table for the swap cache and switch API Introduce basic swap table infrastructures, which are now just a fixed-sized flat array inside each swap cluster, with access wrappers. Each cluster contains a swap table of 512 entries. Each table entry is an opaque atomic long. It could be in 3 types: a shadow type (XA_VALUE), a folio type (pointer), or NULL. 
In this first step, it only supports storing a folio or shadow, and it is a drop-in replacement for the current swap cache. Convert all swap cache users to use the new set of APIs. Chris Li has been suggesting using a new infrastructure for swap cache for better performance, and that idea combined well with the swap table as the new backing structure. Now the lock contention range is reduced to 2M clusters, which is much smaller than the 64M address_space. And we can also drop the multiple address_space design. All the internal work is done with swap_cache_get_* helpers. Swap cache lookup is still lock-less like before, and the helpers' contexts are the same as those of the original swap cache helpers. They still require a pin on the swap device to prevent the backing data from being freed. Swap cache updates are now protected by the swap cluster lock instead of the XArray lock. This is mostly handled internally, but new __swap_cache_* helpers require the caller to lock the cluster. So, a few new cluster access and locking helpers are also introduced. A fully cluster-based unified swap table can be implemented on top of this to take care of all count tracking and synchronization work, with dynamic allocation. It should reduce the memory usage while making the performance even better.
Link: https://lkml.kernel.org/r/20250916160100.31545-12-ryncsn@gmail.com Co-developed-by: Chris Li Signed-off-by: Chris Li Signed-off-by: Kairui Song Acked-by: Chris Li Suggested-by: Chris Li Cc: Baolin Wang Cc: Baoquan He Cc: Barry Song Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kemeng Shi Cc: kernel test robot Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Yosry Ahmed Cc: Zi Yan Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 762f8db0e811..e818fbade1e2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -480,8 +480,6 @@ extern int __swap_count(swp_entry_t entry); extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); struct backing_dev_info; -extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); -extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); sector_t swap_folio_sector(struct folio *folio); -- cgit v1.2.3 From 6106864b878e1ce5ecab4b8ffffff85e9ec69b78 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 2 Sep 2025 08:36:11 +0000 Subject: maple_tree: remove lockdep_map_p typedef Having the ma_external_lock field exist when CONFIG_LOCKDEP=n isn't used anywhere, so just get rid of it. This also avoids generating a typedef called lockdep_map_p that could overlap with typedefs in other header files. Link: https://lkml.kernel.org/r/20250902-maple-lockdep-p-v1-1-3ae5a398a379@google.com Signed-off-by: Alice Ryhl Reviewed-by: Danilo Krummrich Reviewed-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 05730171d201..47f9002ae92d 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -194,7 +194,6 @@ enum store_type { #define MAPLE_RESERVED_RANGE 4096 #ifdef CONFIG_LOCKDEP -typedef struct lockdep_map *lockdep_map_p; #define mt_lock_is_held(mt) \ (!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock)) @@ -207,7 +206,6 @@ typedef struct lockdep_map *lockdep_map_p; #define mt_on_stack(mt) (mt).ma_external_lock = NULL #else -typedef struct { /* nothing */ } lockdep_map_p; #define mt_lock_is_held(mt) 1 #define mt_write_lock_is_held(mt) 1 #define mt_set_external_lock(mt, lock) do { } while (0) @@ -230,8 +228,10 @@ typedef struct { /* nothing */ } lockdep_map_p; */ struct maple_tree { union { - spinlock_t ma_lock; - lockdep_map_p ma_external_lock; + spinlock_t ma_lock; +#ifdef CONFIG_LOCKDEP + struct lockdep_map *ma_external_lock; +#endif }; unsigned int ma_flags; void __rcu *ma_root; -- cgit v1.2.3 From 522abd92279a8ea55bcc687f77697d4c0aaba6c0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Sep 2025 18:11:00 +0100 Subject: ptdesc: convert __page_flags to pt_flags Patch series "Some ptdesc cleanups". The first two patches here are preparation for splitting struct ptdesc from struct page and struct folio. I think their only dependency is on the memdesc_flags_t patches from August which is in mm-new. The third patch is just something I noticed while working on the code. This patch (of 3): Use the new memdesc_flags_t type to show that these are the same bits as page/folio/slab and therefore have the zone/node/section information in them. Remove a use of ptdesc_folio() by converting pagetable_is_reserved() to use test_bit() directly.
Link: https://lkml.kernel.org/r/20250908171104.2409217-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250908171104.2409217-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ++++++- include/linux/mm_types.h | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a6bfa46937a8..8dd71392eba7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2934,6 +2934,11 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU */ +enum pt_flags { + PT_reserved = PG_reserved, + /* High bits are used for zone/node/section */ +}; + static inline struct ptdesc *virt_to_ptdesc(const void *x) { return page_ptdesc(virt_to_page(x)); @@ -2951,7 +2956,7 @@ static inline void *ptdesc_address(const struct ptdesc *pt) static inline bool pagetable_is_reserved(struct ptdesc *pt) { - return folio_test_reserved(ptdesc_folio(pt)); + return test_bit(PT_reserved, &pt->pt_flags.f); } /** diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ff2b4e13215f..f048dc80646e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -524,7 +524,7 @@ FOLIO_MATCH(compound_head, _head_3); /** * struct ptdesc - Memory descriptor for page tables. - * @__page_flags: Same as page flags. Powerpc only. + * @pt_flags: enum pt_flags plus zone/node/section. * @pt_rcu_head: For freeing page table pages. * @pt_list: List of used page tables. Used for s390 gmap shadow pages * (which are not linked into the user page tables) and x86 @@ -546,7 +546,7 @@ FOLIO_MATCH(compound_head, _head_3); * understanding of the issues. 
*/ struct ptdesc { - unsigned long __page_flags; + memdesc_flags_t pt_flags; union { struct rcu_head pt_rcu_head; @@ -584,7 +584,7 @@ struct ptdesc { #define TABLE_MATCH(pg, pt) \ static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) -TABLE_MATCH(flags, __page_flags); +TABLE_MATCH(flags, pt_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); -- cgit v1.2.3 From f0c92726e89f5c6c092526787465617a68af154f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Sep 2025 18:11:01 +0100 Subject: ptdesc: remove references to folios from __pagetable_ctor() and pagetable_dtor() In preparation for splitting struct ptdesc from struct page and struct folio, remove mentions of struct folio from these functions. Introduce ptdesc_nr_pages() to avoid using lruvec_stat_add/sub_folio() Link: https://lkml.kernel.org/r/20250908171104.2409217-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8dd71392eba7..25f56e209ec8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2097,9 +2097,9 @@ static inline long folio_nr_pages(const struct folio *folio) * page. compound_nr() can be called on a tail page, and is defined to * return 1 in that case. 
*/ -static inline long compound_nr(struct page *page) +static inline long compound_nr(const struct page *page) { - struct folio *folio = (struct folio *)page; + const struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags.f)) return 1; @@ -3066,21 +3066,26 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ +static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc) +{ + return compound_nr(ptdesc_page(ptdesc)); +} + static inline void __pagetable_ctor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); + pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); - __folio_set_pgtable(folio); - lruvec_stat_add_folio(folio, NR_PAGETABLE); + __SetPageTable(ptdesc_page(ptdesc)); + mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc)); } static inline void pagetable_dtor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); + pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags)); ptlock_free(ptdesc); - __folio_clear_pgtable(folio); - lruvec_stat_sub_folio(folio, NR_PAGETABLE); + __ClearPageTable(ptdesc_page(ptdesc)); + mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc)); } static inline void pagetable_dtor_free(struct ptdesc *ptdesc) -- cgit v1.2.3 From 90ec2df9dd31653ceac4a35d2440b108bdf27550 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Sep 2025 18:11:02 +0100 Subject: ptdesc: remove ptdesc_to_virt() This has the same effect as ptdesc_address() so convert the callers to use that and delete the function. Add kernel-doc for ptdesc_address(). 
Link: https://lkml.kernel.org/r/20250908171104.2409217-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: David Hildenbrand Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 25f56e209ec8..da6e0abad2cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2944,11 +2944,12 @@ static inline struct ptdesc *virt_to_ptdesc(const void *x) return page_ptdesc(virt_to_page(x)); } -static inline void *ptdesc_to_virt(const struct ptdesc *pt) -{ - return page_to_virt(ptdesc_page(pt)); -} - +/** + * ptdesc_address - Virtual address of page table. + * @pt: Page table descriptor. + * + * Return: The first byte of the page table described by @pt. + */ static inline void *ptdesc_address(const struct ptdesc *pt) { return folio_address(ptdesc_folio(pt)); -- cgit v1.2.3 From 602837268999912b3c0e0db21b67818ffbde7141 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 8 Sep 2025 16:55:34 +0200 Subject: readahead: add trace points Add a couple of trace points to make debugging readahead logic easier. 
[jack@suse.cz: v2] Link: https://lkml.kernel.org/r/20250909145849.5090-2-jack@suse.cz Link: https://lkml.kernel.org/r/20250908145533.31528-2-jack@suse.cz Signed-off-by: Jan Kara Tested-by: Pankaj Raghav Signed-off-by: Andrew Morton --- include/trace/events/readahead.h | 132 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 include/trace/events/readahead.h (limited to 'include') diff --git a/include/trace/events/readahead.h b/include/trace/events/readahead.h new file mode 100644 index 000000000000..0997ac5eceab --- /dev/null +++ b/include/trace/events/readahead.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM readahead + +#if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_READAHEAD_H + +#include +#include +#include +#include +#include + +TRACE_EVENT(page_cache_ra_unbounded, + TP_PROTO(struct inode *inode, pgoff_t index, unsigned long nr_to_read, + unsigned long lookahead_size), + + TP_ARGS(inode, index, nr_to_read, lookahead_size), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(pgoff_t, index) + __field(unsigned long, nr_to_read) + __field(unsigned long, lookahead_size) + ), + + TP_fast_assign( + __entry->i_ino = inode->i_ino; + __entry->s_dev = inode->i_sb->s_dev; + __entry->index = index; + __entry->nr_to_read = nr_to_read; + __entry->lookahead_size = lookahead_size; + ), + + TP_printk( + "dev=%d:%d ino=%lx index=%lu nr_to_read=%lu lookahead_size=%lu", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, + __entry->index, __entry->nr_to_read, __entry->lookahead_size + ) +); + +TRACE_EVENT(page_cache_ra_order, + TP_PROTO(struct inode *inode, pgoff_t index, struct file_ra_state *ra), + + TP_ARGS(inode, index, ra), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(pgoff_t, index) + __field(unsigned int, order) + __field(unsigned int, size) + 
__field(unsigned int, async_size) + __field(unsigned int, ra_pages) + ), + + TP_fast_assign( + __entry->i_ino = inode->i_ino; + __entry->s_dev = inode->i_sb->s_dev; + __entry->index = index; + __entry->order = ra->order; + __entry->size = ra->size; + __entry->async_size = ra->async_size; + __entry->ra_pages = ra->ra_pages; + ), + + TP_printk( + "dev=%d:%d ino=%lx index=%lu order=%u size=%u async_size=%u ra_pages=%u", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, + __entry->index, __entry->order, __entry->size, + __entry->async_size, __entry->ra_pages + ) +); + +DECLARE_EVENT_CLASS(page_cache_ra_op, + TP_PROTO(struct inode *inode, pgoff_t index, struct file_ra_state *ra, + unsigned long req_count), + + TP_ARGS(inode, index, ra, req_count), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(pgoff_t, index) + __field(unsigned int, order) + __field(unsigned int, size) + __field(unsigned int, async_size) + __field(unsigned int, ra_pages) + __field(unsigned int, mmap_miss) + __field(loff_t, prev_pos) + __field(unsigned long, req_count) + ), + + TP_fast_assign( + __entry->i_ino = inode->i_ino; + __entry->s_dev = inode->i_sb->s_dev; + __entry->index = index; + __entry->order = ra->order; + __entry->size = ra->size; + __entry->async_size = ra->async_size; + __entry->ra_pages = ra->ra_pages; + __entry->mmap_miss = ra->mmap_miss; + __entry->prev_pos = ra->prev_pos; + __entry->req_count = req_count; + ), + + TP_printk( + "dev=%d:%d ino=%lx index=%lu req_count=%lu order=%u size=%u async_size=%u ra_pages=%u mmap_miss=%u prev_pos=%lld", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, + __entry->index, __entry->req_count, __entry->order, + __entry->size, __entry->async_size, __entry->ra_pages, + __entry->mmap_miss, __entry->prev_pos + ) +); + +DEFINE_EVENT(page_cache_ra_op, page_cache_sync_ra, + TP_PROTO(struct inode *inode, pgoff_t index, struct file_ra_state *ra, + unsigned long req_count), + TP_ARGS(inode, 
index, ra, req_count) +); + +DEFINE_EVENT(page_cache_ra_op, page_cache_async_ra, + TP_PROTO(struct inode *inode, pgoff_t index, struct file_ra_state *ra, + unsigned long req_count), + TP_ARGS(inode, index, ra, req_count) +); + +#endif /* _TRACE_FILEMAP_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From e7a5f249e6db3b41a4618763e0f840639f3578f4 Mon Sep 17 00:00:00 2001 From: Chanwon Park Date: Mon, 8 Sep 2025 19:04:10 +0900 Subject: mm: re-enable kswapd when memory pressure subsides or demotion is toggled If kswapd fails to reclaim pages from a node MAX_RECLAIM_RETRIES in a row, kswapd on that node gets disabled. That is, the system won't wakeup kswapd for that node until page reclamation is observed at least once. That reclamation is mostly done by direct reclaim, which in turn enables kswapd back. However, on systems with CXL memory nodes, workloads with high anon page usage can disable kswapd indefinitely, without triggering direct reclaim. This can be reproduced with following steps: numa node 0 (32GB memory, 48 CPUs) numa node 2~5 (512GB CXL memory, 128GB each) (numa node 1 is disabled) swap space 8GB 1) Set /sys/kernel/mm/demotion_enabled to 0. 2) Set /proc/sys/kernel/numa_balancing to 0. 3) Run a process that allocates and random accesses 500GB of anon pages. 4) Let the process exit normally. During 3), free memory on node 0 gets lower than low watermark, and kswapd runs and depletes swap space. Then, kswapd fails consecutively and gets disabled. Allocation afterwards happens on CXL memory, so node 0 never gains more memory pressure to trigger direct reclaim. After 4), kswapd on node 0 remains disabled, and tasks running on that node are unable to swap. If you turn on NUMA_BALANCING_MEMORY_TIERING and demotion now, it won't work properly since kswapd is disabled. To mitigate this problem, reset kswapd_failures to 0 on following conditions: a) ZONE_BELOW_HIGH bit of a zone in hopeless node with a fallback memory node gets cleared. 
b) demotion_enabled is changed from false to true. Rationale for a): ZONE_BELOW_HIGH bit being cleared might be a sign that the node may be reclaimable afterwards. This won't help much if the memory-hungry process keeps running without freeing anything, but at least the node will go back to reclaimable state when the process exits. Rationale for b): When demotion_enabled is false, kswapd can only reclaim anon pages by swapping them out to swap space. If demotion_enabled is turned on, kswapd can demote anon pages to another node for reclaiming. So, the original failure count for determining reclaimability is no longer valid. Since kswapd_failures resets may be missed by ++ operation, it is changed from int to atomic_t. [akpm@linux-foundation.org: tweak whitespace] Link: https://lkml.kernel.org/r/aL6qGi69jWXfPc4D@pcw-MS-7D22 Signed-off-by: Chanwon Park Cc: Brendan Jackman Cc: David Hildenbrand Cc: Johannes Weiner Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Qi Zheng Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6c4eae96160d..7fb7331c5725 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1440,7 +1440,7 @@ typedef struct pglist_data { int kswapd_order; enum zone_type kswapd_highest_zoneidx; - int kswapd_failures; /* Number of 'reclaimed == 0' runs */ + atomic_t kswapd_failures; /* Number of 'reclaimed == 0' runs */ #ifdef CONFIG_COMPACTION int kcompactd_max_order; -- cgit v1.2.3 From 032c31127f27acb1b8152b512830ecef04ed2ebc Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 9 Sep 2025 13:13:57 -0700 Subject: mm: vm_event_item: explicit #include for THREAD_SIZE This header uses THREAD_SIZE, which is provided by the thread_info.h header but is not included in this header. 
Depending on the #include ordering in other files, this can produce preprocessor errors. Link: https://lkml.kernel.org/r/20250909201419.827638-1-briannorris@chromium.org Signed-off-by: Brian Norris Reviewed-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/vm_event_item.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9e15a088ba38..92f80b4d69a6 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -2,6 +2,8 @@ #ifndef VM_EVENT_ITEM_H_INCLUDED #define VM_EVENT_ITEM_H_INCLUDED +#include + #ifdef CONFIG_ZONE_DMA #define DMA_ZONE(xx) xx##_DMA, #else -- cgit v1.2.3 From fa17bcd5f65ed702df001579cca8c885fa6bf3e7 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Tue, 26 Aug 2025 11:37:21 -0400 Subject: mm: make folio page count functions return unsigned As raised by Andrew [1], a folio/compound page never spans a negative number of pages. Consequently, let's use "unsigned long" instead of "long" consistently for folio_nr_pages(), folio_large_nr_pages() and compound_nr(). Using "unsigned long" as return value is fine, because even "(long)-folio_nr_pages()" will keep on working as expected. Using "unsigned int" instead would actually break these use cases. This patch takes the first step changing these to return unsigned long (and making drm_gem_get_pages() use the new types instead of replacing min()). In the future, we might want to make more callers of these functions to consistently use "unsigned long". 
Link: https://lore.kernel.org/linux-mm/20250503182858.5a02729fcffd6d4723afcfc2@linux-foundation.org/ Link: https://lkml.kernel.org/r/20250826153721.GA23292@cathedrallabs.org Link: https://lore.kernel.org/linux-mm/20250503182858.5a02729fcffd6d4723afcfc2@linux-foundation.org/ [1] Signed-off-by: Aristeu Rozanski Suggested-by: Andrew Morton Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Simona Vetter Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index da6e0abad2cb..8f5b4df9b166 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1018,12 +1018,12 @@ static inline unsigned int folio_large_order(const struct folio *folio) } #ifdef NR_PAGES_IN_LARGE_FOLIO -static inline long folio_large_nr_pages(const struct folio *folio) +static inline unsigned long folio_large_nr_pages(const struct folio *folio) { return folio->_nr_pages; } #else -static inline long folio_large_nr_pages(const struct folio *folio) +static inline unsigned long folio_large_nr_pages(const struct folio *folio) { return 1L << folio_large_order(folio); } @@ -2062,7 +2062,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone, * * Return: A positive power of two. */ -static inline long folio_nr_pages(const struct folio *folio) +static inline unsigned long folio_nr_pages(const struct folio *folio) { if (!folio_test_large(folio)) return 1; @@ -2097,7 +2097,7 @@ static inline long folio_nr_pages(const struct folio *folio) * page. compound_nr() can be called on a tail page, and is defined to * return 1 in that case. 
*/ -static inline long compound_nr(const struct page *page) +static inline unsigned long compound_nr(const struct page *page) { const struct folio *folio = (struct folio *)page; -- cgit v1.2.3 From 3a37469e5ac004905e4125bf60b43cc5216e83dc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Sep 2025 15:29:17 +0100 Subject: mm: constify compound_order() and page_size() Patch series "Small cleanups". These small cleanups can be applied now to reduce conflicts during the next merge window. They're all from various efforts to split struct page from other memdescs. Thanks to Vlastimil for the suggestion. This patch (of 3): These functions do not modify their arguments. Telling the compiler this may improve code generation, and allows us to pass const arguments from other functions. Link: https://lkml.kernel.org/r/20250910142923.2465470-1-willy@infradead.org Link: https://lkml.kernel.org/r/20250910142923.2465470-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f5b4df9b166..fcb1e72eea40 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1036,9 +1036,9 @@ static inline unsigned long folio_large_nr_pages(const struct folio *folio) * set before the order is initialised, or this may be a tail page. * See compaction.c for some good examples. 
*/ -static inline unsigned int compound_order(struct page *page) +static inline unsigned int compound_order(const struct page *page) { - struct folio *folio = (struct folio *)page; + const struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags.f)) return 0; @@ -1256,7 +1256,7 @@ int folio_mc_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); /* Returns the number of bytes in this potentially compound page. */ -static inline unsigned long page_size(struct page *page) +static inline unsigned long page_size(const struct page *page) { return PAGE_SIZE << compound_order(page); } -- cgit v1.2.3 From 9d003dec972563efb8ce14c9962af3652d0e201d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 10 Sep 2025 15:29:19 +0100 Subject: mm: remove page->order We already use page->private for storing the order of a page while it's in the buddy allocator system; extend that to also storing the order while it's in the pcp_llist. Link: https://lkml.kernel.org/r/20250910142923.2465470-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Alexei Starovoitov Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f048dc80646e..6920c816f6c6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -97,10 +97,7 @@ struct page { /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; - struct { - struct llist_node pcp_llist; - unsigned int order; - }; + struct llist_node pcp_llist; }; struct address_space *mapping; union { @@ -111,7 +108,8 @@ struct page { * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. * Used for swp_entry_t if swapcache flag set. - * Indicates order in the buddy system if PageBuddy. 
+ * Indicates order in the buddy system if PageBuddy + * or on pcp_llist. */ unsigned long private; }; -- cgit v1.2.3 From d02ac836e4d6bdfd7d44927d01a4cd048ad4aba8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 13 Sep 2025 17:03:39 -0700 Subject: include/linux/pgtable.h: convert arch_enter_lazy_mmu_mode() and friends to static inlines For all the usual reasons, plus a new one. Calling (void)arch_enter_lazy_mmu_mode(); deservedly blows up. Cc: Balbir Singh Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 94249e671a7e..32e8457ad535 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -232,9 +232,9 @@ static inline int pmd_dirty(pmd_t pmd) * and the mode cannot be used in interrupt context. */ #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() do {} while (0) -#define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_flush_lazy_mmu_mode() do {} while (0) +static inline void arch_enter_lazy_mmu_mode(void) {} +static inline void arch_leave_lazy_mmu_mode(void) {} +static inline void arch_flush_lazy_mmu_mode(void) {} #endif #ifndef pte_batch_hint -- cgit v1.2.3 From 59d4d36158ba3cdbce141d8e9261eea154d4c441 Mon Sep 17 00:00:00 2001 From: zhongjinji Date: Tue, 16 Sep 2025 00:29:45 +0800 Subject: mm/oom_kill: thaw the entire OOM victim process Patch series "Improvements to Victim Process Thawing and OOM Reaper Traversal Order", v10. This patch series focuses on optimizing victim process thawing and refining the traversal order of the OOM reaper. Since __thaw_task() is used to thaw a single thread of the victim, thawing only one thread cannot guarantee the exit of the OOM victim when it is frozen. Patch 1 thaw the entire process of the OOM victim to ensure that OOM victims are able to terminate themselves. 
Even if the oom_reaper is delayed, patch 2 is still beneficial for reaping processes with a large address space footprint, and it also greatly improves process_mrelease. This patch (of 10): OOM killer is a mechanism that selects and kills processes when the system runs out of memory to reclaim resources and keep the system stable. But the oom victim cannot terminate on its own when it is frozen, even if the OOM victim task is thawed through __thaw_task(). This is because __thaw_task() can only thaw a single OOM victim thread, and cannot thaw the entire OOM victim process. In addition, freezing_slow_path() determines whether a task is an OOM victim by checking the task's TIF_MEMDIE flag. When a task is identified as an OOM victim, the freezer bypasses both PM freezing and cgroup freezing states to thaw it. Historically, TIF_MEMDIE was a "this is the oom victim & it has access to memory reserves" flag. It has thread vs. process problems, and tsk_is_oom_victim() was introduced later to get rid of them and other issues, as well as to guarantee that we can identify the oom victim's mm reliably for other oom_reaper. Therefore, thaw_process() is introduced to unfreeze all threads within the OOM victim process, ensuring that every thread is properly thawed. The freezer now uses tsk_is_oom_victim() to determine OOM victim status, allowing all victim threads to be unfrozen as necessary. With this change, the entire OOM victim process will be thawed when an OOM event occurs, ensuring that the victim can terminate on its own. Link: https://lkml.kernel.org/r/20250915162946.5515-1-zhongjinji@honor.com Link: https://lkml.kernel.org/r/20250915162946.5515-2-zhongjinji@honor.com Signed-off-by: zhongjinji Reviewed-by: Suren Baghdasaryan Acked-by: Shakeel Butt Acked-by: Michal Hocko Acked-by: Liam R.
Howlett Cc: David Rientjes Cc: Len Brown Cc: Lorenzo Stoakes Cc: Thomas Gleinxer Signed-off-by: Andrew Morton --- include/linux/freezer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index b303472255be..32884c9721e5 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,6 +47,7 @@ extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); +extern void thaw_process(struct task_struct *p); static inline bool try_to_freeze(void) { @@ -80,6 +81,7 @@ static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} static inline void thaw_kernel_threads(void) {} +static inline void thaw_process(struct task_struct *p) {} static inline bool try_to_freeze(void) { return false; } -- cgit v1.2.3 From b9e2f58ffb84afcbba7e66f96ca14f98e0e88f26 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 15 Sep 2025 16:02:24 -0700 Subject: alloc_tag: mark inaccurate allocation counters in /proc/allocinfo output While rare, memory allocation profiling can contain inaccurate counters if slab object extension vector allocation fails. That allocation might succeed later but prior to that, slab allocations that would have used that object extension vector will not be accounted for. To indicate incorrect counters, "accurate:no" marker is appended to the call site line in the /proc/allocinfo output. Bump up /proc/allocinfo version to reflect the change in the file format and update documentation. 
Example output with invalid counters: allocinfo - version: 2.0 0 0 arch/x86/kernel/kdebugfs.c:105 func:create_setup_data_nodes 0 0 arch/x86/kernel/alternative.c:2090 func:alternatives_smp_module_add 0 0 arch/x86/kernel/alternative.c:127 func:__its_alloc accurate:no 0 0 arch/x86/kernel/fpu/regset.c:160 func:xstateregs_set 0 0 arch/x86/kernel/fpu/xstate.c:1590 func:fpstate_realloc 0 0 arch/x86/kernel/cpu/aperfmperf.c:379 func:arch_enable_hybrid_capacity_scale 0 0 arch/x86/kernel/cpu/amd_cache_disable.c:258 func:init_amd_l3_attrs 49152 48 arch/x86/kernel/cpu/mce/core.c:2709 func:mce_device_create accurate:no 32768 1 arch/x86/kernel/cpu/mce/genpool.c:132 func:mce_gen_pool_create 0 0 arch/x86/kernel/cpu/mce/amd.c:1341 func:mce_threshold_create_device [surenb@google.com: document new "accurate:no" marker] Fixes: 39d117e04d15 ("alloc_tag: mark inaccurate allocation counters in /proc/allocinfo output") [akpm@linux-foundation.org: simplification per Usama, reflow text] [akpm@linux-foundation.org: add newline to prevent docs warning, per Randy] Link: https://lkml.kernel.org/r/20250915230224.4115531-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Usama Arif Acked-by: Johannes Weiner Cc: David Rientjes Cc: David Wang <00107082@163.com> Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Roman Gushchin Cc: Sourav Panda Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 12 ++++++++++++ include/linux/codetag.h | 5 ++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 9ef2633e2c08..d40ac39bfbe8 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -221,6 +221,16 @@ static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) ref->ct = NULL; } +static inline void alloc_tag_set_inaccurate(struct alloc_tag *tag) +{ + tag->ct.flags |= CODETAG_FLAG_INACCURATE; +} + +static 
inline bool alloc_tag_is_inaccurate(struct alloc_tag *tag) +{ + return !!(tag->ct.flags & CODETAG_FLAG_INACCURATE); +} + #define alloc_tag_record(p) ((p) = current->alloc_tag) #else /* CONFIG_MEM_ALLOC_PROFILING */ @@ -230,6 +240,8 @@ static inline bool mem_alloc_profiling_enabled(void) { return false; } static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) {} static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {} +static inline void alloc_tag_set_inaccurate(struct alloc_tag *tag) {} +static inline bool alloc_tag_is_inaccurate(struct alloc_tag *tag) { return false; } #define alloc_tag_record(p) do {} while (0) #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/include/linux/codetag.h b/include/linux/codetag.h index 457ed8fd3214..8ea2a5f7c98a 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -16,13 +16,16 @@ struct module; #define CODETAG_SECTION_START_PREFIX "__start_" #define CODETAG_SECTION_STOP_PREFIX "__stop_" +/* codetag flags */ +#define CODETAG_FLAG_INACCURATE (1 << 0) + /* * An instance of this structure is created in a special ELF section at every * code location being tagged. At runtime, the special section is treated as * an array of these. */ struct codetag { - unsigned int flags; /* used in later patches */ + unsigned int flags; unsigned int lineno; const char *modname; const char *function; -- cgit v1.2.3 From ab152db3cae520154d572cff32e63de441672454 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 15 Sep 2025 20:35:05 -0700 Subject: mm/damon/core: implement damon_initialized() function Patch series "mm/damon: define and use DAMON initialization check function". DAMON is initialized in subsystem initialization time, by damon_init(). If DAMON API functions are called before the initialization, the system could crash. Actually such issues happened and were fixed [1] in the past. 
For the fix, DAMON API callers have been updated to check if DAMON is initialized or not, using their own hacks. The hacks are unnecessarily duplicated in every DAMON API caller and therefore it would be difficult to reliably maintain in the long term. Make it reliable and easy to maintain. For this, implement a new DAMON core layer API function that returns whether DAMON is successfully initialized. If it returns true, it means DAMON API functions are safe to be used. After the introduction of the new API, update DAMON API callers to use the new function instead of their own hacks. This patch (of 7): If DAMON is used when it is not yet successfully initialized, the caller could crash. DAMON core layer is not providing a reliable way to see if it is successfully initialized and therefore ready to be used, though. As a result, DAMON API callers are implementing their own hacks to see it. The hacks simply assume DAMON should be ready on module init time. It is not reliable as DAMON initialization can indeed fail if KMEM_CACHE() fails, and difficult to maintain as those are duplicates. Implement a core layer API function for better reliability and maintainability to replace the hacks with followup commits.
Link: https://lkml.kernel.org/r/20250916033511.116366-2-sj@kernel.org Link: https://lkml.kernel.org/r/20250916033511.116366-2-sj@kernel.org Link: https://lore.kernel.org/20250909022238.2989-1-sj@kernel.org [1] Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index aa7381be388c..cae8c613c5fc 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -938,6 +938,7 @@ static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs } +bool damon_initialized(void); int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); bool damon_is_running(struct damon_ctx *ctx); -- cgit v1.2.3 From 473b73222f3d8cc66bcd840bf9c3260619620789 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Mon, 8 Sep 2025 13:20:28 +0530 Subject: mm: drop all references of writable and SCAN_PAGE_RO Now that all actionable outcomes from checking pte_write() are gone, drop the related references. 
Link: https://lkml.kernel.org/r/20250908075028.38431-3-dev.jain@arm.com Signed-off-by: Dev Jain Acked-by: David Hildenbrand Acked-by: Zi Yan Reviewed-by: Kiryl Shutsemau Reviewed-by: Lorenzo Stoakes Reviewed-by: Baolin Wang Reviewed-by: Zach O'Keefe Reviewed-by: Anshuman Khandual Cc: Baolin Wang Cc: Barry Song Cc: Hugh Dickins Cc: Liam Howlett Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Ryan Roberts Cc: Wei Yang Signed-off-by: Andrew Morton --- include/trace/events/huge_memory.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 2305df6cb485..dd94d14a2427 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -19,7 +19,6 @@ EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ EM( SCAN_PTE_UFFD_WP, "pte_uffd_wp") \ EM( SCAN_PTE_MAPPED_HUGEPAGE, "pte_mapped_hugepage") \ - EM( SCAN_PAGE_RO, "no_writable_page") \ EM( SCAN_LACK_REFERENCED_PAGE, "lack_referenced_page") \ EM( SCAN_PAGE_NULL, "page_null") \ EM( SCAN_SCAN_ABORT, "scan_aborted") \ @@ -55,15 +54,14 @@ SCAN_STATUS TRACE_EVENT(mm_khugepaged_scan_pmd, - TP_PROTO(struct mm_struct *mm, struct folio *folio, bool writable, + TP_PROTO(struct mm_struct *mm, struct folio *folio, int referenced, int none_or_zero, int status, int unmapped), - TP_ARGS(mm, folio, writable, referenced, none_or_zero, status, unmapped), + TP_ARGS(mm, folio, referenced, none_or_zero, status, unmapped), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, pfn) - __field(bool, writable) __field(int, referenced) __field(int, none_or_zero) __field(int, status) @@ -73,17 +71,15 @@ TRACE_EVENT(mm_khugepaged_scan_pmd, TP_fast_assign( __entry->mm = mm; __entry->pfn = folio ? 
folio_pfn(folio) : -1; - __entry->writable = writable; __entry->referenced = referenced; __entry->none_or_zero = none_or_zero; __entry->status = status; __entry->unmapped = unmapped; ), - TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s, unmapped=%d", + TP_printk("mm=%p, scan_pfn=0x%lx, referenced=%d, none_or_zero=%d, status=%s, unmapped=%d", __entry->mm, __entry->pfn, - __entry->writable, __entry->referenced, __entry->none_or_zero, __print_symbolic(__entry->status, SCAN_STATUS), @@ -117,15 +113,14 @@ TRACE_EVENT(mm_collapse_huge_page, TRACE_EVENT(mm_collapse_huge_page_isolate, TP_PROTO(struct folio *folio, int none_or_zero, - int referenced, bool writable, int status), + int referenced, int status), - TP_ARGS(folio, none_or_zero, referenced, writable, status), + TP_ARGS(folio, none_or_zero, referenced, status), TP_STRUCT__entry( __field(unsigned long, pfn) __field(int, none_or_zero) __field(int, referenced) - __field(bool, writable) __field(int, status) ), @@ -133,15 +128,13 @@ TRACE_EVENT(mm_collapse_huge_page_isolate, __entry->pfn = folio ? folio_pfn(folio) : -1; __entry->none_or_zero = none_or_zero; __entry->referenced = referenced; - __entry->writable = writable; __entry->status = status; ), - TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s", + TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, status=%s", __entry->pfn, __entry->none_or_zero, __entry->referenced, - __entry->writable, __print_symbolic(__entry->status, SCAN_STATUS)) ); -- cgit v1.2.3 From d6d673dd1e92b2bed0096e7e7e9fe5d7e7d2156c Mon Sep 17 00:00:00 2001 From: Ashwini Sahu Date: Mon, 8 Sep 2025 15:26:45 +0530 Subject: uapi: vduse: fix typo in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a spelling mistake in vduse.h: "regsion" → "region" in the documentation for struct vduse_iova_info. No functional change. 
Signed-off-by: Ashwini Sahu Message-Id: <20250908095645.610336-1-ashwini@wisig.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vduse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 68a627d04afa..10ad71aa00d6 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -237,7 +237,7 @@ struct vduse_iova_umem { * struct vduse_iova_info - information of one IOVA region * @start: start of the IOVA region * @last: last of the IOVA region - * @capability: capability of the IOVA regsion + * @capability: capability of the IOVA region * @reserved: for future use, needs to be initialized to zero * * Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of -- cgit v1.2.3 From a05e4e935a6689542d86162b33a484cc704ce39a Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Fri, 29 Aug 2025 17:09:44 +0200 Subject: virtio_config: clarify output parameters This was ambiguous enough for a broken patch (206cc44588f7 ("virtio: reject shm region if length is zero")) to make it into the kernel, so make it clearer. Link: https://lore.kernel.org/r/20250816071600-mutt-send-email-mst@kernel.org/ Signed-off-by: Alyssa Ross Message-Id: <20250829150944.233505-1-hi@alyssa.is> Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8bf156dde554..7427b79d6f3d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -193,14 +193,15 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, } static inline void virtio_get_features(struct virtio_device *vdev, - u64 *features) + u64 *features_out) { if (vdev->config->get_extended_features) { - vdev->config->get_extended_features(vdev, features); + vdev->config->get_extended_features(vdev, features_out); return; } - virtio_features_from_u64(features, vdev->config->get_features(vdev)); + virtio_features_from_u64(features_out, + vdev->config->get_features(vdev)); } /** @@ -326,11 +327,11 @@ int virtqueue_set_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) static inline bool virtio_get_shm_region(struct virtio_device *vdev, - struct virtio_shm_region *region, u8 id) + struct virtio_shm_region *region_out, u8 id) { if (!vdev->config->get_shm_region) return false; - return vdev->config->get_shm_region(vdev, region, id); + return vdev->config->get_shm_region(vdev, region_out, id); } static inline bool virtio_is_little_endian(struct virtio_device *vdev) -- cgit v1.2.3 From 2ee3a75e42081db3d951c0893f5d654f16d1c0e8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 18 Jul 2025 11:26:15 +1000 Subject: nfsd: discard nfsd_file_get_local() This interface was deprecated by commit e6f7e1487ab5 ("nfs_localio: simplify interface to nfsd for getting nfsd_file") and is now unused. So let's remove it. 
Signed-off-by: NeilBrown Reviewed-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/nfslocalio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 5c7c92659e73..59ea90bd136b 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -63,7 +63,6 @@ struct nfsd_localio_operations { struct nfsd_file __rcu **pnf, const fmode_t); struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); - struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; -- cgit v1.2.3 From c97b737ef8f10f28424822c139e3b22b9e9bcc2b Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Fri, 18 Jul 2025 11:09:56 +0300 Subject: sunrpc: Change ret code of xdr_stream_decode_opaque_fixed Since the opaque is fixed in size, the caller already knows how many bytes were decoded, on success. Thus, xdr_stream_decode_opaque_fixed() doesn't need to return that value. And, xdr_stream_decode_u32 and _u64 both return zero on success. This patch simplifies the caller's error checking to avoid potential integer promotion issues. 
Suggested-by: Dan Carpenter Signed-off-by: Sergey Bashirov Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8a9ec617cf66..8d354015d762 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -721,7 +721,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) * @len: size of buffer pointed to by @ptr * * Return values: - * On success, returns size of object stored in @ptr + * %0 on success * %-EBADMSG on XDR buffer overflow */ static inline ssize_t @@ -732,7 +732,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) if (unlikely(!p)) return -EBADMSG; xdr_decode_opaque_fixed(p, ptr, len); - return len; + return 0; } /** -- cgit v1.2.3 From afc5b36e29b95fbd31a60b9630d148857e5e513d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2025 09:24:32 -0400 Subject: vfs: add ATTR_CTIME_SET flag When ATTR_ATIME_SET and ATTR_MTIME_SET are set in the ia_valid mask, the notify_change() logic takes that to mean that the request should set those values explicitly, and not override them with "now". With the advent of delegated timestamps, similar functionality is needed for the ctime. Add a ATTR_CTIME_SET flag, and use that to indicate that the ctime should be accepted as-is. Also, clean up the if statements to eliminate the extra negatives. In setattr_copy() and setattr_copy_mgtime() use inode_set_ctime_deleg() when ATTR_CTIME_SET is set, instead of basing the decision on ATTR_DELEG. 
Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 601d036a6c78..74f2bfc51926 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -238,6 +238,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_ATIME_SET (1 << 7) #define ATTR_MTIME_SET (1 << 8) #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ +#define ATTR_CTIME_SET (1 << 10) #define ATTR_KILL_SUID (1 << 11) #define ATTR_KILL_SGID (1 << 12) #define ATTR_FILE (1 << 13) -- cgit v1.2.3 From 898374fdd7f06fa4c4a66e8be3135efeae6128d5 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 19 Aug 2025 14:04:02 -0400 Subject: nfsd: unregister with rpcbind when deleting a transport When a listener is added, a part of creation of transport also registers program/port with rpcbind. However, when the listener is removed, while transport goes away, rpcbind still has the entry for that port/type. When deleting the transport, unregister with rpcbind when appropriate. ---v2 created a new xpt_flag XPT_RPCB_UNREG to mark TCP and UDP transport and at xprt destroy send rpcbind unregister if flag set. Suggested-by: Chuck Lever Fixes: d093c9089260 ("nfsd: fix management of listener transports") Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 369a89aea186..2b886f7eb295 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -104,6 +104,9 @@ enum { * it has access to. It is NOT counted * in ->sv_tmpcnt. 
*/ + XPT_RPCB_UNREG, /* transport that needs unregistering + * with rpcbind (TCP, UDP) on destroy + */ }; /* -- cgit v1.2.3 From d73d06dac604043b94a5f18ebb6a69da1b867702 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Aug 2025 10:27:28 -0400 Subject: SUNRPC: Move the svc_rpcb_cleanup() call sites Clean up: because svc_rpcb_cleanup() and svc_xprt_destroy_all() are always invoked in pairs, we can deduplicate code by moving the svc_rpcb_cleanup() call sites into svc_xprt_destroy_all(). Tested-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 2b886f7eb295..da2a2531e110 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -168,7 +168,8 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred); -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, + bool unregister); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); -- cgit v1.2.3 From e0d3bba84ff8b82d4e8820856a7850afb17c14f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 19 Sep 2025 12:23:25 +0200 Subject: wifi: cfg80211: remove IEEE80211_CHAN_{1,2,4,8,16}MHZ flags These were used by S1G for older chandef representation, but are no longer needed. Clean them up, even if we can't drop them from the userspace API entirely. 
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1c041ce7a03b..781624f5913a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -101,16 +101,6 @@ struct wiphy; * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel. - * @IEEE80211_CHAN_1MHZ: 1 MHz bandwidth is permitted - * on this channel. - * @IEEE80211_CHAN_2MHZ: 2 MHz bandwidth is permitted - * on this channel. - * @IEEE80211_CHAN_4MHZ: 4 MHz bandwidth is permitted - * on this channel. - * @IEEE80211_CHAN_8MHZ: 8 MHz bandwidth is permitted - * on this channel. - * @IEEE80211_CHAN_16MHZ: 16 MHz bandwidth is permitted - * on this channel. * @IEEE80211_CHAN_NO_320MHZ: If the driver supports 320 MHz on the band, * this flag indicates that a 320 MHz channel cannot use this * channel as the control or any of the secondary channels. @@ -152,11 +142,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_20MHZ = BIT(11), IEEE80211_CHAN_NO_10MHZ = BIT(12), IEEE80211_CHAN_NO_HE = BIT(13), - IEEE80211_CHAN_1MHZ = BIT(14), - IEEE80211_CHAN_2MHZ = BIT(15), - IEEE80211_CHAN_4MHZ = BIT(16), - IEEE80211_CHAN_8MHZ = BIT(17), - IEEE80211_CHAN_16MHZ = BIT(18), + /* can use free bits here */ IEEE80211_CHAN_NO_320MHZ = BIT(19), IEEE80211_CHAN_NO_EHT = BIT(20), IEEE80211_CHAN_DFS_CONCURRENT = BIT(21), -- cgit v1.2.3 From d9a2211dd3aee3ef29fc675f70a1941bc3f4f51f Mon Sep 17 00:00:00 2001 From: Haixu Cui Date: Mon, 8 Sep 2025 17:23:46 +0800 Subject: virtio: Add ID for virtio SPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add VIRTIO_ID_SPI definition for virtio SPI. 
Signed-off-by: Haixu Cui Reviewed-by: Viresh Kumar Reviewed-by: Alex Bennée Link: https://patch.msgid.link/20250908092348.1283552-2-quic_haixcui@quicinc.com Signed-off-by: Mark Brown --- include/uapi/linux/virtio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 7aa2eb766205..6c12db16faa3 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -68,6 +68,7 @@ #define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ #define VIRTIO_ID_GPIO 41 /* virtio gpio */ +#define VIRTIO_ID_SPI 45 /* virtio spi */ /* * Virtio Transitional IDs -- cgit v1.2.3 From 6a1f3390fafeafe130b8128b3047452b92911a98 Mon Sep 17 00:00:00 2001 From: Haixu Cui Date: Mon, 8 Sep 2025 17:23:47 +0800 Subject: virtio-spi: Add virtio-spi.h Add virtio-spi.h header for virtio SPI. Signed-off-by: Haixu Cui Link: https://patch.msgid.link/20250908092348.1283552-3-quic_haixcui@quicinc.com Signed-off-by: Mark Brown --- include/uapi/linux/virtio_spi.h | 181 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 include/uapi/linux/virtio_spi.h (limited to 'include') diff --git a/include/uapi/linux/virtio_spi.h b/include/uapi/linux/virtio_spi.h new file mode 100644 index 000000000000..8ab3c970cdd3 --- /dev/null +++ b/include/uapi/linux/virtio_spi.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (C) 2023 OpenSynergy GmbH + * Copyright (C) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ */ +#ifndef _LINUX_VIRTIO_VIRTIO_SPI_H +#define _LINUX_VIRTIO_VIRTIO_SPI_H + +#include +#include +#include +#include + +/* Sample data on trailing clock edge */ +#define VIRTIO_SPI_CPHA _BITUL(0) +/* Clock is high when IDLE */ +#define VIRTIO_SPI_CPOL _BITUL(1) +/* Chip Select is active high */ +#define VIRTIO_SPI_CS_HIGH _BITUL(2) +/* Transmit LSB first */ +#define VIRTIO_SPI_MODE_LSB_FIRST _BITUL(3) +/* Loopback mode */ +#define VIRTIO_SPI_MODE_LOOP _BITUL(4) + +/** + * struct virtio_spi_config - All config fields are read-only for the + * Virtio SPI driver + * @cs_max_number: maximum number of chipselect the host SPI controller + * supports. + * @cs_change_supported: indicates if the host SPI controller supports to toggle + * chipselect after each transfer in one message: + * 0: unsupported, chipselect will be kept in active state throughout the + * message transaction; + * 1: supported. + * Note: Message here contains a sequence of SPI transfers. + * @tx_nbits_supported: indicates the supported number of bit for writing: + * bit 0: DUAL (2-bit transfer), 1 for supported + * bit 1: QUAD (4-bit transfer), 1 for supported + * bit 2: OCTAL (8-bit transfer), 1 for supported + * other bits are reserved as 0, 1-bit transfer is always supported. + * @rx_nbits_supported: indicates the supported number of bit for reading: + * bit 0: DUAL (2-bit transfer), 1 for supported + * bit 1: QUAD (4-bit transfer), 1 for supported + * bit 2: OCTAL (8-bit transfer), 1 for supported + * other bits are reserved as 0, 1-bit transfer is always supported. + * @bits_per_word_mask: mask indicating which values of bits_per_word are + * supported. If not set, no limitation for bits_per_word. + * @mode_func_supported: indicates the following features are supported or not: + * bit 0-1: CPHA feature + * 0b00: invalid, should support at least one CPHA setting + * 0b01: supports CPHA=0 only + * 0b10: supports CPHA=1 only + * 0b11: supports CPHA=0 and CPHA=1.
+ * bit 2-3: CPOL feature + * 0b00: invalid, should support at least one CPOL setting + * 0b01: supports CPOL=0 only + * 0b10: supports CPOL=1 only + * 0b11: supports CPOL=0 and CPOL=1. + * bit 4: chipselect active high feature, 0 for unsupported and 1 for + * supported, chipselect active low is supported by default. + * bit 5: LSB first feature, 0 for unsupported and 1 for supported, + * MSB first is supported by default. + * bit 6: loopback mode feature, 0 for unsupported and 1 for supported, + * normal mode is supported by default. + * @max_freq_hz: the maximum clock rate supported in Hz unit, 0 means no + * limitation for transfer speed. + * @max_word_delay_ns: the maximum word delay supported, in nanoseconds. + * A value of 0 indicates that word delay is unsupported. + * Each transfer may consist of a sequence of words. + * @max_cs_setup_ns: the maximum delay supported after chipselect is asserted, + * in ns unit, 0 means delay is not supported to introduce after chipselect is + * asserted. + * @max_cs_hold_ns: the maximum delay supported before chipselect is deasserted, + * in ns unit, 0 means delay is not supported to introduce before chipselect + * is deasserted. + * @max_cs_inactive_ns: maximum delay supported after chipselect is deasserted, + * in ns unit, 0 means delay is not supported to introduce after chipselect is + * deasserted.
+ */ +struct virtio_spi_config { + __u8 cs_max_number; + __u8 cs_change_supported; +#define VIRTIO_SPI_RX_TX_SUPPORT_DUAL _BITUL(0) +#define VIRTIO_SPI_RX_TX_SUPPORT_QUAD _BITUL(1) +#define VIRTIO_SPI_RX_TX_SUPPORT_OCTAL _BITUL(2) + __u8 tx_nbits_supported; + __u8 rx_nbits_supported; + __le32 bits_per_word_mask; +#define VIRTIO_SPI_MF_SUPPORT_CPHA_0 _BITUL(0) +#define VIRTIO_SPI_MF_SUPPORT_CPHA_1 _BITUL(1) +#define VIRTIO_SPI_MF_SUPPORT_CPOL_0 _BITUL(2) +#define VIRTIO_SPI_MF_SUPPORT_CPOL_1 _BITUL(3) +#define VIRTIO_SPI_MF_SUPPORT_CS_HIGH _BITUL(4) +#define VIRTIO_SPI_MF_SUPPORT_LSB_FIRST _BITUL(5) +#define VIRTIO_SPI_MF_SUPPORT_LOOPBACK _BITUL(6) + __le32 mode_func_supported; + __le32 max_freq_hz; + __le32 max_word_delay_ns; + __le32 max_cs_setup_ns; + __le32 max_cs_hold_ns; + __le32 max_cs_inactive_ns; +}; + +/** + * struct spi_transfer_head - virtio SPI transfer descriptor + * @chip_select_id: chipselect index the SPI transfer used. + * @bits_per_word: the number of bits in each SPI transfer word. + * @cs_change: whether to deselect device after finishing this transfer + * before starting the next transfer, 0 means cs keep asserted and + * 1 means cs deasserted then asserted again. + * @tx_nbits: bus width for write transfer. + * 0,1: bus width is 1, also known as SINGLE + * 2 : bus width is 2, also known as DUAL + * 4 : bus width is 4, also known as QUAD + * 8 : bus width is 8, also known as OCTAL + * other values are invalid. + * @rx_nbits: bus width for read transfer. + * 0,1: bus width is 1, also known as SINGLE + * 2 : bus width is 2, also known as DUAL + * 4 : bus width is 4, also known as QUAD + * 8 : bus width is 8, also known as OCTAL + * other values are invalid. + * @reserved: for future use. + * @mode: SPI transfer mode. + * bit 0: CPHA, determines the timing (i.e. 
phase) of the data + * bits relative to the clock pulses. For CPHA=0, the + * "out" side changes the data on the trailing edge of the + * preceding clock cycle, while the "in" side captures the data + * on (or shortly after) the leading edge of the clock cycle. + * For CPHA=1, the "out" side changes the data on the leading + * edge of the current clock cycle, while the "in" side + * captures the data on (or shortly after) the trailing edge of + * the clock cycle. + * bit 1: CPOL, determines the polarity of the clock. CPOL=0 is a + * clock which idles at 0, and each cycle consists of a pulse + * of 1. CPOL=1 is a clock which idles at 1, and each cycle + * consists of a pulse of 0. + * bit 2: CS_HIGH, if 1, chip select active high, else active low. + * bit 3: LSB_FIRST, determines per-word bits-on-wire, if 0, MSB + * first, else LSB first. + * bit 4: LOOP, loopback mode. + * @freq: the transfer speed in Hz. + * @word_delay_ns: delay to be inserted between consecutive words of a + * transfer, in ns unit. + * @cs_setup_ns: delay to be introduced after CS is asserted, in ns + * unit. + * @cs_delay_hold_ns: delay to be introduced before CS is deasserted + * for each transfer, in ns unit. + * @cs_change_delay_inactive_ns: delay to be introduced after CS is + * deasserted and before next asserted, in ns unit. + */ +struct spi_transfer_head { + __u8 chip_select_id; + __u8 bits_per_word; + __u8 cs_change; + __u8 tx_nbits; + __u8 rx_nbits; + __u8 reserved[3]; + __le32 mode; + __le32 freq; + __le32 word_delay_ns; + __le32 cs_setup_ns; + __le32 cs_delay_hold_ns; + __le32 cs_change_delay_inactive_ns; +}; + +/** + * struct spi_transfer_result - virtio SPI transfer result + * @result: Transfer result code. + * VIRTIO_SPI_TRANS_OK: Transfer successful. + * VIRTIO_SPI_PARAM_ERR: Parameter error. + * VIRTIO_SPI_TRANS_ERR: Transfer error.
+ */ +struct spi_transfer_result { +#define VIRTIO_SPI_TRANS_OK 0 +#define VIRTIO_SPI_PARAM_ERR 1 +#define VIRTIO_SPI_TRANS_ERR 2 + __u8 result; +}; + +#endif /* #ifndef _LINUX_VIRTIO_VIRTIO_SPI_H */ -- cgit v1.2.3 From 099f942182e3695554cba44e4bafb08a4111b50f Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 15 Sep 2025 20:37:20 +0200 Subject: spi: keep track of number of chipselects in spi_device There are several places where we need to iterate over a device's chipselect. To be able to do it efficiently, store the number of chipselects in spi_device, like we do for controllers. Since we now use a device supplied value, add a check to make sure it isn't more than we can support. Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250915183725.219473-3-jonas.gorski@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e9ea43234d9a..49c048277e97 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -170,6 +170,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * two delays will be added up. * @chip_select: Array of physical chipselect, spi->chipselect[i] gives * the corresponding physical CS for logical CS i. + * @num_chipselect: Number of physical chipselects used. 
* @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines * (optional, NULL when not using a GPIO line) @@ -229,6 +230,7 @@ struct spi_device { struct spi_delay cs_inactive; u8 chip_select[SPI_CS_CNT_MAX]; + u8 num_chipselect; /* * Bit mask of the chipselect(s) that the driver need to use from @@ -315,7 +317,7 @@ static inline bool spi_is_csgpiod(struct spi_device *spi) { u8 idx; - for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) { + for (idx = 0; idx < spi->num_chipselect; idx++) { if (spi_get_csgpiod(spi, idx)) return true; } -- cgit v1.2.3 From 08fda410bae41cc8dde9697f9104da525be53153 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 15 Sep 2025 20:37:24 +0200 Subject: spi: reduce device chip select limit again The spi chipselect limit SPI_CS_CNT_MAX was raised with commit 2f8c7c3715f2 ("spi: Raise limit on number of chip selects") from 4 to 16 to accommodate spi controllers with more than 4 chip selects, and then later to 24 with commit 96893cdd4760 ("spi: Raise limit on number of chip selects to 24"). Now that we removed SPI_CS_CNT_MAX limiting the chip selects of controllers, we can reduce the amount of chip selects per device again to 4, the original value. Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250915183725.219473-7-jonas.gorski@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 49c048277e97..df4842abbc6f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. 
of CS supported per spi device */ -#define SPI_CS_CNT_MAX 24 +#define SPI_CS_CNT_MAX 4 struct dma_chan; struct software_node; -- cgit v1.2.3 From e336ab509b43ea601801dfa05b4270023c3ed007 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 15 Sep 2025 20:37:25 +0200 Subject: spi: rename SPI_CS_CNT_MAX => SPI_DEVICE_CS_CNT_MAX Rename SPI_CS_CNT_MAX to SPI_DEVICE_CS_CNT_MAX to make it more obvious that this is the max number of CS per device supported, not per controller. Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250915183725.219473-8-jonas.gorski@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index df4842abbc6f..cb2c2df31089 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 4 +#define SPI_DEVICE_CS_CNT_MAX 4 struct dma_chan; struct software_node; @@ -229,7 +229,7 @@ struct spi_device { struct spi_delay cs_hold; struct spi_delay cs_inactive; - u8 chip_select[SPI_CS_CNT_MAX]; + u8 chip_select[SPI_DEVICE_CS_CNT_MAX]; u8 num_chipselect; /* @@ -238,9 +238,9 @@ struct spi_device { * multiple chip selects & memories are connected in parallel * then more than one bit need to be set in cs_index_mask. 
*/ - u32 cs_index_mask : SPI_CS_CNT_MAX; + u32 cs_index_mask : SPI_DEVICE_CS_CNT_MAX; - struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ + struct gpio_desc *cs_gpiod[SPI_DEVICE_CS_CNT_MAX]; /* Chip select gpio desc */ /* * Likely need more hooks for more protocol options affecting how @@ -721,8 +721,8 @@ struct spi_controller { bool auto_runtime_pm; bool fallback; bool last_cs_mode_high; - s8 last_cs[SPI_CS_CNT_MAX]; - u32 last_cs_index_mask : SPI_CS_CNT_MAX; + s8 last_cs[SPI_DEVICE_CS_CNT_MAX]; + u32 last_cs_index_mask : SPI_DEVICE_CS_CNT_MAX; struct completion xfer_completion; size_t max_dma_len; -- cgit v1.2.3 From 96384a34dd15b0e7357a34af5c848d1115a35e62 Mon Sep 17 00:00:00 2001 From: Niranjan H Y Date: Fri, 12 Sep 2025 14:06:22 +0530 Subject: ASoc: tas2783A: machine driver amp utility for TI devices Machine driver amp utility file to initialize and support multiple tas2783a devices is added. Reviewed-by: Bard Liao Signed-off-by: Niranjan H Y -- v5: - removed empty line in soc_sdw_ti_amp.c Link: https://patch.msgid.link/20250912083624.804-3-niranjan.hy@ti.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 6049a5d0cfcd..3c5e9b2af7f1 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -248,5 +248,13 @@ int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_so int asoc_sdw_cs42l43_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); int asoc_sdw_maxim_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +/* TI */ +int asoc_sdw_ti_amp_init(struct snd_soc_card *card, + struct snd_soc_dai_link *dai_links, + struct asoc_sdw_codec_info *info, + bool playback); +int asoc_sdw_ti_spk_rtd_init(struct 
snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_ti_amp_initial_settings(struct snd_soc_card *card, + const char *name_prefix); #endif -- cgit v1.2.3 From 8535bd38b4d58a3d19bf8e7dfa66e1d8180b316a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 22 Sep 2025 14:42:35 +0200 Subject: cgroup: add missing ns_common include Add the missing include of the ns_common header. Acked-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/cgroup_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h index b7dbf4d623d2..81ccbdee425b 100644 --- a/include/linux/cgroup_namespace.h +++ b/include/linux/cgroup_namespace.h @@ -2,6 +2,8 @@ #ifndef _LINUX_CGROUP_NAMESPACE_H #define _LINUX_CGROUP_NAMESPACE_H +#include + struct cgroup_namespace { struct ns_common ns; struct user_namespace *user_ns; -- cgit v1.2.3 From d7610cb7454bbd8bf6d58f71b0ed57155d3c545f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 22 Sep 2025 14:42:36 +0200 Subject: ns: simplify ns_common_init() further Simply derive the ns operations from the namespace type. 
Acked-by: Thomas Gleixner Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index aea8528d799a..56492cd9ff8d 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -25,6 +25,17 @@ extern struct time_namespace init_time_ns; extern struct user_namespace init_user_ns; extern struct uts_namespace init_uts_ns; +extern const struct proc_ns_operations netns_operations; +extern const struct proc_ns_operations utsns_operations; +extern const struct proc_ns_operations ipcns_operations; +extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations pidns_for_children_operations; +extern const struct proc_ns_operations userns_operations; +extern const struct proc_ns_operations mntns_operations; +extern const struct proc_ns_operations cgroupns_operations; +extern const struct proc_ns_operations timens_operations; +extern const struct proc_ns_operations timens_for_children_operations; + struct ns_common { struct dentry *stashed; const struct proc_ns_operations *ops; @@ -84,10 +95,21 @@ void __ns_common_free(struct ns_common *ns); struct user_namespace *: &init_user_ns, \ struct uts_namespace *: &init_uts_ns) -#define ns_common_init(__ns, __ops) \ - __ns_common_init(to_ns_common(__ns), __ops, (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) - -#define ns_common_init_inum(__ns, __ops, __inum) __ns_common_init(to_ns_common(__ns), __ops, __inum) +#define to_ns_operations(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \ + struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \ + struct mnt_namespace *: &mntns_operations, \ + struct net *: (IS_ENABLED(CONFIG_NET_NS) ? 
&netns_operations : NULL), \ + struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \ + struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \ + struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \ + struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL)) + +#define ns_common_init(__ns) \ + __ns_common_init(to_ns_common(__ns), to_ns_operations(__ns), (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) + +#define ns_common_init_inum(__ns, __inum) __ns_common_init(to_ns_common(__ns), to_ns_operations(__ns), __inum) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) -- cgit v1.2.3 From 0950c64ae38661bd97127e9aa0522f1624f82006 Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Mon, 22 Sep 2025 12:26:06 +0000 Subject: workqueue: fix texinfodocs warning for WQ_* flags reference Sphinx emitted a warning during make texinfodocs: WARNING: Inline literal start-string without end-string. This was caused by the trailing '*' in "%WQ_*" being parsed as reStructuredText markup in the kernel-doc comment. Escape the '*' in the comment so that Sphinx treats it as a literal character, resolving the warning. Signed-off-by: Kriish Sharma Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 71a9900c03c7..dabc351cc127 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -502,7 +502,7 @@ void workqueue_softirq_dead(unsigned int cpu); * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means * that the sum of per-node max_active's may be larger than @max_active. * - * For detailed information on %WQ_* flags, please refer to + * For detailed information on %WQ_\* flags, please refer to * Documentation/core-api/workqueue.rst. 
* * RETURNS: -- cgit v1.2.3 From cd875625b475dc4e28ac302ccb3422cc9f678f89 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 18 Sep 2025 17:33:18 -0700 Subject: ptp: document behavior of PTP_STRICT_FLAGS Commit 6138e687c7b6 ("ptp: Introduce strict checking of external time stamp options.") added the PTP_STRICT_FLAGS to the set of flags supported for the external timestamp request ioctl. It is only supported by PTP_EXTTS_REQUEST2, as it was introduced with the introduction of the new ioctls. Further, the kernel has always set this flag for PTP_EXTTS_REQUEST2 regardless of whether or not the user requested the behavior. This effectively means that the flag is not useful for userspace. If the user issues a PTP_EXTTS_REQUEST ioctl, the flag is ignored due to not being supported on the old ioctl. If the user issues a PTP_EXTTS_REQUEST2 ioctl, the flag will be set by the kernel regardless of whether the user set the flag in their structure. Add a comment documenting this behavior in the uAPI header file. Signed-off-by: Jacob Keller Reviewed-by: Vadim Fedorenko Acked-by: Richard Cochran Reviewed-by: Kory Maincent Tested-by: James Clark Link: https://patch.msgid.link/20250918-jk-fix-bcm-phy-supported-flags-v1-3-747b60407c9c@intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ptp_clock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 18eefa6d93d6..2c3346e91dbe 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -37,6 +37,9 @@ /* * flag fields valid for the new PTP_EXTTS_REQUEST2 ioctl. + * + * Note: PTP_STRICT_FLAGS is always enabled by the kernel for + * PTP_EXTTS_REQUEST2 regardless of whether it is set by userspace. 
*/ #define PTP_EXTTS_VALID_FLAGS (PTP_ENABLE_FEATURE | \ PTP_RISING_EDGE | \ -- cgit v1.2.3 From 6445bb832dc0ba0ab816e5bd79ef0209cdd46d3a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 19 Sep 2025 08:35:28 +0000 Subject: tcp: Remove osk from __inet_hash() arg. __inet_hash() is called from inet_hash() and inet6_hash with osk NULL. Let's remove the 2nd arg from __inet_hash(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250919083706.1863217-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index a3b32241c2f2..64bc8870db88 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -289,7 +289,7 @@ int inet_hashinfo2_init_mod(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk); -int __inet_hash(struct sock *sk, struct sock *osk); +int __inet_hash(struct sock *sk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); -- cgit v1.2.3 From 0ac44301e3bf4f5abc892ab530188ca95c61e59f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 19 Sep 2025 08:35:29 +0000 Subject: tcp: Remove inet6_hash(). inet_hash() and inet6_hash() are exactly the same. Also, we do not need to export inet6_hash(). Let's consolidate the two into __inet_hash() and rename it to inet_hash(). 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250919083706.1863217-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/inet6_hashtables.h | 2 -- include/net/inet_hashtables.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 1f985d2012ce..282e29237d93 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -167,8 +167,6 @@ struct sock *inet6_lookup(const struct net *net, struct sk_buff *skb, int doff, const struct in6_addr *daddr, const __be16 dport, const int dif); -int inet6_hash(struct sock *sk); - static inline bool inet6_match(const struct net *net, const struct sock *sk, const struct in6_addr *saddr, const struct in6_addr *daddr, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 64bc8870db88..b787be651ce7 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -289,7 +289,6 @@ int inet_hashinfo2_init_mod(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk); -int __inet_hash(struct sock *sk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); -- cgit v1.2.3 From c9809f03c158f07eaa76c7dd3606fc0a184520f2 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 19 Sep 2025 14:08:58 +0200 Subject: mptcp: pm: netlink: only add server-side attr when true This attribute is a boolean. No need to add it to set it to 'false'. Indeed, the default value when this attribute is not set is naturally 'false'. A few bytes can then be saved by not adding this attribute if the connection is not on the server side. This prepares the future deprecation of its attribute, in favour of a new flag. 
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250919-net-next-mptcp-server-side-flag-v1-1-a97a5d561a8b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp_pm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index 7359d34da446..bf44a5cf5b5a 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -16,10 +16,10 @@ * good time to allocate memory and send ADD_ADDR if needed. Depending on the * traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED * is sent. Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, server-side, [flags]. + * sport, dport, [server-side], [flags]. * @MPTCP_EVENT_ESTABLISHED: A MPTCP connection is established (can start new * subflows). Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, server-side, [flags]. + * sport, dport, [server-side], [flags]. * @MPTCP_EVENT_CLOSED: A MPTCP connection has stopped. Attribute: token. * @MPTCP_EVENT_ANNOUNCED: A new address has been announced by the peer. * Attributes: token, rem_id, family, daddr4 | daddr6 [, dport]. -- cgit v1.2.3 From 3d7ae91107b839ffeeb19730a2e2a46e0054bae8 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 19 Sep 2025 14:08:59 +0200 Subject: mptcp: pm: netlink: announce server-side flag Now that the 'flags' attribute is used, it seems interesting to add one flag for 'server-side', a boolean value. This is duplicating the info from the dedicated 'server-side' attribute, but it will be deprecated in the next commit, and removed in a few versions. 
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250919-net-next-mptcp-server-side-flag-v1-2-a97a5d561a8b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 5fd5b4cf75ca..95d621f6d598 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -32,6 +32,7 @@ #define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1) #define MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 _BITUL(0) +#define MPTCP_PM_EV_FLAG_SERVER_SIDE _BITUL(1) #define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) -- cgit v1.2.3 From 5c967ebb551919661166305c0ff9422e41065c02 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 19 Sep 2025 14:09:02 +0200 Subject: mptcp: use _BITUL() instead of (1 << x) Simply to use the proper way to declare bits, and to align with all other flags declared in this file. No functional changes intended. 
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250919-net-next-mptcp-server-side-flag-v1-5-a97a5d561a8b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 95d621f6d598..15eef878690b 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -34,11 +34,11 @@ #define MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 _BITUL(0) #define MPTCP_PM_EV_FLAG_SERVER_SIDE _BITUL(1) -#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) -#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) -#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) -#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) -#define MPTCP_PM_ADDR_FLAG_IMPLICIT (1 << 4) +#define MPTCP_PM_ADDR_FLAG_SIGNAL _BITUL(0) +#define MPTCP_PM_ADDR_FLAG_SUBFLOW _BITUL(1) +#define MPTCP_PM_ADDR_FLAG_BACKUP _BITUL(2) +#define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3) +#define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4) struct mptcp_info { __u8 mptcpi_subflows; -- cgit v1.2.3 From a571f08d3db215dd6ec294d8faac8cc4184bc4e4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 16 Sep 2025 22:46:36 +0100 Subject: net: phy: add phy_interface_copy() Add a helper for copying PHY interface bitmasks. This will be used by the SFP bus code, which will then be moved to phylink in the subsequent patches. 
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uydVU-000000061W8-2IDT@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7da9e19471c9..d09fc42e61f3 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -169,6 +169,11 @@ static inline bool phy_interface_empty(const unsigned long *intf) return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX); } +static inline void phy_interface_copy(unsigned long *d, const unsigned long *s) +{ + bitmap_copy(d, s, PHY_INTERFACE_MODE_MAX); +} + static inline unsigned int phy_interface_weight(const unsigned long *intf) { return bitmap_weight(intf, PHY_INTERFACE_MODE_MAX); -- cgit v1.2.3 From ddae6127afbba46e32af3b31eb7bba939e1fad96 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 16 Sep 2025 22:46:41 +0100 Subject: net: sfp: pre-parse the module support Pre-parse the module support on insert rather than when the upstream requests the data. This will allow more flexible and extensible parsing. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uydVZ-000000061WE-2pXD@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 60c65cea74f6..5fb59cf49882 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -521,6 +521,28 @@ struct ethtool_eeprom; struct ethtool_modinfo; struct sfp_bus; +/** + * struct sfp_module_caps - sfp module capabilities + * @interfaces: bitmap of interfaces that the module may support + * @link_modes: bitmap of ethtool link modes that the module may support + */ +struct sfp_module_caps { + DECLARE_PHY_INTERFACE_MASK(interfaces); + __ETHTOOL_DECLARE_LINK_MODE_MASK(link_modes); + /** + * @may_have_phy: indicate whether the module may have an ethernet PHY + * There is no way to be sure that a module has a PHY as the EEPROM + * doesn't contain this information. When set, this does not mean that + * the module definitely has a PHY. + */ + bool may_have_phy; + /** + * @port: one of ethtool %PORT_* definitions, parsed from the module + * EEPROM, or %PORT_OTHER if the port type is not known. + */ + u8 port; +}; + /** * struct sfp_upstream_ops - upstream operations structure * @attach: called when the sfp socket driver is bound to the upstream -- cgit v1.2.3 From 64fb4a3ae8a5d9c4d27d9f4ae58e38200fc3d44b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 16 Sep 2025 22:46:51 +0100 Subject: net: sfp: provide sfp_get_module_caps() Provide a function to retrieve the current sfp_module_caps structure so that upstreams can get the entire module support in one go. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uydVj-000000061WQ-3q47@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 5fb59cf49882..9f29fcad52be 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -576,6 +576,7 @@ struct sfp_upstream_ops { }; #if IS_ENABLED(CONFIG_SFP) +const struct sfp_module_caps *sfp_get_module_caps(struct sfp_bus *bus); int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); @@ -600,6 +601,12 @@ int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, void sfp_bus_del_upstream(struct sfp_bus *bus); const char *sfp_get_name(struct sfp_bus *bus); #else +static inline const struct sfp_module_caps * +sfp_get_module_caps(struct sfp_bus *bus) +{ + return NULL; +} + static inline int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) -- cgit v1.2.3 From 9ce138735efcb395974952972aa5dbd1d444ac2c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 16 Sep 2025 22:47:07 +0100 Subject: net: sfp: remove old sfp_parse_* functions Remove the old sfp_parse_*() functions that are now no longer used. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1uydVz-000000061Wj-13Yd@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/sfp.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 9f29fcad52be..5c71945a5e4d 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -577,11 +577,6 @@ struct sfp_upstream_ops { #if IS_ENABLED(CONFIG_SFP) const struct sfp_module_caps *sfp_get_module_caps(struct sfp_bus *bus); -int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, - unsigned long *support); -bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); -void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, - unsigned long *support, unsigned long *interfaces); phy_interface_t sfp_select_interface(struct sfp_bus *bus, const unsigned long *link_modes); @@ -607,26 +602,6 @@ sfp_get_module_caps(struct sfp_bus *bus) return NULL; } -static inline int sfp_parse_port(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, - unsigned long *support) -{ - return PORT_OTHER; -} - -static inline bool sfp_may_have_phy(struct sfp_bus *bus, - const struct sfp_eeprom_id *id) -{ - return false; -} - -static inline void sfp_parse_support(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, - unsigned long *support, - unsigned long *interfaces) -{ -} - static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, const unsigned long *link_modes) { -- cgit v1.2.3 From 17b14d235f58155a05cd9371e4559361ca3c67da Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:49 +0000 Subject: net: move sk_uid and sk_protocol to sock_read_tx sk_uid and sk_protocol are read from inet6_csk_route_socket() for each TCP transmit. Also read from udpv6_sendmsg(), udp_sendmsg() and others. Move them to sock_read_tx for better cache locality. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ee95081b0c0b..66c2f396b57d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -492,6 +492,9 @@ struct sock { long sk_sndtimeo; u32 sk_priority; u32 sk_mark; + kuid_t sk_uid; + u16 sk_protocol; + u16 sk_type; struct dst_entry __rcu *sk_dst_cache; netdev_features_t sk_route_caps; #ifdef CONFIG_SOCK_VALIDATE_XMIT @@ -517,15 +520,12 @@ struct sock { sk_no_check_tx : 1, sk_no_check_rx : 1; u8 sk_shutdown; - u16 sk_type; - u16 sk_protocol; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; - kuid_t sk_uid; unsigned long sk_ino; spinlock_t sk_peer_lock; int sk_bind_phc; -- cgit v1.2.3 From 9303c3ced111803dcd1aa36a778f290977935ca5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:50 +0000 Subject: net: move sk->sk_err_soft and sk->sk_sndbuf sk->sk_sndbuf is read-mostly in tx path, so move it from sock_write_tx group to more appropriate sock_read_tx. sk->sk_err_soft was not identified previously, but is used from tcp_ack(). Move it to sock_write_tx group for better cache locality. Also change tcp_ack() to clear sk->sk_err_soft only if needed. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 66c2f396b57d..b4fefeea0213 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -467,7 +467,7 @@ struct sock { __cacheline_group_begin(sock_write_tx); int sk_write_pending; atomic_t sk_omem_alloc; - int sk_sndbuf; + int sk_err_soft; int sk_wmem_queued; refcount_t sk_wmem_alloc; @@ -507,6 +507,7 @@ struct sock { unsigned int sk_gso_max_size; gfp_t sk_allocation; u32 sk_txhash; + int sk_sndbuf; u8 sk_pacing_shift; bool sk_use_task_frag; __cacheline_group_end(sock_read_tx); @@ -523,7 +524,6 @@ struct sock { unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; - int sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; unsigned long sk_ino; -- cgit v1.2.3 From 1b44d700023e77dd92821e7811db825e75a1a394 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:52 +0000 Subject: tcp: move tcp->rcv_tstamp to tcp_sock_write_txrx group tcp_ack() writes this field, it belongs to tcp_sock_write_txrx. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3ca5ed02de6d..1e6c2ded22c9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -238,7 +238,6 @@ struct tcp_sock { /* RX read-mostly hotpath cache lines */ __cacheline_group_begin(tcp_sock_read_rx); u32 copied_seq; /* Head of yet unread data */ - u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 snd_wl1; /* Sequence for window update */ u32 tlp_high_seq; /* snd_nxt at the time of TLP */ u32 rttvar_us; /* smoothed mdev_max */ @@ -246,13 +245,13 @@ struct tcp_sock { u16 advmss; /* Advertised MSS */ u16 urg_data; /* Saved octet of OOB data and control flags */ u32 lost; /* Total data packets lost incl. rexmits */ + u32 snd_ssthresh; /* Slow start size threshold */ struct minmax rtt_min; /* OOO segments go in this rbtree. Socket lock must be held. */ struct rb_root out_of_order_queue; #if defined(CONFIG_TLS_DEVICE) void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); #endif - u32 snd_ssthresh; /* Slow start size threshold */ u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_rx); @@ -319,6 +318,7 @@ struct tcp_sock { */ u32 app_limited; /* limited until "delivered" reaches this val */ u32 rcv_wnd; /* Current receiver window */ + u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ /* * Options received (usually on last packet, some only on SYN packets). */ -- cgit v1.2.3 From 969904dcd77dbb0a773d66cddaa59eccc6415d03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:53 +0000 Subject: tcp: move recvmsg_inq to tcp_sock_read_txrx Fill a hole in tcp_sock_read_txrx, instead of possibly wasting a cache line. 
Note that tcp_recvmsg_locked() is also reading tp->repair, so this removes one cache line miss in tcp recvmsg(). Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1e6c2ded22c9..c1d7fce251d7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -232,7 +232,8 @@ struct tcp_sock { repair : 1, tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ - is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ + recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_txrx); /* RX read-mostly hotpath cache lines */ @@ -252,7 +253,6 @@ struct tcp_sock { #if defined(CONFIG_TLS_DEVICE) void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); #endif - u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_rx); /* TX read-write hotpath cache lines */ -- cgit v1.2.3 From a105ea47a4e855d24ebf65f1c5fb907162e7b8cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:54 +0000 Subject: tcp: move tcp_clean_acked to tcp_sock_read_tx group tp->tcp_clean_acked is fetched in tx path when snd_una is updated. This field thus belongs to tcp_sock_read_tx group. 
Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c1d7fce251d7..3f282130c863 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -215,6 +215,9 @@ struct tcp_sock { u16 gso_segs; /* Max number of segs per GSO packet */ /* from STCP, retrans queue hinting */ struct sk_buff *retransmit_skb_hint; +#if defined(CONFIG_TLS_DEVICE) + void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); +#endif __cacheline_group_end(tcp_sock_read_tx); /* TXRX read-mostly hotpath cache lines */ @@ -250,9 +253,6 @@ struct tcp_sock { struct minmax rtt_min; /* OOO segments go in this rbtree. Socket lock must be held. */ struct rb_root out_of_order_queue; -#if defined(CONFIG_TLS_DEVICE) - void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); -#endif __cacheline_group_end(tcp_sock_read_rx); /* TX read-write hotpath cache lines */ -- cgit v1.2.3 From 31c4511bbb0c75c525b6e4f4fe4167f2e9d3b05c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:55 +0000 Subject: tcp: move mtu_info to remove two 32bit holes This removes 8bytes waste on 64bit builds. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3f282130c863..20b8c6e21fef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -448,6 +448,9 @@ struct tcp_sock { * the first SYN. */ u32 undo_marker; /* snd_una upon a new recovery episode. */ int undo_retrans; /* number of undoable retransmissions. 
*/ + u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG + * while socket was owned by user. + */ u64 bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans * Total data bytes retransmitted */ @@ -494,9 +497,6 @@ struct tcp_sock { u32 probe_seq_end; } mtu_probe; u32 plb_rehash; /* PLB-triggered rehash attempts */ - u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG - * while socket was owned by user. - */ #if IS_ENABLED(CONFIG_MPTCP) bool is_mptcp; #endif -- cgit v1.2.3 From 649091ef597bb7de34dd8ceea39bbc4252970558 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Sep 2025 20:48:56 +0000 Subject: tcp: reclaim 8 bytes in struct request_sock_queue synflood_warned had to be u32 for xchg(), but ensuring atomicity is not really needed. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250919204856.2977245-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/request_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 6a5ec1418e85..cd4d4cf71d0d 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -185,8 +185,8 @@ struct fastopen_queue { struct request_sock_queue { spinlock_t rskq_lock; u8 rskq_defer_accept; + u8 synflood_warned; - u32 synflood_warned; atomic_t qlen; atomic_t young; -- cgit v1.2.3 From 349271568303695f0ac3563af153d2b4542f6986 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 21 Sep 2025 18:01:16 +0200 Subject: bpf: Implement signature verification for BPF programs This patch extends the BPF_PROG_LOAD command by adding three new fields to `union bpf_attr` in the user-space API: - signature: A pointer to the signature blob. - signature_size: The size of the signature blob. - keyring_id: The serial number of a loaded kernel keyring (e.g., the user or session keyring) containing the trusted public keys. 
When a BPF program is loaded with a signature, the kernel: 1. Retrieves the trusted keyring using the provided `keyring_id`. 2. Verifies the supplied signature against the BPF program's instruction buffer. 3. If the signature is valid and was generated by a key in the trusted keyring, the program load proceeds. 4. If no signature is provided, the load proceeds as before, allowing for backward compatibility. LSMs can choose to restrict unsigned programs and implement a security policy. 5. If signature verification fails for any reason, the program is not loaded. Tested-by: syzbot@syzkaller.appspotmail.com Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250921160120.9711-2-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/verification.h | 1 + include/uapi/linux/bpf.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/verification.h b/include/linux/verification.h index 4f3022d081c3..dec7f2beabfd 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -36,6 +36,7 @@ enum key_being_used_for { VERIFYING_KEY_SIGNATURE, VERIFYING_KEY_SELF_SIGNATURE, VERIFYING_UNSPECIFIED_SIGNATURE, + VERIFYING_BPF_SIGNATURE, NR__KEY_BEING_USED_FOR }; #ifdef CONFIG_SYSTEM_DATA_VERIFICATION diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0987b52d5648..f3b173e48b0f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1611,6 +1611,16 @@ union bpf_attr { * continuous. */ __u32 fd_array_cnt; + /* Pointer to a buffer containing the signature of the BPF + * program. + */ + __aligned_u64 signature; + /* Size of the signature buffer in bytes. */ + __u32 signature_size; + /* ID of the kernel keyring to be used for signature + * verification.
+ */ + __s32 keyring_id; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 39f17c707454290900b608ee5a200b5db9245626 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 14 Sep 2025 13:09:08 +0200 Subject: sched/task.h: fix the wrong comment on task_lock() nesting with tasklist_lock The ancient comment above task_lock() states that it can be nested outside of read_lock(&tasklist_lock), but this is no longer true: CPU_0 CPU_1 CPU_2 task_lock() read_lock(tasklist) write_lock_irq(tasklist) read_lock(tasklist) task_lock() Unless CPU_0 calls read_lock() in IRQ context, queued_read_lock_slowpath() won't get the lock immediately, it will spin waiting for the pending writer on CPU_2, resulting in a deadlock. Link: https://lkml.kernel.org/r/20250914110908.GA18769@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Jiri Slaby Cc: Mateusz Guzik Signed-off-by: Andrew Morton --- include/linux/sched/task.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ea41795a352b..8ff98b18b24b 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -210,9 +210,8 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * pins the final release of task.io_context. Also protects ->cpuset and * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. + * Nests inside of read_lock(&tasklist_lock). It must not be nested with + * write_lock_irq(&tasklist_lock), neither inside nor outside. 
*/ static inline void task_lock(struct task_struct *p) { -- cgit v1.2.3 From af6703838ecb1513efdd2502a8f7bb6472c5ce96 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 3 Sep 2025 18:48:41 +0100 Subject: mm: specify separate file and vm_file params in vm_area_desc Patch series "mm: do not assume file == vma->vm_file in compat_vma_mmap_prepare()", v2. As part of the efforts to eliminate the problematic f_op->mmap callback, a new callback - f_op->mmap_prepare was provided. While we are converting these callbacks, we must deal with 'stacked' filesystems and drivers - those which in their own f_op->mmap callback invoke an inner f_op->mmap callback. To accommodate this, a compatibility layer is provided that, via vfs_mmap(), detects if f_op->mmap_prepare is provided and if so, generates a vm_area_desc containing the VMA's metadata and invokes the call. So far, we have provided desc->file equal to vma->vm_file. However this is not necessarily valid, especially in the case of stacked drivers which wish to assign a new file after the inner hook is invoked. To account for this, we adjust vm_area_desc to have both file and vm_file fields. The .vm_file field is strictly set to vma->vm_file (or in the case of a new mapping, what will become vma->vm_file). However, .file is set to whichever file vfs_mmap() is invoked with when using the compatibility layer. Therefore, if the VMA's file needs to be updated in .mmap_prepare, desc->vm_file should be assigned, whilst desc->file should be read. No current f_op->mmap_prepare users assign desc->file so this is safe to do. This makes the .mmap_prepare callback in the context of a stacked filesystem or driver completely consistent with the existing .mmap implementations. While we're here, we do a few small cleanups, and ensure that we const-ify things correctly in the vm_area_desc struct to avoid hooks accidentally trying to assign fields they should not.
This patch (of 2): Stacked filesystems and drivers may invoke mmap hooks with a struct file pointer that differs from the overlying file. We will make this functionality possible in a subsequent patch. In order to prepare for this, let's update vm_area_desc to separately provide desc->file and desc->vm_file parameters. The desc->file parameter is the file that the hook is expected to operate upon, and is not assignable (though the hook may wish to e.g. update the file's accessed time for instance). The desc->vm_file defaults to what will become vma->vm_file and is what the hook must reassign should it wish to change the VMA's vma->vm_file. For now we keep desc->file, vm_file the same to remain consistent. No f_op->mmap_prepare() callback sets a new vma->vm_file currently, so this is safe to change. While we're here, make the mm_struct that desc->mm points at immutable as well as the desc->mm field itself. As part of this change, also update the single hook which this would otherwise break - mlock_future_ok(), invoked by secretmem_mmap_prepare(). We additionally update set_vma_from_desc() to compare fields in a more logical fashion, checking the (possibly) user-modified fields as the first operand against the existing value as the second one. Additionally, update VMA tests to accommodate changes.
Link: https://lkml.kernel.org/r/cover.1756920635.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/3fa15a861bb7419f033d22970598aa61850ea267.1756920635.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: Al Viro Cc: Christian Brauner Cc: David Hildenbrand Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Pedro Falcato Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6920c816f6c6..965dedb3ccfa 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -777,13 +777,14 @@ struct pfnmap_track_ctx { */ struct vm_area_desc { /* Immutable state. */ - struct mm_struct *mm; + const struct mm_struct *const mm; + struct file *const file; /* May vary from vm_file in stacked callers. */ unsigned long start; unsigned long end; /* Mutable fields. Populated with initial state. */ pgoff_t pgoff; - struct file *file; + struct file *vm_file; vm_flags_t vm_flags; pgprot_t page_prot; -- cgit v1.2.3 From f7a741c53b712542aedd9382f215fbe969f8a580 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 3 Sep 2025 18:48:42 +0100 Subject: mm: do not assume file == vma->vm_file in compat_vma_mmap_prepare() In commit bb666b7c2707 ("mm: add mmap_prepare() compatibility layer for nested file systems") we introduced the ability for stacked drivers and file systems to correctly invoke the f_op->mmap_prepare() handler from an f_op->mmap() handler via a compatibility layer implemented in compat_vma_mmap_prepare(). This populates vm_area_desc fields according to those found in the (not yet fully initialised) VMA passed to f_op->mmap(). However this function implicitly assumes that the struct file which we are operating upon is equal to vma->vm_file. This is not a safe assumption in all cases. 
The only really sane situation in which this matters would be something like e.g. i915_gem_dmabuf_mmap() which invokes vfs_mmap() against obj->base.filp: ret = vfs_mmap(obj->base.filp, vma); if (ret) return ret; And then sets the VMA's file to this, should the mmap operation succeed: vma_set_file(vma, obj->base.filp); That is - it is the file that is intended to back the VMA mapping. This is not an issue currently, as so far we have only implemented f_op->mmap_prepare() handlers for some file systems and internal mm uses, and the only stacked f_op->mmap() operations that can be performed upon these are those in backing_file_mmap() and coda_file_mmap(), both of which use vma->vm_file. However, moving forward, as we convert drivers to using f_op->mmap_prepare(), this will become a problem. Resolve this issue by explicitly setting desc->file to the provided file parameter and update callers accordingly. Callers are expected to read desc->file and update desc->vm_file - the former will be the file provided by the caller (if stacked, this may differ from vma->vm_file). If the caller needs to differentiate between the two they therefore now can. While we are here, also provide a variant of compat_vma_mmap_prepare() that operates against a pointer to any file_operations struct and does not assume that the file_operations struct we are interested in is file->f_op. This function is __compat_vma_mmap_prepare() and we invoke it from compat_vma_mmap_prepare() so that we share code between the two functions. This is important, because some drivers provide hooks in a separate struct, for instance struct drm_device provides an fops field for this purpose. Also update the VMA selftests accordingly. Link: https://lkml.kernel.org/r/dd0c72df8a33e8ffaa243eeb9b01010b670610e9.1756920635.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Christian Brauner Reviewed-by: Pedro Falcato Reviewed-by: Liam R. 
Howlett Cc: Al Viro Cc: David Hildenbrand Cc: Jan Kara Cc: Jann Horn Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0783c5d05d3f..594bd4d0521e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2279,6 +2279,8 @@ static inline bool can_mmap_file(struct file *file) return true; } +int __compat_vma_mmap_prepare(const struct file_operations *f_op, + struct file *file, struct vm_area_struct *vma); int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) -- cgit v1.2.3 From ef9f603fd3d4b7937f2cdbce40e47df0a54b2a55 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Mon, 22 Sep 2025 11:02:31 -0600 Subject: io_uring/cmd: drop unused res2 param from io_uring_cmd_done() Commit 79525b51acc1 ("io_uring: fix nvme's 32b cqes on mixed cq") split out a separate io_uring_cmd_done32() helper for ->uring_cmd() implementations that return 32-byte CQEs. The res2 value passed to io_uring_cmd_done() is now unused because __io_uring_cmd_done() ignores it when is_cqe32 is passed as false. So drop the parameter from io_uring_cmd_done() to simplify the callers and clarify that it's not possible to return an extra value beyond the 32-bit CQE result. 
Signed-off-by: Caleb Sander Mateos Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 02d50f08f668..7509025b4071 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -160,9 +160,9 @@ static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd) } static inline void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, - u64 res2, unsigned issue_flags) + unsigned issue_flags) { - return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, false); + return __io_uring_cmd_done(ioucmd, ret, 0, issue_flags, false); } static inline void io_uring_cmd_done32(struct io_uring_cmd *ioucmd, s32 ret, -- cgit v1.2.3 From 7c7da8aa3fd69b77e8efaa14b80bddd948547739 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:37:09 +0900 Subject: can: dev: turn can_set_static_ctrlmode() into a non-inline function can_set_static_ctrlmode() is declared as a static inline. But it is only called in the probe function of the devices and so does not really benefit from any kind of optimization. 
Transform it into a "normal" function by moving it to drivers/net/can/dev/dev.c Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-can-fix-mtu-v3-2-581bde113f52@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 9a92cbe5b2cb..5dc58360c2d7 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -125,27 +125,6 @@ static inline s32 can_get_relative_tdco(const struct can_priv *priv) return (s32)priv->fd.tdc.tdco - sample_point_in_tc; } -/* helper to define static CAN controller features at device creation time */ -static inline int __must_check can_set_static_ctrlmode(struct net_device *dev, - u32 static_mode) -{ - struct can_priv *priv = netdev_priv(dev); - - /* alloc_candev() succeeded => netdev_priv() is valid at this point */ - if (priv->ctrlmode_supported & static_mode) { - netdev_warn(dev, - "Controller features can not be supported and static at the same time\n"); - return -EINVAL; - } - priv->ctrlmode = static_mode; - - /* override MTU which was set by default in can_setup()? 
*/ - if (static_mode & CAN_CTRLMODE_FD) - dev->mtu = CANFD_MTU; - - return 0; -} - static inline u32 can_get_static_ctrlmode(struct can_priv *priv) { return priv->ctrlmode & ~priv->ctrlmode_supported; @@ -188,6 +167,8 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); +int __must_check can_set_static_ctrlmode(struct net_device *dev, + u32 static_mode); int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, struct kernel_ethtool_ts_info *info); -- cgit v1.2.3 From d57f4b874946e997be52f5ebb5e0e1dad368c16f Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 17 Sep 2025 15:22:04 +0200 Subject: tcp: Update bind bucket state on port release Today, once an inet_bind_bucket enters a state where fastreuse >= 0 or fastreuseport >= 0 after a socket is explicitly bound to a port, it remains in that state until all sockets are removed and the bucket is destroyed. In this state, the bucket is skipped during ephemeral port selection in connect(). For applications using a reduced ephemeral port range (IP_LOCAL_PORT_RANGE socket option), this can cause faster port exhaustion since blocked buckets are excluded from reuse. The reason the bucket state isn't updated on port release is unclear. Possibly a performance trade-off to avoid scanning bucket owners, or just an oversight. Fix it by recalculating the bucket state when a socket releases a port. To limit overhead, each inet_bind2_bucket stores its own (fastreuse, fastreuseport) state. On port release, only the relevant port-addr bucket is scanned, and the overall state is derived from these. 
Signed-off-by: Jakub Sitnicki Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250917-update-bind-bucket-state-on-unhash-v5-1-57168b661b47@cloudflare.com Signed-off-by: Paolo Abeni --- include/net/inet_connection_sock.h | 5 +++-- include/net/inet_hashtables.h | 2 ++ include/net/inet_timewait_sock.h | 3 ++- include/net/sock.h | 4 ++++ 4 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 0737d8e178dd..b4b886647607 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -316,8 +316,9 @@ int inet_csk_listen_start(struct sock *sk); void inet_csk_listen_stop(struct sock *sk); /* update the fast reuse flag when adding a socket */ -void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, - struct sock *sk); +void inet_csk_update_fastreuse(const struct sock *sk, + struct inet_bind_bucket *tb, + struct inet_bind2_bucket *tb2); struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b787be651ce7..ac05a52d9e13 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -108,6 +108,8 @@ struct inet_bind2_bucket { struct hlist_node bhash_node; /* List of sockets hashed to this bucket */ struct hlist_head owners; + signed char fastreuse; + signed char fastreuseport; }; static inline struct net *ib_net(const struct inet_bind_bucket *ib) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 3a31c74c9e15..63a644ff30de 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -70,7 +70,8 @@ struct inet_timewait_sock { unsigned int tw_transparent : 1, tw_flowlabel : 20, tw_usec_ts : 1, - tw_pad : 2, /* 2 bits hole */ + tw_connect_bind : 1, + tw_pad : 1, /* 1 bit hole */ tw_tos : 8; u32 tw_txhash; u32 tw_priority; diff --git a/include/net/sock.h 
b/include/net/sock.h index b4fefeea0213..8c5b64f41ab7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1494,6 +1494,10 @@ static inline int __sk_prot_rehash(struct sock *sk) #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 +/** + * define SOCK_CONNECT_BIND - &sock->sk_userlocks flag for auto-bind at connect() time + */ +#define SOCK_CONNECT_BIND 16 struct socket_alloc { struct socket socket; -- cgit v1.2.3 From 91daac8a6893c65e18f194946ad3ad9df5e9de8d Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 16 Sep 2025 08:10:07 +0200 Subject: genirq/msi: Remove msi_post_free() The only user of msi_post_free() - powerpc/pseries - has been changed to use msi_teardown(). Remove this unused callback. Signed-off-by: Nam Cao Reviewed-by: Thomas Gleixner Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250916061007.964005-1-namcao@linutronix.de --- include/linux/msi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index e5e86a8529fb..faac634ac230 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -431,8 +431,6 @@ struct msi_domain_info; * function. * @domain_free_irqs: Optional function to override the default free * function. - * @msi_post_free: Optional function which is invoked after freeing - * all interrupts. * @msi_translate: Optional translate callback to support the odd wire to * MSI bridges, e.g. 
MBIGEN * @@ -473,8 +471,6 @@ struct msi_domain_ops { struct device *dev, int nvec); void (*domain_free_irqs)(struct irq_domain *domain, struct device *dev); - void (*msi_post_free)(struct irq_domain *domain, - struct device *dev); int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec, irq_hw_number_t *hwirq, unsigned int *type); }; -- cgit v1.2.3 From 884eee8e43f3072db4111178c98b9aa5c57bcf92 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:47 +0200 Subject: net/smc: Remove error handling of unregister_dmb() smcd_buf_free() calls smc_ism_unregister_dmb(lgr->smcd, buf_desc) and then unconditionally frees buf_desc. Remove the cleaning up of fields of buf_desc in smc_ism_unregister_dmb(), because it is not helpful. This removes the only usage of ISM_ERROR from the smc module. So move it to drivers/s390/net/ism.h. Signed-off-by: Alexandra Winter Reviewed-by: Mahanta Jambigi Reviewed-by: Dust Li Link: https://patch.msgid.link/20250918110500.1731261-2-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/net/smc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/smc.h b/include/net/smc.h index db84e4e35080..a9c023dd1380 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -44,8 +44,6 @@ struct smcd_dmb { #define ISM_RESERVED_VLANID 0x1FFF -#define ISM_ERROR 0xFFFF - struct smcd_dev; struct smcd_gid { -- cgit v1.2.3 From 35758b0032c056cdff3e8f5a70669cb3e2c8d0e4 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:49 +0200 Subject: dibs: Create drivers/dibs Create the file structure for a 'DIBS - Direct Internal Buffer Sharing' shim layer that will provide generic functionality and declarations for dibs device drivers and dibs clients. Following patches will add functionality. 
Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-4-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/linux/dibs.h (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h new file mode 100644 index 000000000000..3f4175aaa732 --- /dev/null +++ b/include/linux/dibs.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Direct Internal Buffer Sharing + * + * Definitions for the DIBS module + * + * Copyright IBM Corp. 2025 + */ +#ifndef _DIBS_H +#define _DIBS_H + +/* DIBS - Direct Internal Buffer Sharing - concept + * ----------------------------------------------- + * In the case of multiple system sharing the same hardware, dibs fabrics can + * provide dibs devices to these systems. The systems use dibs devices of the + * same fabric to communicate via dmbs (Direct Memory Buffers). Each dmb has + * exactly one owning local dibs device and one remote using dibs device, that + * is authorized to write into this dmb. This access control is provided by the + * dibs fabric. + * + * Because the access to the dmb is based on access to physical memory, it is + * lossless and synchronous. The remote devices can directly access any offset + * of the dmb. + * + * Dibs fabrics, dibs devices and dmbs are identified by tokens and ids. + * Dibs fabric id is unique within the same hardware (with the exception of the + * dibs loopback fabric), dmb token is unique within the same fabric, dibs + * device gids are guaranteed to be unique within the same fabric and + * statistically likely to be globally unique. The exchange of these tokens and + * ids between the systems is not part of the dibs concept. + * + * The dibs layer provides an abstraction between dibs device drivers and dibs + * clients. 
+ */ + +#define MAX_DIBS_CLIENTS 8 + +struct dibs_client { + const char *name; +}; + +#endif /* _DIBS_H */ -- cgit v1.2.3 From d324a2ca3f8efd57f5839aa2690554a5cbb3586f Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:50 +0200 Subject: dibs: Register smc as dibs_client Formally register smc as dibs client. Functionality will be moved by follow-on patches from ism_client to dibs_client until eventually ism_client can be removed. As DIBS is only a shim layer without any dependencies, we can depend SMC on DIBS without adding indirect dependencies. A follow-on patch will remove dependency of SMC on ISM. Signed-off-by: Alexandra Winter Reviewed-by: Julian Ruess Link: https://patch.msgid.link/20250918110500.1731261-5-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 3f4175aaa732..7bedeaf52c1b 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -33,10 +33,33 @@ * clients. */ +/* DIBS client + * ----------- + */ #define MAX_DIBS_CLIENTS 8 struct dibs_client { + /* client name for logging and debugging purposes */ const char *name; + /* client index - provided and used by dibs layer */ + u8 id; }; +/* Functions to be called by dibs clients: + */ +/** + * dibs_register_client() - register a client with dibs layer + * @client: this client + * + * Return: zero on success. + */ +int dibs_register_client(struct dibs_client *client); +/** + * dibs_unregister_client() - unregister a client with dibs layer + * @client: this client + * + * Return: zero on success. + */ +int dibs_unregister_client(struct dibs_client *client); + #endif /* _DIBS_H */ -- cgit v1.2.3 From 269726968f95ebc00e3a47f91eebd6818991d6fa Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:51 +0200 Subject: dibs: Register ism as dibs device Register ism devices with the dibs layer. 
Follow-on patches will move functionality to the dibs layer. As DIBS is only a shim layer without any dependencies, we can depend ISM on DIBS without adding indirect dependencies. A follow-on patch will remove implication of SMC by ISM. Define struct dibs_dev. Follow-on patches will move more content into dibs_dev. The goal of follow-on patches is that ism_dev will only contain fields that are special for this device driver. The same concept will apply to other dibs device drivers. Define dibs_dev_alloc(), dibs_dev_add() and dibs_dev_del() to be called by dibs device drivers and call them from ism_drv.c Use ism_dev.dibs for a pointer to dibs_dev. Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-6-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/ism.h | 1 + 2 files changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 7bedeaf52c1b..c12db19c98c0 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -9,6 +9,7 @@ #ifndef _DIBS_H #define _DIBS_H +#include /* DIBS - Direct Internal Buffer Sharing - concept * ----------------------------------------------- * In the case of multiple system sharing the same hardware, dibs fabrics can @@ -62,4 +63,41 @@ int dibs_register_client(struct dibs_client *client); */ int dibs_unregister_client(struct dibs_client *client); +/* DIBS devices + * ------------ + */ +struct dibs_dev { + struct list_head list; +}; + +/* ------- End of client-only functions ----------- */ + +/* + * Functions to be called by dibs device drivers: + */ +/** + * dibs_dev_alloc() - allocate and reference device structure + * + * The following fields will be valid upon successful return: dev + * NOTE: Use put_device(dibs_get_dev(@dibs)) to give up your reference instead + * of freeing @dibs @dev directly once you have successfully called this + * function. 
+ * Return: Pointer to dibs device structure + */ +struct dibs_dev *dibs_dev_alloc(void); +/** + * dibs_dev_add() - register with dibs layer and all clients + * @dibs: dibs device + * + * The following fields must be valid upon entry: dev, ops, drv_priv + * All fields will be valid upon successful return. + * Return: zero on success + */ +int dibs_dev_add(struct dibs_dev *dibs); +/** + * dibs_dev_del() - unregister from dibs layer and all clients + * @dibs: dibs device + */ +void dibs_dev_del(struct dibs_dev *dibs); + #endif /* _DIBS_H */ diff --git a/include/linux/ism.h b/include/linux/ism.h index 8358b4cd7ba6..9a53d3c48c16 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -30,6 +30,7 @@ struct ism_dev { spinlock_t lock; /* protects the ism device */ spinlock_t cmd_lock; /* serializes cmds */ struct list_head list; + struct dibs_dev *dibs; struct pci_dev *pdev; struct ism_sba *sba; -- cgit v1.2.3 From 69baaac9361edd169713562f088829a1be9c51a9 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:53 +0200 Subject: dibs: Define dibs_client_ops and dibs_dev_ops Move the device add() and remove() functions from ism_client to dibs_client_ops and call add_dev()/del_dev() for ism devices and dibs_loopback devices. dibs_client_ops->add_dev() = smcd_register_dev() for the smc_dibs_client. This is the first step to handle ism and loopback devices alike (as dibs devices) in the smc dibs client. Define dibs_dev->ops and move smcd_ops->get_chid to dibs_dev_ops->get_fabric_id() for ism and loopback devices. See below for why this needs to be in the same patch as dibs_client_ops->add_dev(). The following changes contain intermediate steps, that will be obsoleted by follow-on patches, once more functionality has been moved to dibs: Use different smcd_ops and max_dmbs for ism and loopback. Follow-on patches will change SMC-D to directly use dibs_ops instead of smcd_ops. 
In smcd_register_dev() it is now necessary to identify a dibs_loopback device before smcd_dev and smcd_ops->get_chid() are available. So provide dibs_dev_ops->get_fabric_id() in this patch and evaluate it in smc_ism_is_loopback(). Call smc_loopback_init() in smcd_register_dev() and call smc_loopback_exit() in smcd_unregister_dev() to handle the functionality that is still in smc_loopback. Follow-on patches will move all smc_loopback code to dibs_loopback. In smcd_[un]register_dev() use only ism device name, this will be replaced by dibs device name by a follow-on patch. End of changes with intermediate parts. Allocate an smcd event workqueue for all dibs devices, although dibs_loopback does not generate events. Use kernel memory instead of devres memory for smcd_dev and smcd->conn. Since commit a72178cfe855 ("net/smc: Fix dependency of SMC on ISM") an ism device and its driver can have a longer lifetime than the smc module, so smc should not rely on devres to free its resources [1]. It is now the responsibility of the smc client to free smcd and smcd->conn for all dibs devices, ism devices as well as loopback. Call client->ops->del_dev() for all existing dibs devices in dibs_unregister_client(), so all device related structures can be freed in the client. When dibs_unregister_client() is called in the context of smc_exit() or smc_core_reboot_event(), these functions have already called smc_lgrs_shutdown() which calls smc_smcd_terminate_all(smcd) and sets going_away. This is done a second time in smcd_unregister_dev(). This is analogous to how smcr is handled in these functions, by calling first smc_lgrs_shutdown() and then smc_ib_unregister_client() > smc_ib_remove_dev(), so leave it that way. It may be worth investigating, whether smc_lgrs_shutdown() is still required or useful. Remove CONFIG_SMC_LO. CONFIG_DIBS_LO now controls whether a dibs loopback device exists or not. 
Link: https://www.kernel.org/doc/Documentation/driver-model/devres.txt [1] Signed-off-by: Alexandra Winter Reviewed-by: Mahanta Jambigi Link: https://patch.msgid.link/20250918110500.1731261-8-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/ism.h | 2 -- include/net/smc.h | 3 +- 3 files changed, 89 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index c12db19c98c0..805ab33271b5 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -34,14 +34,45 @@ * clients. */ +struct dibs_dev; + /* DIBS client * ----------- */ #define MAX_DIBS_CLIENTS 8 +/* All dibs clients have access to all dibs devices. + * A dibs client provides the following functions to be called by dibs layer or + * dibs device drivers: + */ +struct dibs_client_ops { + /** + * add_dev() - add a dibs device + * @dev: device that was added + * + * Will be called during dibs_register_client() for all existing + * dibs devices and whenever a new dibs device is registered. + * dev is usable until dibs_client.remove() is called. + * *dev is protected by device refcounting. + */ + void (*add_dev)(struct dibs_dev *dev); + /** + * del_dev() - remove a dibs device + * @dev: device to be removed + * + * Will be called whenever a dibs device is removed. + * Will be called during dibs_unregister_client() for all existing + * dibs devices and whenever a dibs device is unregistered. + * The device has already stopped initiative for this client: + * No new handlers will be started. + * The device is no longer usable by this client after this call. 
+ */ + void (*del_dev)(struct dibs_dev *dev); +}; struct dibs_client { /* client name for logging and debugging purposes */ const char *name; + const struct dibs_client_ops *ops; /* client index - provided and used by dibs layer */ u8 id; }; @@ -52,6 +83,7 @@ struct dibs_client { * dibs_register_client() - register a client with dibs layer * @client: this client * + * Will call client->ops->add_dev() for all existing dibs devices. * Return: zero on success. */ int dibs_register_client(struct dibs_client *client); @@ -59,21 +91,74 @@ int dibs_register_client(struct dibs_client *client); * dibs_unregister_client() - unregister a client with dibs layer * @client: this client * + * Will call client->ops->del_dev() for all existing dibs devices. * Return: zero on success. */ int dibs_unregister_client(struct dibs_client *client); +/* dibs clients can call dibs device ops. */ + /* DIBS devices * ------------ */ + +/* Defined fabric id / CHID for all loopback devices: + * All dibs loopback devices report this fabric id. In this case devices with + * the same fabric id can NOT communicate via dibs. Only loopback devices with + * the same dibs device gid can communicate (=same device with itself). + */ +#define DIBS_LOOPBACK_FABRIC 0xFFFF + +/* A dibs device provides the following functions to be called by dibs clients. + * They are mandatory, unless marked 'optional'. + */ +struct dibs_dev_ops { + /** + * get_fabric_id() + * @dev: local dibs device + * + * Only devices on the same dibs fabric can communicate. Fabric_id is + * unique inside the same HW system. Use fabric_id for fast negative + * checks, but only query_remote_gid() can give a reliable positive + * answer: + * Different fabric_id: dibs is not possible + * Same fabric_id: dibs may be possible or not + * (e.g. different HW systems) + * EXCEPTION: DIBS_LOOPBACK_FABRIC denotes an ism_loopback device + * that can only communicate with itself. 
Use dibs_dev.gid + * or query_remote_gid() to determine whether sender and + * receiver use the same ism_loopback device. + * Return: 2 byte dibs fabric id + */ + u16 (*get_fabric_id)(struct dibs_dev *dev); +}; + struct dibs_dev { struct list_head list; + /* To be filled by device driver, before calling dibs_dev_add(): */ + const struct dibs_dev_ops *ops; + /* priv pointer for device driver */ + void *drv_priv; + + /* priv pointer per client; for client usage only */ + void *priv[MAX_DIBS_CLIENTS]; }; +static inline void dibs_set_priv(struct dibs_dev *dev, + struct dibs_client *client, void *priv) +{ + dev->priv[client->id] = priv; +} + +static inline void *dibs_get_priv(struct dibs_dev *dev, + struct dibs_client *client) +{ + return dev->priv[client->id]; +} + /* ------- End of client-only functions ----------- */ -/* - * Functions to be called by dibs device drivers: +/* Functions to be called by dibs device drivers: */ /** * dibs_dev_alloc() - allocate and reference device structure diff --git a/include/linux/ism.h b/include/linux/ism.h index 9a53d3c48c16..c818a25996db 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -59,8 +59,6 @@ struct ism_event { struct ism_client { const char *name; - void (*add)(struct ism_dev *dev); - void (*remove)(struct ism_dev *dev); void (*handle_event)(struct ism_dev *dev, struct ism_event *event); /* Parameter dmbemask contains a bit vector with updated DMBEs, if sent * via ism_move_data().
Callback function must handle all active bits diff --git a/include/net/smc.h b/include/net/smc.h index a9c023dd1380..e271891b85e6 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -15,6 +15,7 @@ #include #include #include +#include #include "linux/ism.h" struct sock; @@ -62,7 +63,6 @@ struct smcd_ops { unsigned int size); int (*supports_v2)(void); void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid); - u16 (*get_chid)(struct smcd_dev *dev); struct device* (*get_dev)(struct smcd_dev *dev); /* optional operations */ @@ -81,6 +81,7 @@ struct smcd_dev { const struct smcd_ops *ops; void *priv; void *client; + struct dibs_dev *dibs; struct list_head list; spinlock_t lock; struct smc_connection **conn; -- cgit v1.2.3 From 845c334a0186a23c2ac4abfb444e499fec831b24 Mon Sep 17 00:00:00 2001 From: Julian Ruess Date: Thu, 18 Sep 2025 13:04:54 +0200 Subject: dibs: Move struct device to dibs_dev Move struct device from ism_dev and smc_lo_dev to dibs_dev, and define a corresponding release function. Free ism_dev in ism_remove() and smc_lo_dev in smc_lo_dev_remove(). Replace smcd->ops->get_dev(smcd) by using dibs->dev directly. An alternative design would be to embed dibs_dev as a field in ism_dev and do the same for other dibs device driver specific structs. However that would have the disadvantage that each dibs device driver needs to allocate dibs_dev and each dibs device driver needs a different device release function. The advantage would be that ism_dev and other device driver specific structs would be covered by device reference counts. 
Signed-off-by: Julian Ruess Co-developed-by: Alexandra Winter Signed-off-by: Alexandra Winter Reviewed-by: Mahanta Jambigi Link: https://patch.msgid.link/20250918110500.1731261-9-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 1 + include/linux/ism.h | 1 - include/net/smc.h | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 805ab33271b5..793c6e1ece0f 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -135,6 +135,7 @@ struct dibs_dev_ops { struct dibs_dev { struct list_head list; + struct device dev; /* To be filled by device driver, before calling dibs_dev_add(): */ const struct dibs_dev_ops *ops; /* priv pointer for device driver */ diff --git a/include/linux/ism.h b/include/linux/ism.h index c818a25996db..84f1afb3dded 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -42,7 +42,6 @@ struct ism_dev { struct ism_eq *ieq; dma_addr_t ieq_dma_addr; - struct device dev; u64 local_gid; int ieq_idx; diff --git a/include/net/smc.h b/include/net/smc.h index e271891b85e6..05faac83371e 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -63,7 +63,6 @@ struct smcd_ops { unsigned int size); int (*supports_v2)(void); void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid); - struct device* (*get_dev)(struct smcd_dev *dev); /* optional operations */ int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id); -- cgit v1.2.3 From 05e68d8dedf34f270cc3769ffe7f0ed413f23add Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:56 +0200 Subject: dibs: Local gid for dibs devices Define a uuid_t GID attribute to identify a dibs device. SMC uses 64 Bit and 128 Bit Global Identifiers (GIDs) per device, that need to be sent via the SMC protocol. Because the smc code uses integers, network endianness and host endianness need to be considered. Avoid this in the dibs layer by using uuid_t byte arrays. 
Future patches could change SMC to use uuid_t. For now conversion helper functions are introduced. ISM devices provide 64 Bit GIDs. Map them to dibs uuid_t GIDs like this: _________________________________________ | 64 Bit ISM-vPCI GID | 00000000_00000000 | ----------------------------------------- If interpreted as UUID [1], this would be interpreted as the UUID variant, that is reserved for NCS backward compatibility. So it will not collide with UUIDs that were generated according to the standard. smc_loopback already uses version 4 UUIDs as 128 Bit GIDs, move that to dibs loopback. A temporary change to smc_lo_query_rgid() is required, that will be moved to dibs_loopback with a follow-on patch. Provide gid of a dibs device as sysfs read-only attribute. Link: https://datatracker.ietf.org/doc/html/rfc4122 [1] Signed-off-by: Alexandra Winter Reviewed-by: Julian Ruess Reviewed-by: Mahanta Jambigi Link: https://patch.msgid.link/20250918110500.1731261-11-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 3 +++ include/linux/ism.h | 1 - include/net/smc.h | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 793c6e1ece0f..904f37505c27 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -10,6 +10,8 @@ #define _DIBS_H #include +#include + /* DIBS - Direct Internal Buffer Sharing - concept * ----------------------------------------------- * In the case of multiple system sharing the same hardware, dibs fabrics can @@ -138,6 +140,7 @@ struct dibs_dev { struct device dev; /* To be filled by device driver, before calling dibs_dev_add(): */ const struct dibs_dev_ops *ops; + uuid_t gid; /* priv pointer for device driver */ void *drv_priv; diff --git a/include/linux/ism.h b/include/linux/ism.h index 84f1afb3dded..a926dd61b5a1 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -42,7 +42,6 @@ struct ism_dev { struct ism_eq *ieq; dma_addr_t
ieq_dma_addr; - u64 local_gid; int ieq_idx; struct ism_client *subs[MAX_CLIENTS]; diff --git a/include/net/smc.h b/include/net/smc.h index 05faac83371e..9cb8385bbc6e 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -62,7 +62,6 @@ struct smcd_ops { bool sf, unsigned int offset, void *data, unsigned int size); int (*supports_v2)(void); - void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid); /* optional operations */ int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id); -- cgit v1.2.3 From 92a0f7bb081dde6e88368816b8ba51352ddabb1d Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:57 +0200 Subject: dibs: Move vlan support to dibs_dev_ops It can be debated how much benefit definition of vlan ids for dibs devices brings, as the dmbs are accessible only by a single peer anyhow. But ism provides vlan support and smcd exploits it, so move it to dibs layer as an optional feature. smcd_loopback simply ignores all vlan settings, do the same in dibs_loopback. SMC-D and ISM have a method to use the invalid VLAN ID 1FFF (ISM_RESERVED_VLANID), to indicate that both communication peers support routable SMC-Dv2. Tolerate it in dibs, but move it to SMC only. Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-12-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 19 +++++++++++++++++++ include/net/smc.h | 5 ----- 2 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 904f37505c27..166148fb8d76 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -133,6 +133,25 @@ struct dibs_dev_ops { * Return: 2 byte dibs fabric id */ u16 (*get_fabric_id)(struct dibs_dev *dev); + /** + * add_vlan_id() - add dibs device to vlan (optional, deprecated) + * @dev: dibs device + * @vlan_id: vlan id + * + * In order to write into a vlan-tagged dmb, the remote device needs + * to belong to the this vlan. 
A device can belong to more than 1 vlan. + * Any device can access an untagged dmb. + * Deprecated, only supported for backwards compatibility. + * Return: zero on success + */ + int (*add_vlan_id)(struct dibs_dev *dev, u64 vlan_id); + /** + * del_vlan_id() - remove dibs device from vlan (optional, deprecated) + * @dev: dibs device + * @vlan_id: vlan id + * Return: zero on success + */ + int (*del_vlan_id)(struct dibs_dev *dev, u64 vlan_id); }; struct dibs_dev { diff --git a/include/net/smc.h b/include/net/smc.h index 9cb8385bbc6e..51b4aefc106a 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -61,13 +61,8 @@ struct smcd_ops { int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); - int (*supports_v2)(void); /* optional operations */ - int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id); - int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id); - int (*set_vlan_required)(struct smcd_dev *dev); - int (*reset_vlan_required)(struct smcd_dev *dev); int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid, u32 trigger_irq, u32 event_code, u64 info); int (*support_dmb_nocopy)(struct smcd_dev *dev); -- cgit v1.2.3 From 719c3b67bb7ea95bb8158b03c75641c8fc8f94a0 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:58 +0200 Subject: dibs: Move query_remote_gid() to dibs_dev_ops Provide the dibs_dev_ops->query_remote_gid() in ism and dibs_loopback dibs_devices. And call it in smc dibs_client. 
Reviewed-by: Julian Ruess Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-13-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 14 ++++++++++++++ include/net/smc.h | 2 -- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index 166148fb8d76..c75a40fe3039 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -133,6 +133,20 @@ struct dibs_dev_ops { * Return: 2 byte dibs fabric id */ u16 (*get_fabric_id)(struct dibs_dev *dev); + /** + * query_remote_gid() + * @dev: local dibs device + * @rgid: gid of remote dibs device + * @vid_valid: if zero, vid will be ignored; + * deprecated, ignored if device does not support vlan + * @vid: VLAN id; deprecated, ignored if device does not support vlan + * + * Query whether a remote dibs device is reachable via this local device + * and this vlan id. + * Return: 0 if remote gid is reachable. + */ + int (*query_remote_gid)(struct dibs_dev *dev, const uuid_t *rgid, + u32 vid_valid, u32 vid); /** * add_vlan_id() - add dibs device to vlan (optional, deprecated) * @dev: dibs device diff --git a/include/net/smc.h b/include/net/smc.h index 51b4aefc106a..5bd135fb4d49 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -53,8 +53,6 @@ struct smcd_gid { }; struct smcd_ops { - int (*query_remote_gid)(struct smcd_dev *dev, struct smcd_gid *rgid, - u32 vid_valid, u32 vid); int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb, void *client); int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); -- cgit v1.2.3 From cc21191b584c6f7836b0f10774f8278b7cbfba10 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 18 Sep 2025 13:04:59 +0200 Subject: dibs: Move data path to dibs layer Use struct dibs_dmb instead of struct smc_dmb and move the corresponding client tables to dibs_dev. Leave driver specific implementation details like sba in the device drivers. 
Register and unregister dmbs via dibs_dev_ops. A dmb is dedicated to a single client, but a dibs device can have dmbs for more than one client. Trigger dibs clients via dibs_client_ops->handle_irq(), when data is received into a dmb. For dibs_loopback replace scheduling an smcd receive tasklet with calling dibs_client_ops->handle_irq(). For loopback devices attach_dmb(), detach_dmb() and move_data() need to access the dmb tables, so move those to dibs_dev_ops in this patch as well. Remove remaining definitions of smc_loopback as they are no longer required, now that everything is in dibs_loopback. Note that struct ism_client and struct ism_dev are still required in smc until a follow-on patch moves event handling to dibs. (Loopback does not use events). Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-14-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ism.h | 23 ------- include/net/smc.h | 22 ------- 3 files changed, 177 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index c75a40fe3039..be009c614205 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -36,12 +36,44 @@ * clients. */ +/* DMB - Direct Memory Buffer + * -------------------------- + * A dibs client provides a dmb as input buffer for a local receiving + * dibs device for exactly one (remote) sending dibs device. Only this + * sending device can send data into this dmb using move_data(). Sender + * and receiver can be the same device. A dmb belongs to exactly one client. + */ +struct dibs_dmb { + /* tok - Token for this dmb + * Used by remote and local devices and clients to address this dmb. + * Provided by dibs fabric. Unique per dibs fabric. 
+ */ + u64 dmb_tok; + /* rgid - GID of designated remote sending device */ + uuid_t rgid; + /* cpu_addr - buffer address */ + void *cpu_addr; + /* len - buffer length */ + u32 dmb_len; + /* idx - Index of this DMB on this receiving device */ + u32 idx; + /* VLAN support (deprecated) + * In order to write into a vlan-tagged dmb, the remote device needs + * to belong to the this vlan + */ + u32 vlan_valid; + u32 vlan_id; + /* optional, used by device driver */ + dma_addr_t dma_addr; +}; + struct dibs_dev; /* DIBS client * ----------- */ #define MAX_DIBS_CLIENTS 8 +#define NO_DIBS_CLIENT 0xff /* All dibs clients have access to all dibs devices. * A dibs client provides the following functions to be called by dibs layer or * dibs device drivers: @@ -69,6 +101,22 @@ struct dibs_client_ops { * The device is no longer usable by this client after this call. */ void (*del_dev)(struct dibs_dev *dev); + /** + * handle_irq() - Handle signaling for a DMB + * @dev: device that owns the dmb + * @idx: Index of the dmb that got signalled + * @dmbemask: signaling mask of the dmb + * + * Handle signaling for a dmb that was registered by this client + * for this device. + * The dibs device can coalesce multiple signaling triggers into a + * single call of handle_irq(). dmbemask can be used to indicate + * different kinds of triggers. 
+ * + * Context: Called in IRQ context by dibs device driver + */ + void (*handle_irq)(struct dibs_dev *dev, unsigned int idx, + u16 dmbemask); }; struct dibs_client { @@ -147,6 +195,77 @@ struct dibs_dev_ops { */ int (*query_remote_gid)(struct dibs_dev *dev, const uuid_t *rgid, u32 vid_valid, u32 vid); + /** + * max_dmbs() + * Return: Max number of DMBs that can be registered for this kind of + * dibs_dev + */ + int (*max_dmbs)(void); + /** + * register_dmb() - allocate and register a dmb + * @dev: dibs device + * @dmb: dmb struct to be registered + * @client: dibs client + * @vid: VLAN id; deprecated, ignored if device does not support vlan + * + * The following fields of dmb must provide valid input: + * @rgid: gid of remote user device + * @dmb_len: buffer length + * @idx: Optionally:requested idx (if non-zero) + * @vlan_valid: if zero, vlan_id will be ignored; + * deprecated, ignored if device does not support vlan + * @vlan_id: deprecated, ignored if device does not support vlan + * Upon return in addition the following fields will be valid: + * @dmb_tok: for usage by remote and local devices and clients + * @cpu_addr: allocated buffer + * @idx: dmb index, unique per dibs device + * @dma_addr: to be used by device driver,if applicable + * + * Allocate a dmb buffer and register it with this device and for this + * client. + * Return: zero on success + */ + int (*register_dmb)(struct dibs_dev *dev, struct dibs_dmb *dmb, + struct dibs_client *client); + /** + * unregister_dmb() - unregister and free a dmb + * @dev: dibs device + * @dmb: dmb struct to be unregistered + * The following fields of dmb must provide valid input: + * @dmb_tok + * @cpu_addr + * @idx + * + * Free dmb.cpu_addr and unregister the dmb from this device. 
+ * Return: zero on success + */ + int (*unregister_dmb)(struct dibs_dev *dev, struct dibs_dmb *dmb); + /** + * move_data() - write into a remote dmb + * @dev: Local sending dibs device + * @dmb_tok: Token of the remote dmb + * @idx: signaling index in dmbemask + * @sf: signaling flag; + * if true, idx will be turned on at target dmbemask mask + * and target device will be signaled. + * @offset: offset within target dmb + * @data: pointer to data to be sent + * @size: length of data to be sent, can be zero. + * + * Use dev to write data of size at offset into a remote dmb + * identified by dmb_tok. Data is moved synchronously, *data can + * be freed when this function returns. + * + * If signaling flag (sf) is true, bit number idx will be turned + * on in the dmbemask mask when handle_irq() is called at the remote + * dibs client that owns the target dmb. The target device may choose + * to coalesce the signaling triggers of multiple move_data() calls + * to the same target dmb into a single handle_irq() call. + * Return: zero on success + */ + int (*move_data)(struct dibs_dev *dev, u64 dmb_tok, unsigned int idx, + bool sf, unsigned int offset, void *data, + unsigned int size); /** * add_vlan_id() - add dibs device to vlan (optional, deprecated) * @dev: dibs device @@ -166,6 +285,55 @@ struct dibs_dev_ops { * Return: zero on success */ int (*del_vlan_id)(struct dibs_dev *dev, u64 vlan_id); + /** + * support_mmapped_rdmb() - can this device provide memory mapped + * remote dmbs? (optional) + * @dev: dibs device + * + * A dibs device can provide a kernel address + length, that represent + * a remote target dmb (like MMIO). Alternatively to calling + * move_data(), a dibs client can write into such a ghost-send-buffer + * (= to this kernel address) and the data will automatically + * immediately appear in the target dmb, even without calling + * move_data().
+ * + * Either all 3 function pointers for support_mmapped_rdmb(), + * attach_dmb() and detach_dmb() are defined, or all of them must + * be NULL. + * + * Return: non-zero, if memory mapped remote dmbs are supported. + */ + int (*support_mmapped_rdmb)(struct dibs_dev *dev); + /** + * attach_dmb() - attach local memory to a remote dmb + * @dev: Local sending dibs device + * @dmb: all other parameters are passed in the form of a + * dmb struct + * TODO: (THIS IS CONFUSING, should be changed) + * dmb_tok: (in) Token of the remote dmb, we want to attach to + * cpu_addr: (out) MMIO address + * dma_addr: (out) MMIO address (if applicable, invalid otherwise) + * dmb_len: (out) length of local MMIO region, + * equal to length of remote DMB. + * idx: (out) index of remote dmb (NOT HELPFUL, should be removed) + * + * Provides a memory address to the sender that can be used to + * directly write into the remote dmb. + * Memory is available until detach_dmb is called + * + * Return: Zero upon success, Error code otherwise + */ + int (*attach_dmb)(struct dibs_dev *dev, struct dibs_dmb *dmb); + /** + * detach_dmb() - Detach the ghost buffer from a remote dmb + * @dev: dibs device + * @token: dmb token of the remote dmb + * + * No need to free cpu_addr.
+ * + * Return: Zero upon success, Error code otherwise + */ + int (*detach_dmb)(struct dibs_dev *dev, u64 token); }; struct dibs_dev { @@ -179,6 +347,15 @@ struct dibs_dev { /* priv pointer per client; for client usage only */ void *priv[MAX_DIBS_CLIENTS]; + + /* get this lock before accessing any of the fields below */ + spinlock_t lock; + /* array of client ids indexed by dmb idx; + * can be used as indices into priv and subs arrays + */ + u8 *dmb_clientid_arr; + /* Sparse array of all ISM clients */ + struct dibs_client *subs[MAX_DIBS_CLIENTS]; }; static inline void dibs_set_priv(struct dibs_dev *dev, diff --git a/include/linux/ism.h b/include/linux/ism.h index a926dd61b5a1..b7feb4dcd5a8 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -11,17 +11,6 @@ #include -struct ism_dmb { - u64 dmb_tok; - u64 rgid; - u32 dmb_len; - u32 sba_idx; - u32 vlan_valid; - u32 vlan_id; - void *cpu_addr; - dma_addr_t dma_addr; -}; - /* Unless we gain unexpected popularity, this limit should hold for a while */ #define MAX_CLIENTS 8 #define ISM_NR_DMBS 1920 @@ -36,7 +25,6 @@ struct ism_dev { struct ism_sba *sba; dma_addr_t sba_dma_addr; DECLARE_BITMAP(sba_bitmap, ISM_NR_DMBS); - u8 *sba_client_arr; /* entries are indices into 'clients' array */ void *priv[MAX_CLIENTS]; struct ism_eq *ieq; @@ -58,11 +46,6 @@ struct ism_event { struct ism_client { const char *name; void (*handle_event)(struct ism_dev *dev, struct ism_event *event); - /* Parameter dmbemask contains a bit vector with updated DMBEs, if sent - * via ism_move_data(). Callback function must handle all active bits - * indicated by dmbemask. - */ - void (*handle_irq)(struct ism_dev *dev, unsigned int bit, u16 dmbemask); /* Private area - don't touch! 
*/ u8 id; }; @@ -79,12 +62,6 @@ static inline void ism_set_priv(struct ism_dev *dev, struct ism_client *client, dev->priv[client->id] = priv; } -int ism_register_dmb(struct ism_dev *dev, struct ism_dmb *dmb, - struct ism_client *client); -int ism_unregister_dmb(struct ism_dev *dev, struct ism_dmb *dmb); -int ism_move(struct ism_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, - unsigned int offset, void *data, unsigned int size); - const struct smcd_ops *ism_get_smcd_ops(void); #endif /* _ISM_H */ diff --git a/include/net/smc.h b/include/net/smc.h index 5bd135fb4d49..8e3debcf7db5 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -28,17 +28,6 @@ struct smc_hashinfo { }; /* SMCD/ISM device driver interface */ -struct smcd_dmb { - u64 dmb_tok; - u64 rgid; - u32 dmb_len; - u32 sba_idx; - u32 vlan_valid; - u32 vlan_id; - void *cpu_addr; - dma_addr_t dma_addr; -}; - #define ISM_EVENT_DMB 0 #define ISM_EVENT_GID 1 #define ISM_EVENT_SWR 2 @@ -53,25 +42,14 @@ struct smcd_gid { }; struct smcd_ops { - int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb, - void *client); - int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); - int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, - bool sf, unsigned int offset, void *data, - unsigned int size); - /* optional operations */ int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid, u32 trigger_irq, u32 event_code, u64 info); - int (*support_dmb_nocopy)(struct smcd_dev *dev); - int (*attach_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); - int (*detach_dmb)(struct smcd_dev *dev, u64 token); }; struct smcd_dev { const struct smcd_ops *ops; void *priv; - void *client; struct dibs_dev *dibs; struct list_head list; spinlock_t lock; -- cgit v1.2.3 From a612dbe8d04d47af91fa88f0599c1370cc70f687 Mon Sep 17 00:00:00 2001 From: Julian Ruess Date: Thu, 18 Sep 2025 13:05:00 +0200 Subject: dibs: Move event handling to dibs layer Add defines for all event types and subtypes an ism 
device is known to produce as it can be helpful for debugging purposes. Introduces a generic 'struct dibs_event' and adopt ism device driver and smc-d client accordingly. Tolerate and ignore other type and subtype values to enable future device extensions. SMC-D and ISM are now independent. struct ism_dev can be moved to drivers/s390/net/ism.h. Note that in smc, the term 'ism' is still used. Future patches could replace that with 'dibs' or 'smc-d' as appropriate. Signed-off-by: Julian Ruess Co-developed-by: Alexandra Winter Signed-off-by: Alexandra Winter Link: https://patch.msgid.link/20250918110500.1731261-15-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- include/linux/dibs.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/net/smc.h | 15 ------------- 2 files changed, 62 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/dibs.h b/include/linux/dibs.h index be009c614205..c75607f8a5cf 100644 --- a/include/linux/dibs.h +++ b/include/linux/dibs.h @@ -67,6 +67,41 @@ struct dibs_dmb { dma_addr_t dma_addr; }; +/* DIBS events + * ----------- + * Dibs devices can optionally notify dibs clients about events that happened + * in the fabric or at the remote device or remote dmb. + */ +enum dibs_event_type { + /* Buffer event, e.g. a remote dmb was unregistered */ + DIBS_BUF_EVENT, + /* Device event, e.g. a remote dibs device was disabled */ + DIBS_DEV_EVENT, + /* Software event, a dibs client can send an event signal to a + * remote dibs device. 
+ */ + DIBS_SW_EVENT, + DIBS_OTHER_TYPE }; + +enum dibs_event_subtype { + DIBS_BUF_UNREGISTERED, + DIBS_DEV_DISABLED, + DIBS_DEV_ERR_STATE, + DIBS_OTHER_SUBTYPE +}; + +struct dibs_event { + u32 type; + u32 subtype; + /* uuid_null if invalid */ + uuid_t gid; + /* zero if invalid */ + u64 buffer_tok; + u64 time; + /* additional data or zero */ + u64 data; +}; + struct dibs_dev; /* DIBS client @@ -117,6 +152,15 @@ struct dibs_client_ops { */ void (*handle_irq)(struct dibs_dev *dev, unsigned int idx, u16 dmbemask); + /** + * handle_event() - Handle control information sent by device + * @dev: device reporting the event + * @event: ism event structure + * + * * Context: Called in IRQ context by dibs device driver + */ + void (*handle_event)(struct dibs_dev *dev, + const struct dibs_event *event); }; struct dibs_client { @@ -285,6 +329,24 @@ struct dibs_dev_ops { * Return: zero on success */ int (*del_vlan_id)(struct dibs_dev *dev, u64 vlan_id); + /** + * signal_event() - trigger an event at a remote dibs device (optional) + * @dev: local dibs device + * @rgid: gid of remote dibs device + * trigger_irq: zero: notification may be coalesced with other events + * non-zero: notify immediately + * @subtype: 4 byte event code, meaning is defined by dibs client + * @data: 8 bytes of additional information, + * meaning is defined by dibs client + * + * dibs devices can offer support for sending a control event of type + * EVENT_SWR to a remote dibs device. + * NOTE: handle_event() will be called for all registered dibs clients + * at the remote device. + * Return: zero on success + */ + int (*signal_event)(struct dibs_dev *dev, const uuid_t *rgid, + u32 trigger_irq, u32 event_code, u64 info); /** * support_mmapped_rdmb() - can this device provide memory mapped * remote dmbs? 
(optional) diff --git a/include/net/smc.h b/include/net/smc.h index 8e3debcf7db5..08bee529ed8d 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -16,7 +16,6 @@ #include #include #include -#include "linux/ism.h" struct sock; @@ -28,28 +27,14 @@ struct smc_hashinfo { }; /* SMCD/ISM device driver interface */ -#define ISM_EVENT_DMB 0 -#define ISM_EVENT_GID 1 -#define ISM_EVENT_SWR 2 - #define ISM_RESERVED_VLANID 0x1FFF -struct smcd_dev; - struct smcd_gid { u64 gid; u64 gid_ext; }; -struct smcd_ops { - /* optional operations */ - int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid, - u32 trigger_irq, u32 event_code, u64 info); -}; - struct smcd_dev { - const struct smcd_ops *ops; - void *priv; struct dibs_dev *dibs; struct list_head list; spinlock_t lock; -- cgit v1.2.3 From 9095d207417477eb50e84fd0652895db77ec584e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 14 Aug 2025 14:22:12 -0300 Subject: fs: Create sb_encoding() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Filesystems that need to deal with the super block encoding need to use an #if IS_ENABLED(CONFIG_UNICODE) guard around it because this struct member is not declared otherwise. In order to move these #if/#endif guards outside of the filesystem code and make it simpler, create a new function that returns the s_encoding member of struct super_block if Unicode is enabled, and returns NULL otherwise.
Suggested-by: Amir Goldstein Reviewed-by: Amir Goldstein Signed-off-by: André Almeida Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Amir Goldstein --- include/linux/fs.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..43b3a7cf6750 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3740,15 +3740,20 @@ static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qst } #endif -static inline bool sb_has_encoding(const struct super_block *sb) +static inline struct unicode_map *sb_encoding(const struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) - return !!sb->s_encoding; + return sb->s_encoding; #else - return false; + return NULL; #endif } +static inline bool sb_has_encoding(const struct super_block *sb) +{ + return !!sb_encoding(sb); +} + int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); -- cgit v1.2.3 From 23253e278a4574114d4c2729ed70f70b4ec7a30e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 14 Aug 2025 14:22:13 -0300 Subject: fs: Create sb_same_encoding() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For cases where a file lookup can look in different filesystems (like in overlayfs), both super blocks must have the same encoding and the same flags. To help with that, create a sb_same_encoding() function. 
Reviewed-by: Amir Goldstein Signed-off-by: André Almeida Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Amir Goldstein --- include/linux/fs.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 43b3a7cf6750..ec867f112fd5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3754,6 +3754,24 @@ static inline bool sb_has_encoding(const struct super_block *sb) return !!sb_encoding(sb); } +/* + * Compare if two super blocks have the same encoding and flags + */ +static inline bool sb_same_encoding(const struct super_block *sb1, + const struct super_block *sb2) +{ +#if IS_ENABLED(CONFIG_UNICODE) + if (sb1->s_encoding == sb2->s_encoding) + return true; + + return (sb1->s_encoding && sb2->s_encoding && + (sb1->s_encoding->version == sb2->s_encoding->version) && + (sb1->s_encoding_flags == sb2->s_encoding_flags)); +#else + return true; +#endif +} + int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); -- cgit v1.2.3 From 0f67b56d84b4c49adfd61f19f81f84ec613ab51a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 14 Aug 2025 16:46:21 +0100 Subject: clocksource/drivers/arm_arch_timer_mmio: Switch over to standalone driver Remove all the MMIO support from the per-CPU timer driver, and switch over to the standalone driver.
Signed-off-by: Marc Zyngier Signed-off-by: Daniel Lezcano Tested-by: Sudeep Holla Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20250814154622.10193-4-maz@kernel.org --- include/clocksource/arm_arch_timer.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index ce6521ad04d1..2eda895f19f5 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -9,9 +9,6 @@ #include #include -#define ARCH_TIMER_TYPE_CP15 BIT(0) -#define ARCH_TIMER_TYPE_MEM BIT(1) - #define ARCH_TIMER_CTRL_ENABLE (1 << 0) #define ARCH_TIMER_CTRL_IT_MASK (1 << 1) #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) @@ -51,8 +48,6 @@ enum arch_timer_spi_nr { #define ARCH_TIMER_PHYS_ACCESS 0 #define ARCH_TIMER_VIRT_ACCESS 1 -#define ARCH_TIMER_MEM_PHYS_ACCESS 2 -#define ARCH_TIMER_MEM_VIRT_ACCESS 3 #define ARCH_TIMER_MEM_MAX_FRAMES 8 -- cgit v1.2.3 From 0494fc345b377d1207c2cbfef67dc51f6ec874c0 Mon Sep 17 00:00:00 2001 From: Gokul Praveen Date: Tue, 12 Aug 2025 16:23:46 +0530 Subject: clocksource/drivers/timer-ti-dm : Capture functionality for OMAP DM timer Add PWM capture function in DM timer driver. OMAP DM timer hardware supports the capture feature. It can be used to timestamp events (falling/rising edges) detected on the input signal.
Signed-off-by: Gokul Praveen Signed-off-by: Daniel Lezcano Reviewed-by: Neha Malcom Francis Link: https://lore.kernel.org/r/20250812105346.203541-1-g-praveen@ti.com --- include/linux/platform_data/dmtimer-omap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/dmtimer-omap.h b/include/linux/platform_data/dmtimer-omap.h index 95d852aef130..726d89143842 100644 --- a/include/linux/platform_data/dmtimer-omap.h +++ b/include/linux/platform_data/dmtimer-omap.h @@ -36,9 +36,13 @@ struct omap_dm_timer_ops { int (*set_pwm)(struct omap_dm_timer *timer, int def_on, int toggle, int trigger, int autoreload); int (*get_pwm_status)(struct omap_dm_timer *timer); + int (*set_cap)(struct omap_dm_timer *timer, + int autoreload, bool config_period); + int (*get_cap_status)(struct omap_dm_timer *timer); int (*set_prescaler)(struct omap_dm_timer *timer, int prescaler); unsigned int (*read_counter)(struct omap_dm_timer *timer); + unsigned int (*read_cap)(struct omap_dm_timer *timer, bool is_period); int (*write_counter)(struct omap_dm_timer *timer, unsigned int value); unsigned int (*read_status)(struct omap_dm_timer *timer); -- cgit v1.2.3 From 17eb98d6b517b6e7faaebed496fd688dbb1771d9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Sep 2025 14:29:48 +1000 Subject: VFS/ovl: add lookup_one_positive_killable() ovl wants a lookup which won't block on a fatal signal. It currently uses down_write_killable() and then repeatedly calls lookup_one(). The lock may not be needed if the name is already in the dcache and it aids proposed future changes if the locking is kept internal to namei.c. So this patch adds lookup_one_positive_killable() which is like lookup_one_positive() but will abort in the face of a fatal signal. overlayfs is changed to use this. Note that instead of always getting an exclusive lock, ovl now only gets a shared lock, and only sometimes. The exclusive lock was never needed.
However down_read_killable() was only added in v4.15 but overlayfs started using down_write_killable() here in v4.7. Note that the linked list ->first_maybe_whiteout ->next_maybe_white is local to the thread so there is no concurrency in that list which could be threatened by removing the locking. Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown Signed-off-by: Christian Brauner --- include/linux/namei.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 5d085428e471..551a1a01e5e7 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -80,6 +80,9 @@ struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base); +struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap, + struct qstr *name, + struct dentry *base); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); -- cgit v1.2.3 From d7fb2c410240348edee7867c29b60688175dcc11 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Sep 2025 14:29:50 +1000 Subject: VFS: unify old_mnt_idmap and new_mnt_idmap in renamedata A rename operation can only rename within a single mount. Callers of vfs_rename() must and do ensure this is the case. So there is no point in having two mnt_idmaps in renamedata as they are always the same. Only one of them is passed to ->rename in any case. This patch replaces both with a single "mnt_idmap" and changes all callers. 
Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Christian Brauner --- include/linux/fs.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d7ab4f96d705..73b39e5bb9e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2008,20 +2008,18 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, /** * struct renamedata - contains all information required for renaming - * @old_mnt_idmap: idmap of the old mount the inode was found from + * @mnt_idmap: idmap of the mount in which the rename is happening. * @old_parent: parent of source * @old_dentry: source - * @new_mnt_idmap: idmap of the new mount the inode was found from * @new_parent: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { - struct mnt_idmap *old_mnt_idmap; + struct mnt_idmap *mnt_idmap; struct dentry *old_parent; struct dentry *old_dentry; - struct mnt_idmap *new_mnt_idmap; struct dentry *new_parent; struct dentry *new_dentry; struct inode **delegated_inode; -- cgit v1.2.3 From 76a53de6f7ff0641570364234fb4489f4d4fc8e9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Sep 2025 14:29:51 +1000 Subject: VFS/audit: introduce kern_path_parent() for audit audit_alloc_mark() and audit_get_nd() both need to perform a path lookup getting the parent dentry (which must exist) and the final target (following a LAST_NORM name) which sometimes doesn't need to exist. They don't need the parent to be locked, but use kern_path_locked() or kern_path_locked_negative() anyway. This is somewhat misleading to the casual reader. This patch introduces a more targeted function, kern_path_parent(), which returns not holding locks. On success the "path" will be set to the parent, which must be found, and the return value is the dentry of the target, which might be negative. 
This will clear the way to rename kern_path_locked() which is otherwise only used to prepare for removing something. It also allows us to remove kern_path_locked_negative(), which is transformed into the new kern_path_parent(). Signed-off-by: NeilBrown Signed-off-by: Christian Brauner --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 551a1a01e5e7..1d5038c21c20 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -57,12 +57,12 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, unsigned int flags); extern int kern_path(const char *, unsigned, struct path *); +struct dentry *kern_path_parent(const char *name, struct path *parent); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); -extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, -- cgit v1.2.3 From 3d18f80ce181ba27f37d0ec1c550b22acb01dd49 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 22 Sep 2025 14:29:52 +1000 Subject: VFS: rename kern_path_locked() and related functions. kern_path_locked() is now only used to prepare for removing an object from the filesystem (and that is the only credible reason for wanting a positive locked dentry). Thus it corresponds to kern_path_create() and so should have a corresponding name. Unfortunately the name "kern_path_create" is somewhat misleading as it doesn't actually create anything. 
The recently added simple_start_creating() provides a better pattern I believe. The "start" can be matched with "end" to bracket the creating or removing. So this patch changes names: kern_path_locked -> start_removing_path kern_path_create -> start_creating_path user_path_create -> start_creating_user_path user_path_locked_at -> start_removing_user_path_at done_path_create -> end_creating_path and also introduces end_removing_path() which is identical to end_creating_path(). __start_removing_path (which was __kern_path_locked) is enhanced to call mnt_want_write() for consistency with the start_creating_path(). Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown Signed-off-by: Christian Brauner --- include/linux/namei.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 1d5038c21c20..a7800ef04e76 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -59,11 +59,15 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, extern int kern_path(const char *, unsigned, struct path *); struct dentry *kern_path_parent(const char *name, struct path *parent); -extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); -extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); -extern void done_path_create(struct path *, struct dentry *); -extern struct dentry *kern_path_locked(const char *, struct path *); -extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); +extern struct dentry *start_creating_path(int, const char *, struct path *, unsigned int); +extern struct dentry *start_creating_user_path(int, const char __user *, struct path *, unsigned int); +extern void end_creating_path(struct path *, struct dentry *); +extern struct dentry *start_removing_path(const char *, struct path *); +extern struct dentry *start_removing_user_path_at(int , const char 
__user *, struct path *); +static inline void end_removing_path(struct path *path , struct dentry *dentry) +{ + end_creating_path(path, dentry); +} int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root); -- cgit v1.2.3 From 1abc1b212effe920f4729353880c8e03f1d76b4b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:40 +0100 Subject: coresight: Appropriately disable programming clocks Some CoreSight components have programming clocks (pclk) and are enabled using clk_get() and clk_prepare_enable(). However, in many cases, these clocks are not disabled when modules exit and only released by clk_put(). To fix the issue, this commit refactors programming clock by replacing clk_get() and clk_prepare_enable() with devm_clk_get_optional_enabled() for enabling APB clock. If the "apb_pclk" clock is not found, a NULL pointer is returned, and the function proceeds to attempt enabling the "apb" clock. Since ACPI platforms rely on firmware to manage clocks, returning a NULL pointer in this case leaves clock management to the firmware rather than the driver. This effectively avoids a clock imbalance issue during module removal - where the clock could be disabled twice: once during the ACPI runtime suspend and again during the devm resource release. Callers are updated to reuse the returned error value. With the change, programming clocks are managed as resources in driver model layer, allowing clock cleanup to be handled automatically. As a result, manual cleanup operations are no longer needed and are removed from the Coresight drivers. 
Fixes: 73d779a03a76 ("coresight: etm4x: Change etm4_platform_driver driver for MMIO devices") Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-4-1dfe10bb3f6f@arm.com --- include/linux/coresight.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 4ac65c68bbf4..1e652e157841 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -6,6 +6,7 @@ #ifndef _LINUX_CORESIGHT_H #define _LINUX_CORESIGHT_H +#include #include #include #include @@ -480,26 +481,21 @@ static inline bool is_coresight_device(void __iomem *base) * Returns: * * clk - Clock is found and enabled - * NULL - clock is not found + * NULL - Clock is controlled by firmware (ACPI device only) * ERROR - Clock is found but failed to enable */ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) { struct clk *pclk; - int ret; - pclk = clk_get(dev, "apb_pclk"); - if (IS_ERR(pclk)) { - pclk = clk_get(dev, "apb"); - if (IS_ERR(pclk)) - return NULL; - } + /* Firmware controls clocks for an ACPI device. */ + if (has_acpi_companion(dev)) + return NULL; + + pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); + if (!pclk) + pclk = devm_clk_get_optional_enabled(dev, "apb"); - ret = clk_prepare_enable(pclk); - if (ret) { - clk_put(pclk); - return ERR_PTR(ret); - } return pclk; } -- cgit v1.2.3 From d091c6312561821f216ced63a7ad17c946b6d335 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:42 +0100 Subject: coresight: Avoid enable programming clock duplicately The programming clock is enabled by AMBA bus driver before a dynamic probe. As a result, a CoreSight driver may redundantly enable the same clock. To avoid this, add a check for device type and skip enabling the programming clock for AMBA devices. 
The returned NULL pointer will be tolerated by the drivers. Fixes: 73d779a03a76 ("coresight: etm4x: Change etm4_platform_driver driver for MMIO devices") Reviewed-by: Anshuman Khandual Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-6-1dfe10bb3f6f@arm.com --- include/linux/coresight.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 1e652e157841..bb49080ec8f9 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -481,20 +481,23 @@ static inline bool is_coresight_device(void __iomem *base) * Returns: * * clk - Clock is found and enabled - * NULL - Clock is controlled by firmware (ACPI device only) + * NULL - Clock is controlled by firmware (ACPI device only) or when managed + * by the AMBA bus driver instead * ERROR - Clock is found but failed to enable */ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) { - struct clk *pclk; + struct clk *pclk = NULL; /* Firmware controls clocks for an ACPI device. */ if (has_acpi_companion(dev)) return NULL; - pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); - if (!pclk) - pclk = devm_clk_get_optional_enabled(dev, "apb"); + if (!dev_is_amba(dev)) { + pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); + if (!pclk) + pclk = devm_clk_get_optional_enabled(dev, "apb"); + } return pclk; } -- cgit v1.2.3 From fbe7514a7912959e384acf108931ac1bfbb16466 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 31 Jul 2025 13:23:43 +0100 Subject: coresight: Consolidate clock enabling CoreSight drivers enable pclk and atclk conditionally. For example, pclk is only enabled in the static probe, while atclk is an optional clock that it is enabled for both dynamic and static probes, if it is present. 
In the current CoreSight drivers, these two clocks are initialized separately. This causes complex and duplicate codes. CoreSight drivers are refined so that clocks are initialized in one go. For this purpose, this commit renames coresight_get_enable_apb_pclk() to coresight_get_enable_clocks() and encapsulates clock initialization logic: - If a clock is initialized successfully, its clock pointer is assigned to the double pointer passed as an argument. - For ACPI devices, clocks are controlled by firmware, directly bail out. - Skip enabling pclk for an AMBA device. - If atclk is not found, the corresponding double pointer is set to NULL. The function returns Success (0) to guide callers can proceed with no error. - Otherwise, an error number is returned for failures. The function became complex, move it from the header to the CoreSight core layer and the symbol is exported. Added comments for recording details. Suggested-by: Suzuki K Poulose Reviewed-by: Anshuman Khandual Reviewed-by: Yeoreum Yun Tested-by: James Clark Signed-off-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250731-arm_cs_fix_clock_v4-v6-7-1dfe10bb3f6f@arm.com --- include/linux/coresight.h | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index bb49080ec8f9..6de59ce8ef8c 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -6,7 +6,6 @@ #ifndef _LINUX_CORESIGHT_H #define _LINUX_CORESIGHT_H -#include #include #include #include @@ -475,33 +474,6 @@ static inline bool is_coresight_device(void __iomem *base) return cid == CORESIGHT_CID; } -/* - * Attempt to find and enable "APB clock" for the given device - * - * Returns: - * - * clk - Clock is found and enabled - * NULL - Clock is controlled by firmware (ACPI device only) or when managed - * by the AMBA bus driver instead - * ERROR - Clock is found but failed to enable - */ 
-static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) -{ - struct clk *pclk = NULL; - - /* Firmware controls clocks for an ACPI device. */ - if (has_acpi_companion(dev)) - return NULL; - - if (!dev_is_amba(dev)) { - pclk = devm_clk_get_optional_enabled(dev, "apb_pclk"); - if (!pclk) - pclk = devm_clk_get_optional_enabled(dev, "apb"); - } - - return pclk; -} - #define CORESIGHT_PIDRn(i) (0xFE0 + ((i) * 4)) static inline u32 coresight_get_pid(struct csdev_access *csa) @@ -732,4 +704,6 @@ void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, struct coresight_device *sink); +int coresight_get_enable_clocks(struct device *dev, struct clk **pclk, + struct clk **atclk); #endif /* _LINUX_COREISGHT_H */ -- cgit v1.2.3 From 559f2eacc8a23c7f44daac09d4f3efd958d497f2 Mon Sep 17 00:00:00 2001 From: Huisong Li Date: Tue, 23 Sep 2025 11:24:28 +0800 Subject: ACPI: processor: Do not expose global variable acpi_idle_driver Move the cpuidle driver check from __acpi_processor_start() to acpi_processor_power_init() which allows variable acpi_idle_driver to become static. No intentional functional impact. Signed-off-by: Huisong Li Link: https://patch.msgid.link/20250923032428.2656329-2-lihuisong@huawei.com [ rjw: Subject tweak, new changelog, adjustment of a new comment ] Signed-off-by: Rafael J. 
Wysocki --- include/acpi/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 24fdaa3c2899..7146a8e9e9c2 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -417,7 +417,6 @@ static inline void acpi_processor_throttling_init(void) {} #endif /* CONFIG_ACPI_CPU_FREQ_PSS */ /* in processor_idle.c */ -extern struct cpuidle_driver acpi_idle_driver; #ifdef CONFIG_ACPI_PROCESSOR_IDLE void acpi_processor_power_init(struct acpi_processor *pr); void acpi_processor_power_exit(struct acpi_processor *pr); -- cgit v1.2.3 From ade2105e748f85eb026d26701091213855aea633 Mon Sep 17 00:00:00 2001 From: Elijah Wright Date: Wed, 20 Aug 2025 22:39:07 -0700 Subject: tracing: Move buffer in trace_seq to end of struct TRACE_SEQ_BUFFER_SIZE is dependent on the architecture for its size. on 64-bit systems, it is 8148 bytes. forced 8-byte alignment in size_t and seq_buf means that trace_seq is 8200 bytes on 64-bit systems. moving the buffer to the end of the struct fixes the issue. there shouldn't be any side effects, i.e. 
pointer arithmetic on trace_seq Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250821053917.23301-1-git@elijahs.space Signed-off-by: Elijah Wright Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_seq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index a93ed5ac3226..557780fe1c77 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -21,10 +21,10 @@ (sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int))) struct trace_seq { - char buffer[TRACE_SEQ_BUFFER_SIZE]; struct seq_buf seq; size_t readpos; int full; + char buffer[TRACE_SEQ_BUFFER_SIZE]; }; static inline void -- cgit v1.2.3 From 5c8fd7e2b5b0a527cf88740da122166695382a78 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 23 Sep 2025 12:24:00 +0100 Subject: bpf: bpf task work plumbing This patch adds necessary plumbing in verifier, syscall and maps to support handling new kfunc bpf_task_work_schedule and kernel structure bpf_task_work. The idea is similar to how we already handle bpf_wq and bpf_timer. verifier changes validate calls to bpf_task_work_schedule to make sure it is safe and expected invariants hold. btf part is required to detect bpf_task_work structure inside map value and store its offset, which will be used in the next patch to calculate key and value addresses. arraymap and hashtab changes are needed to handle freeing of the bpf_task_work: run code needed to deinitialize it, for example cancel task_work callback if possible. The use of bpf_task_work and proper implementation for kfuncs are introduced in the next patch. 
Signed-off-by: Mykyta Yatsenko Acked-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250923112404.668720-6-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 +++++++++++ include/uapi/linux/bpf.h | 4 ++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dfc1a27b56d5..a2ab51fa8b0a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -209,6 +209,7 @@ enum btf_field_type { BPF_WORKQUEUE = (1 << 10), BPF_UPTR = (1 << 11), BPF_RES_SPIN_LOCK = (1 << 12), + BPF_TASK_WORK = (1 << 13), }; enum bpf_cgroup_storage_type { @@ -262,6 +263,7 @@ struct btf_record { int timer_off; int wq_off; int refcount_off; + int task_work_off; struct btf_field fields[]; }; @@ -363,6 +365,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_rb_node"; case BPF_REFCOUNT: return "bpf_refcount"; + case BPF_TASK_WORK: + return "bpf_task_work"; default: WARN_ON_ONCE(1); return "unknown"; @@ -401,6 +405,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_rb_node); case BPF_REFCOUNT: return sizeof(struct bpf_refcount); + case BPF_TASK_WORK: + return sizeof(struct bpf_task_work); default: WARN_ON_ONCE(1); return 0; @@ -433,6 +439,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_rb_node); case BPF_REFCOUNT: return __alignof__(struct bpf_refcount); + case BPF_TASK_WORK: + return __alignof__(struct bpf_task_work); default: WARN_ON_ONCE(1); return 0; @@ -464,6 +472,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_KPTR_REF: case BPF_KPTR_PERCPU: case BPF_UPTR: + case BPF_TASK_WORK: break; default: WARN_ON_ONCE(1); @@ -600,6 +609,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); void 
bpf_wq_cancel_and_free(void *timer); +void bpf_task_work_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, @@ -2426,6 +2436,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); +void bpf_obj_free_task_work(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f3b173e48b0f..ae83d8649ef1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7436,6 +7436,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_task_work { + __u64 __opaque; +} __attribute__((aligned(8))); + struct bpf_wq { __u64 __opaque[2]; } __attribute__((aligned(8))); -- cgit v1.2.3 From 66e2d96b1c5875122bfb94239989d832ccf51477 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 23 Sep 2025 23:37:26 +0800 Subject: LoongArch: KVM: Move kvm_iocsr tracepoint out of generic code The tracepoint kvm_iocsr is only used by the loongarch architecture. As trace events can take up to 5K of memory, move this tracepoint into the LoongArch specific tracing file so that it doesn't waste memory for all other architectures. 
Reviewed-by: Bibo Mao Signed-off-by: Steven Rostedt (Google) Signed-off-by: Huacai Chen --- include/trace/events/kvm.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 8b7252b8d751..b282e3a86769 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -156,41 +156,6 @@ TRACE_EVENT(kvm_mmio, __entry->len, __entry->gpa, __entry->val) ); -#define KVM_TRACE_IOCSR_READ_UNSATISFIED 0 -#define KVM_TRACE_IOCSR_READ 1 -#define KVM_TRACE_IOCSR_WRITE 2 - -#define kvm_trace_symbol_iocsr \ - { KVM_TRACE_IOCSR_READ_UNSATISFIED, "unsatisfied-read" }, \ - { KVM_TRACE_IOCSR_READ, "read" }, \ - { KVM_TRACE_IOCSR_WRITE, "write" } - -TRACE_EVENT(kvm_iocsr, - TP_PROTO(int type, int len, u64 gpa, void *val), - TP_ARGS(type, len, gpa, val), - - TP_STRUCT__entry( - __field( u32, type ) - __field( u32, len ) - __field( u64, gpa ) - __field( u64, val ) - ), - - TP_fast_assign( - __entry->type = type; - __entry->len = len; - __entry->gpa = gpa; - __entry->val = 0; - if (val) - memcpy(&__entry->val, val, - min_t(u32, sizeof(__entry->val), len)); - ), - - TP_printk("iocsr %s len %u gpa 0x%llx val 0x%llx", - __print_symbolic(__entry->type, kvm_trace_symbol_iocsr), - __entry->len, __entry->gpa, __entry->val) -); - #define kvm_fpu_load_symbol \ {0, "unload"}, \ {1, "load"} -- cgit v1.2.3 From 9082aae154be2d9e208b56e249cb886612f7c6cf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2025 09:19:22 -0400 Subject: sunrpc: remove dfprintk_cont() and dfprintk_rcu_cont() KERN_CONT hails from a simpler time, when SMP wasn't the norm. These days, it doesn't quite work right since another printk() can always race in between the first one and the one being "continued". Nothing calls dprintk_rcu_cont(), so just remove it. The only caller of dprintk_cont() is in nfs_commit_release_pages(). 
Just use a normal dprintk() there instead, since this is not SMP-safe anyway. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Reviewed-by: Simon Horman Signed-off-by: Anna Schumaker --- include/linux/sunrpc/debug.h | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index f6aeed07fe04..99a6fa4a1d6a 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -23,12 +23,8 @@ extern unsigned int nlm_debug; #define dprintk(fmt, ...) \ dfprintk(FACILITY, fmt, ##__VA_ARGS__) -#define dprintk_cont(fmt, ...) \ - dfprintk_cont(FACILITY, fmt, ##__VA_ARGS__) #define dprintk_rcu(fmt, ...) \ dfprintk_rcu(FACILITY, fmt, ##__VA_ARGS__) -#define dprintk_rcu_cont(fmt, ...) \ - dfprintk_rcu_cont(FACILITY, fmt, ##__VA_ARGS__) #undef ifdebug #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -37,29 +33,14 @@ extern unsigned int nlm_debug; # define dfprintk(fac, fmt, ...) \ do { \ ifdebug(fac) \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ -} while (0) - -# define dfprintk_cont(fac, fmt, ...) \ -do { \ - ifdebug(fac) \ - printk(KERN_CONT fmt, ##__VA_ARGS__); \ + printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ } while (0) # define dfprintk_rcu(fac, fmt, ...) \ do { \ ifdebug(fac) { \ rcu_read_lock(); \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ - rcu_read_unlock(); \ - } \ -} while (0) - -# define dfprintk_rcu_cont(fac, fmt, ...) \ -do { \ - ifdebug(fac) { \ - rcu_read_lock(); \ - printk(KERN_CONT fmt, ##__VA_ARGS__); \ + printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ rcu_read_unlock(); \ } \ } while (0) @@ -68,7 +49,6 @@ do { \ #else # define ifdebug(fac) if (0) # define dfprintk(fac, fmt, ...) do {} while (0) -# define dfprintk_cont(fac, fmt, ...) do {} while (0) # define dfprintk_rcu(fac, fmt, ...) 
do {} while (0) # define RPC_IFDEBUG(x) #endif -- cgit v1.2.3 From ec7d8e68ef0ec5c635c8f9e93cd881673445a397 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2025 09:19:23 -0400 Subject: sunrpc: add a Kconfig option to redirect dfprintk() output to trace buffer We have a lot of old dprintk() call sites that aren't going anywhere anytime soon. At the same time, turning them up is a serious burden on the host due to the console locking overhead. Add a new Kconfig option that redirects dfprintk() output to the trace buffer. This is more efficient than logging to the console and allows for proper interleaving of dprintk and static tracepoint events. Since using trace_printk() causes scary warnings to pop at boot time, this new option defaults to "n". Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Reviewed-by: Simon Horman Signed-off-by: Anna Schumaker --- include/linux/sunrpc/debug.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 99a6fa4a1d6a..891f6173c951 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -30,17 +30,23 @@ extern unsigned int nlm_debug; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) +# if IS_ENABLED(CONFIG_SUNRPC_DEBUG_TRACE) +# define __sunrpc_printk(fmt, ...) trace_printk(fmt, ##__VA_ARGS__) +# else +# define __sunrpc_printk(fmt, ...) printk(KERN_DEFAULT fmt, ##__VA_ARGS__) +# endif + # define dfprintk(fac, fmt, ...) \ do { \ ifdebug(fac) \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ + __sunrpc_printk(fmt, ##__VA_ARGS__); \ } while (0) # define dfprintk_rcu(fac, fmt, ...) 
\ do { \ ifdebug(fac) { \ rcu_read_lock(); \ - printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ + __sunrpc_printk(fmt, ##__VA_ARGS__); \ rcu_read_unlock(); \ } \ } while (0) -- cgit v1.2.3 From 62c0c0e7491211969d8d1c2a9ab0e112b34664cf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Aug 2025 10:27:28 -0400 Subject: SUNRPC: Move the svc_rpcb_cleanup() call sites Clean up: because svc_rpcb_cleanup() and svc_xprt_destroy_all() are always invoked in pairs, we can deduplicate code by moving the svc_rpcb_cleanup() call sites into svc_xprt_destroy_all(). Signed-off-by: Chuck Lever Tested-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc_xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 369a89aea186..fde60d4e2cd5 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -165,7 +165,8 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred); -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, + bool unregister); void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); -- cgit v1.2.3 From 301f3470273c89df3a933762b7495569f650e68b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 21 Aug 2025 12:27:00 -0400 Subject: nfs: remove NFS_WBACK_BUSY() Nothing calls this macro. 
Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 9aed39abc94b..afe1d8f09d89 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -122,8 +122,6 @@ struct nfs_pageio_descriptor { /* arbitrarily selected limit to number of mirrors */ #define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 -#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) - extern struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx, struct page *page, unsigned int pgbase, -- cgit v1.2.3 From c8a127596edc026e5364a7a609986dcd3999914c Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 30 Jun 2025 10:04:54 -0400 Subject: SUNRPC: Introduce xdr_set_scratch_folio() This will replace xdr_set_scratch_page() when we switch pages to folios. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8a9ec617cf66..3ce17321689a 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -300,6 +300,19 @@ xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); } +/** + * xdr_set_scratch_folio - Attach a scratch buffer for decoding data + * @xdr: pointer to xdr_stream struct + * @folio: an anonymous folio + * + * See xdr_set_scratch_buffer().
+ */ +static inline void +xdr_set_scratch_folio(struct xdr_stream *xdr, struct folio *folio) +{ + xdr_set_scratch_buffer(xdr, folio_address(folio), folio_size(folio)); +} + /** * xdr_reset_scratch_buffer - Clear scratch buffer information * @xdr: pointer to xdr_stream struct -- cgit v1.2.3 From 2f8416f23edf3b5c49ce8e08616d0071cda5c37b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 30 Jun 2025 10:32:29 -0400 Subject: NFS: Update getacl to use xdr_set_scratch_folio() Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac4bff6e9913..7823d4574e29 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -862,7 +862,7 @@ struct nfs_getaclres { size_t acl_len; size_t acl_data_offset; int acl_flags; - struct page * acl_scratch; + struct folio * acl_scratch; }; struct nfs_setattrres { -- cgit v1.2.3 From c9cefd7ae86aa3463adfedca309696ba0946f9c5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 30 Jun 2025 11:14:41 -0400 Subject: NFS: Update listxattr to use xdr_set_scratch_folio() Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7823d4574e29..d56583572c98 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1596,7 +1596,7 @@ struct nfs42_listxattrsargs { struct nfs42_listxattrsres { struct nfs4_sequence_res seq_res; - struct page *scratch; + struct folio *scratch; void *xattr_buf; size_t xattr_len; u64 cookie; -- cgit v1.2.3 From d57e43b72bf2071caac90da323849c3983a695f0 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 30 Jun 2025 14:53:09 -0400 Subject: SUNRPC: Update svcxdr_init_decode() to call xdr_set_scratch_folio() The only snag here is that __folio_alloc_node() doesn't handle NUMA_NO_NODE, so I also need to update 
svc_pool_map_get_node() to return numa_mem_id() instead. I arrived at this approach by looking at what other users of __folio_alloc_node() do for this case. Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 40cbe81360ed..5506d20857c3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -196,7 +196,7 @@ struct svc_rqst { struct xdr_buf rq_arg; struct xdr_stream rq_arg_stream; struct xdr_stream rq_res_stream; - struct page *rq_scratch_page; + struct folio *rq_scratch_folio; struct xdr_buf rq_res; unsigned long rq_maxpages; /* num of entries in rq_pages */ struct page * *rq_pages; @@ -503,7 +503,7 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; xdr_init_decode(xdr, buf, argv->iov_base, NULL); - xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); + xdr_set_scratch_folio(xdr, rqstp->rq_scratch_folio); } /** -- cgit v1.2.3 From cc6ac66f1c0946299b8f192026cff9a320aaad18 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 1 Jul 2025 10:46:50 -0400 Subject: SUNRPC: Update gssx_accept_sec_context() to use xdr_set_scratch_folio() This was the last caller of xdr_set_scratch_page(), so I remove this function while I'm at it. 
Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 3ce17321689a..49278749ad0c 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -287,19 +287,6 @@ xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) xdr->scratch.iov_len = buflen; } -/** - * xdr_set_scratch_page - Attach a scratch buffer for decoding data - * @xdr: pointer to xdr_stream struct - * @page: an anonymous page - * - * See xdr_set_scratch_buffer(). - */ -static inline void -xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) -{ - xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); -} - /** * xdr_set_scratch_folio - Attach a scratch buffer for decoding data * @xdr: pointer to xdr_stream struct -- cgit v1.2.3 From 24bbd533f596a4544e17579e9f622918680e7bff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 6 Sep 2025 12:48:14 -0400 Subject: filemap: Add a helper for filesystems implementing dropbehind Add a helper to allow filesystems to attempt to free the 'dropbehind' folio. 
Signed-off-by: Trond Myklebust Link: https://lore.kernel.org/all/5588a06f6d5a2cf6746828e2d36e7ada668b1739.1745381692.git.trond.myklebust@hammerspace.com/ Reviewed-by: Mike Snitzer Signed-off-by: Anna Schumaker --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 12a12dae727d..201b7c6f6441 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1221,6 +1221,7 @@ void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); +void folio_end_dropbehind(struct folio *folio); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); -- cgit v1.2.3 From 010054a530aa266ee1711dfbe23fc06b6eb0fa48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 6 Sep 2025 12:48:15 -0400 Subject: filemap: Add a version of folio_end_writeback that ignores dropbehind Filesystems such as NFS may need to defer dropbehind until after their 2-stage writes are done. This adds a helper folio_end_writeback_no_dropbehind() that allows them to release the writeback flag without immediately dropping the folio. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 201b7c6f6441..5b26465358ce 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1221,6 +1221,7 @@ void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); +void folio_end_writeback_no_dropbehind(struct folio *folio); void folio_end_dropbehind(struct folio *folio); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); -- cgit v1.2.3 From a91ae3c89311648cbaa9b46b860e4f76004a24b8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 23 Sep 2025 11:01:49 +0000 Subject: bpf, x86: Add support for signed arena loads Currently, signed load instructions into arena memory are unsupported. The compiler is free to generate these, and on GCC-14 we see a corresponding error when it happens. The hurdle in supporting them is deciding which unused opcode to use to mark them for the JIT's own consumption. After much thinking, it appears 0xc0 / BPF_NOSPEC can be combined with load instructions to identify signed arena loads. Use this to recognize and JIT them appropriately, and remove the verifier side limitation on the program if the JIT supports them. 
Co-developed-by: Puranjay Mohan Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20250923110157.18326-2-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4241a885975f..f5c859b8131a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -78,6 +78,9 @@ struct ctl_table_header; /* unused opcode to mark special atomic instruction */ #define BPF_PROBE_ATOMIC 0xe0 +/* unused opcode to mark special ldsx instruction. Same as BPF_NOSPEC */ +#define BPF_PROBE_MEM32SX 0xc0 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 -- cgit v1.2.3 From edf005fa274a0c224e550a52726aa7a426384e36 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Sep 2025 09:03:26 -1000 Subject: sched_ext: Improve SCX_KF_DISPATCH comment The comment for SCX_KF_DISPATCH was incomplete and didn't explain that ops.dispatch() may temporarily release the rq lock, allowing ENQUEUE and SELECT_CPU operations to be nested inside DISPATCH contexts. Update the comment to clarify this nesting behavior and provide better context for when these operations can occur within dispatch. 
Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 7047101dbf58..d82b7a9b0658 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -108,7 +108,11 @@ enum scx_kf_mask { SCX_KF_UNLOCKED = 0, /* sleepable and not rq locked */ /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */ SCX_KF_CPU_RELEASE = 1 << 0, /* ops.cpu_release() */ - /* ops.dequeue (in REST) may be nested inside DISPATCH */ + /* + * ops.dispatch() may release rq lock temporarily and thus ENQUEUE and + * SELECT_CPU may be nested inside. ops.dequeue (in REST) may also be + * nested inside DISPATCH. + */ SCX_KF_DISPATCH = 1 << 1, /* ops.dispatch() */ SCX_KF_ENQUEUE = 1 << 2, /* ops.enqueue() and ops.select_cpu() */ SCX_KF_SELECT_CPU = 1 << 3, /* ops.select_cpu() */ -- cgit v1.2.3 From ccb4f5d91ec43c05ba165ccfc7ed889eb9cdfd05 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Fri, 19 Sep 2025 12:41:09 +0800 Subject: bpf: Allow union argument in trampoline based programs Currently, functions with 'union' arguments cannot be traced with fentry/fexit: bpftrace -e 'fentry:release_pages { exit(); }' -v The function release_pages arg0 type UNION is unsupported. The type of the 'release_pages' arg0 is defined as: typedef union { struct page **pages; struct folio **folios; struct encoded_page **encoded_pages; } release_pages_arg __attribute__ ((__transparent_union__)); This patch relaxes the restriction by allowing function arguments of type 'union' to be traced in verifier. 
Reviewed-by: Amery Hung Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20250919044110.23729-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a2ab51fa8b0a..ba3a3be7eb2a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1128,7 +1128,7 @@ struct bpf_prog_offload { */ #define MAX_BPF_FUNC_REG_ARGS 5 -/* The argument is a structure. */ +/* The argument is a structure or a union. */ #define BTF_FMODEL_STRUCT_ARG BIT(0) /* The argument is signed. */ -- cgit v1.2.3 From 8f12d1137c2382c80aada8e05d7cc650cd4e403c Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Mon, 22 Sep 2025 16:33:49 -0700 Subject: bpf: Clear pfmemalloc flag when freeing all fragments It is possible for bpf_xdp_adjust_tail() to free all fragments. The kfunc currently clears the XDP_FLAGS_HAS_FRAGS bit, but not XDP_FLAGS_FRAGS_PF_MEMALLOC. So far, this has not caused an issue when building sk_buff from xdp_buff since all readers of xdp_buff->flags use the flag only when there are fragments. Clear the XDP_FLAGS_FRAGS_PF_MEMALLOC bit as well to make the flags correct.
Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Reviewed-by: Maciej Fijalkowski Link: https://patch.msgid.link/20250922233356.3356453-2-ameryhung@gmail.com --- include/net/xdp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index b40f1f96cb11..f288c348a6c1 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -115,6 +115,11 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; } +static __always_inline void xdp_buff_clear_frag_pfmemalloc(struct xdp_buff *xdp) +{ + xdp->flags &= ~XDP_FLAGS_FRAGS_PF_MEMALLOC; +} + static __always_inline void xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) { -- cgit v1.2.3 From dea1526fbafb55099a788cde0b659530ee5b1c66 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Mon, 22 Sep 2025 16:33:50 -0700 Subject: bpf: Allow bpf_xdp_shrink_data to shrink a frag from head and tail Move skb_frag_t adjustment into bpf_xdp_shrink_data() and extend its functionality to be able to shrink an xdp fragment from both head and tail. In a later patch, bpf_xdp_pull_data() will reuse it to shrink an xdp fragment from head. Additionally, in bpf_xdp_frags_shrink_tail(), breaking the loop when bpf_xdp_shrink_data() returns false (i.e., not releasing the current fragment) is not necessary as the loop condition, offset > 0, has the same effect. Remove the else branch to simplify the code. 
Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Reviewed-by: Maciej Fijalkowski Link: https://patch.msgid.link/20250922233356.3356453-3-ameryhung@gmail.com --- include/net/xdp_sock_drv.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 513c8e9704f6..4f2d3268a676 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) return ret; } -static inline void xsk_buff_del_tail(struct xdp_buff *tail) +static inline void xsk_buff_del_frag(struct xdp_buff *xdp) { - struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); + struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); list_del(&xskb->list_node); } +static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first) +{ + struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); + struct xdp_buff_xsk *frag; + + frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, + list_node); + return &frag->xdp; +} + static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) { struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); @@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) return NULL; } -static inline void xsk_buff_del_tail(struct xdp_buff *tail) +static inline void xsk_buff_del_frag(struct xdp_buff *xdp) +{ +} + +static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first) { + return NULL; } static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) -- cgit v1.2.3 From b650bf0977d34c52befb31a9fa711534e11b220f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Sep 2025 10:42:40 +0000 Subject: udp: remove busylock and add per NUMA queues busylock was protecting UDP sockets against packet 
floods, but unfortunately was not protecting the host itself. Under stress, many cpus could spin while acquiring the busylock, and NIC had to drop packets. Or packets would be dropped in cpu backlog if RPS/RFS were in place. This patch replaces the busylock by intermediate lockless queues. (One queue per NUMA node). This means that fewer cpus have to acquire the UDP receive queue lock. Most of the cpus can either: - immediately drop the packet. - or queue it in their NUMA aware lockless queue. Then one of the cpus is chosen to process this lockless queue in a batch. The batch only contains packets that were cooked on the same NUMA node, thus with very limited latency impact. Tested: DDOS targeting a victim UDP socket, on a platform with 6 NUMA nodes (Intel(R) Xeon(R) 6985P-C) Before: nstat -n ; sleep 1 ; nstat | grep Udp Udp6InDatagrams 1004179 0.0 Udp6InErrors 3117 0.0 Udp6RcvbufErrors 3117 0.0 After: nstat -n ; sleep 1 ; nstat | grep Udp Udp6InDatagrams 1116633 0.0 Udp6InErrors 14197275 0.0 Udp6RcvbufErrors 14197275 0.0 We can see this host can now process 14.2 M more packets per second while under attack, and the victim socket can receive 11 % more packets. I used a small bpftrace program measuring time (in us) spent in __udp_enqueue_schedule_skb().
Before: @udp_enqueue_us[398]: [0] 24901 |@@@ | [1] 63512 |@@@@@@@@@ | [2, 4) 344827 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [4, 8) 244673 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [8, 16) 54022 |@@@@@@@@ | [16, 32) 222134 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [32, 64) 232042 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | [64, 128) 4219 | | [128, 256) 188 | | After: @udp_enqueue_us[398]: [0] 5608855 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [1] 1111277 |@@@@@@@@@@ | [2, 4) 501439 |@@@@ | [4, 8) 102921 | | [8, 16) 29895 | | [16, 32) 43500 | | [32, 64) 31552 | | [64, 128) 979 | | [128, 256) 13 | | Note that the remaining bottleneck for this platform is in udp_drops_inc() because we limited struct numa_drop_counters to only two nodes so far. Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Reviewed-by: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250922104240.2182559-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/udp.h | 9 ++++++++- include/net/udp.h | 11 +++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index e554890c4415..58795688a186 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -44,6 +44,12 @@ enum { UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ }; +/* per NUMA structure for lockless producer usage. 
*/ +struct udp_prod_queue { + struct llist_head ll_root ____cacheline_aligned_in_smp; + atomic_t rmem_alloc; +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; @@ -90,6 +96,8 @@ struct udp_sock { struct sk_buff *skb, int nhoff); + struct udp_prod_queue *udp_prod_queue; + /* udp_recvmsg try to use this before splicing sk_receive_queue */ struct sk_buff_head reader_queue ____cacheline_aligned_in_smp; @@ -109,7 +117,6 @@ struct udp_sock { */ struct hlist_node tunnel_list; struct numa_drop_counters drop_counters; - spinlock_t busylock ____cacheline_aligned_in_smp; }; #define udp_test_bit(nr, sk) \ diff --git a/include/net/udp.h b/include/net/udp.h index 059a0cee5f55..cffedb3e40f2 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -284,16 +284,23 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); -static inline void udp_lib_init_sock(struct sock *sk) +static inline int udp_lib_init_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); sk->sk_drop_counters = &up->drop_counters; - spin_lock_init(&up->busylock); skb_queue_head_init(&up->reader_queue); INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); + + up->udp_prod_queue = kcalloc(nr_node_ids, sizeof(*up->udp_prod_queue), + GFP_KERNEL); + if (!up->udp_prod_queue) + return -ENOMEM; + for (int i = 0; i < nr_node_ids; i++) + init_llist_head(&up->udp_prod_queue[i].ll_root); + return 0; } static inline void udp_drops_inc(struct sock *sk) -- cgit v1.2.3 From 092263a03105539b8dfe74c59be4c6cce1304d5f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 20 Sep 2025 23:34:07 +0200 Subject: net: phy: stop exporting phy_driver_register phy_driver_register() isn't used outside phy_device.c any longer, so we can stop exporting it. 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/dff44b83-4a85-4fff-bf6b-f12efd97b56e@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index d09fc42e61f3..b377dfaa6801 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2032,7 +2032,6 @@ static inline int phy_read_status(struct phy_device *phydev) void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); -int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_error(struct phy_device *phydev); -- cgit v1.2.3 From 6043819e707cefb1c9e59d6e431dcfa735c4f975 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 22 Sep 2025 10:11:32 +0300 Subject: net/mlx5: fs, fix UAF in flow counter release Fix a kernel trace [1] caused by releasing an HWS action of a local flow counter in mlx5_cmd_hws_delete_fte(), where the HWS action refcount and mutex were not initialized and the counter struct could already be freed when deleting the rule. Fix it by adding the missing initializations and adding refcount for the local flow counter struct. [1] Kernel log: Call Trace: dump_stack_lvl+0x34/0x48 mlx5_fs_put_hws_action.part.0.cold+0x21/0x94 [mlx5_core] mlx5_fc_put_hws_action+0x96/0xad [mlx5_core] mlx5_fs_destroy_fs_actions+0x8b/0x152 [mlx5_core] mlx5_cmd_hws_delete_fte+0x5a/0xa0 [mlx5_core] del_hw_fte+0x1ce/0x260 [mlx5_core] mlx5_del_flow_rules+0x12d/0x240 [mlx5_core] ? ttwu_queue_wakelist+0xf4/0x110 mlx5_ib_destroy_flow+0x103/0x1b0 [mlx5_ib] uverbs_free_flow+0x20/0x50 [ib_uverbs] destroy_hw_idr_uobject+0x1b/0x50 [ib_uverbs] uverbs_destroy_uobject+0x34/0x1a0 [ib_uverbs] uobj_destroy+0x3c/0x80 [ib_uverbs] ib_uverbs_run_method+0x23e/0x360 [ib_uverbs] ? 
uverbs_finalize_object+0x60/0x60 [ib_uverbs] ib_uverbs_cmd_verbs+0x14f/0x2c0 [ib_uverbs] ? do_tty_write+0x1a9/0x270 ? file_tty_write.constprop.0+0x98/0xc0 ? new_sync_write+0xfc/0x190 ib_uverbs_ioctl+0xd7/0x160 [ib_uverbs] __x64_sys_ioctl+0x87/0xc0 do_syscall_64+0x59/0x90 Fixes: b581f4266928 ("net/mlx5: fs, manage flow counters HWS action sharing by refcount") Signed-off-by: Moshe Shemesh Reviewed-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1758525094-816583-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 86055d55836d..6ac76a0c3827 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -308,6 +308,8 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); struct mlx5_fc *mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size); void mlx5_fc_local_destroy(struct mlx5_fc *counter); +void mlx5_fc_local_get(struct mlx5_fc *counter); +void mlx5_fc_local_put(struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); -- cgit v1.2.3 From 7384893d970ea114952aef54ad7e3d7d2ca82d4f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Sep 2025 23:52:56 +0200 Subject: bpf: Allow uprobe program to change context registers Currently uprobe (BPF_PROG_TYPE_KPROBE) program can't write to the context registers data. While this makes sense for kprobe attachments, for uprobe attachment it might make sense to be able to change user space registers to alter application execution. Since uprobe and kprobe programs share the same type (BPF_PROG_TYPE_KPROBE), we can't deny write access to context during the program load. 
We need to check on it during program attachment to see if it's going to be kprobe or uprobe. Store the program's write attempt to the context and check it during attachment. Acked-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20250916215301.664963-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ba3a3be7eb2a..ea2ed6771cc6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1639,6 +1639,7 @@ struct bpf_prog_aux { bool priv_stack_requested; bool changes_pkt_data; bool might_sleep; + bool kprobe_write_ctx; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; -- cgit v1.2.3 From d4680a11e14c7baf683cb8453d91d71d2e0b9d3e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 24 Sep 2025 10:14:26 +0200 Subject: bpf: Mark kfuncs as __noclone Some distributions (e.g., CachyOS) support building the kernel with -O3, but doing so may break kfuncs, resulting in their symbols not being properly exported. In fact, with gcc -O3, some kfuncs may be optimized away despite being annotated as noinline. This happens because gcc can still clone the function during IPA optimizations, e.g., by duplicating or inlining it into callers, and then dropping the standalone symbol. This breaks BTF ID resolution since resolve_btfids relies on the presence of a global symbol for each kfunc. Currently, this is not an issue for upstream, because we don't allow building the kernel with -O3, but it may be safer to address it anyway, to prevent potential issues in the future if compilers become more aggressive with optimizations.
Therefore, add __noclone to __bpf_kfunc to ensure kfuncs are never cloned and remain distinct, globally visible symbols, regardless of the optimization level. Fixes: 57e7c169cd6af ("bpf: Add __bpf_kfunc tag for marking kernel functions as kfuncs") Acked-by: David Vernet Acked-by: Yonghong Song Signed-off-by: Andrea Righi Link: https://lore.kernel.org/r/20250924081426.156934-1-arighi@nvidia.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 9eda6b113f9b..f06976ffb63f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -86,7 +86,7 @@ * as to avoid issues such as the compiler inlining or eliding either a static * kfunc, or a global kfunc in an LTO build. */ -#define __bpf_kfunc __used __retain noinline +#define __bpf_kfunc __used __retain __noclone noinline #define __bpf_kfunc_start_defs() \ __diag_push(); \ -- cgit v1.2.3 From 64f89f6e1f2b7f8f203d218a8c8d90922e1d4048 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 17 Sep 2025 10:54:05 +0200 Subject: gpio: generic: rename BGPIOF_ flags to GPIO_GENERIC_ Make the flags passed to gpio_generic_chip_init() use the same prefix as the rest of the modernized generic GPIO chip API. 
Link: https://lore.kernel.org/r/20250917-gpio-generic-flags-v1-1-69f51fee8c89@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9b14fd20f13e..e62622e42cad 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -684,15 +684,15 @@ int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -#define BGPIOF_BIG_ENDIAN BIT(0) -#define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ -#define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ -#define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) -#define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ -#define BGPIOF_NO_OUTPUT BIT(5) /* only input */ -#define BGPIOF_NO_SET_ON_INPUT BIT(6) -#define BGPIOF_PINCTRL_BACKEND BIT(7) /* Call pinctrl direction setters */ -#define BGPIOF_NO_INPUT BIT(8) /* only output */ +#define GPIO_GENERIC_BIG_ENDIAN BIT(0) +#define GPIO_GENERIC_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ +#define GPIO_GENERIC_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ +#define GPIO_GENERIC_BIG_ENDIAN_BYTE_ORDER BIT(3) +#define GPIO_GENERIC_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ +#define GPIO_GENERIC_NO_OUTPUT BIT(5) /* only input */ +#define GPIO_GENERIC_NO_SET_ON_INPUT BIT(6) +#define GPIO_GENERIC_PINCTRL_BACKEND BIT(7) /* Call pinctrl direction setters */ +#define GPIO_GENERIC_NO_INPUT BIT(8) /* only output */ #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, -- cgit v1.2.3 From 2235b26c1b25daf253748acff501af3ea85faaa8 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 17 Sep 2025 10:54:06 +0200 Subject: gpio: generic: move GPIO_GENERIC_ flags to the correct header These flags are specific to gpio-mmio and belong in 
linux/gpio/generic.h so move them there. Link: https://lore.kernel.org/r/20250917-gpio-generic-flags-v1-2-69f51fee8c89@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 10 ---------- include/linux/gpio/generic.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index e62622e42cad..fabe2baf7b50 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -684,16 +684,6 @@ int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -#define GPIO_GENERIC_BIG_ENDIAN BIT(0) -#define GPIO_GENERIC_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ -#define GPIO_GENERIC_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ -#define GPIO_GENERIC_BIG_ENDIAN_BYTE_ORDER BIT(3) -#define GPIO_GENERIC_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ -#define GPIO_GENERIC_NO_OUTPUT BIT(5) /* only input */ -#define GPIO_GENERIC_NO_SET_ON_INPUT BIT(6) -#define GPIO_GENERIC_PINCTRL_BACKEND BIT(7) /* Call pinctrl direction setters */ -#define GPIO_GENERIC_NO_INPUT BIT(8) /* only output */ - #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index 162430d96660..ff566dc9c3cb 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -9,6 +9,16 @@ struct device; +#define GPIO_GENERIC_BIG_ENDIAN BIT(0) +#define GPIO_GENERIC_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ +#define GPIO_GENERIC_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ +#define GPIO_GENERIC_BIG_ENDIAN_BYTE_ORDER BIT(3) +#define GPIO_GENERIC_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ +#define GPIO_GENERIC_NO_OUTPUT BIT(5) /* only input */ +#define GPIO_GENERIC_NO_SET_ON_INPUT BIT(6) +#define GPIO_GENERIC_PINCTRL_BACKEND 
BIT(7) /* Call pinctrl direction setters */ +#define GPIO_GENERIC_NO_INPUT BIT(8) /* only output */ + /** * struct gpio_generic_chip_config - Generic GPIO chip configuration data * @dev: Parent device of the new GPIO chip (compulsory). -- cgit v1.2.3 From 8327bd4fcb6c1dab01ce5c6ff00b42496836dcd2 Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Sun, 30 Mar 2025 16:42:29 +0000 Subject: asm-generic/io.h: Skip trace helpers if rwmmio events are disabled With `CONFIG_TRACE_MMIO_ACCESS=y`, the `{read,write}{b,w,l,q}{_relaxed}()` mmio accessors unconditionally call `log_{post_}{read,write}_mmio()` helpers, which in turn call the ftrace ops for `rwmmio` trace events. This adds a performance penalty per mmio accessor call, even when `rwmmio` events are disabled at runtime (~80% overhead on local measurement). Guard these with `tracepoint_enabled()`. Signed-off-by: Varad Gautam Fixes: 210031971cdd ("asm-generic/io: Add logging support for MMIO accessors") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 98 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 66 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 11abad6c87e1..ca5a1ce6f0f8 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -75,6 +75,7 @@ #if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && !(defined(__DISABLE_TRACE_MMIO__)) #include +#define rwmmio_tracepoint_enabled(tracepoint) tracepoint_enabled(tracepoint) DECLARE_TRACEPOINT(rwmmio_write); DECLARE_TRACEPOINT(rwmmio_post_write); DECLARE_TRACEPOINT(rwmmio_read); @@ -91,6 +92,7 @@ void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, #else +#define rwmmio_tracepoint_enabled(tracepoint) false static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, unsigned long caller_addr, unsigned long caller_addr0) {} static inline void log_post_write_mmio(u64 val, u8 width, volatile void
__iomem *addr, @@ -189,11 +191,13 @@ static inline u8 readb(const volatile void __iomem *addr) { u8 val; - log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_read)) + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __raw_readb(addr); __io_ar(val); - log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_read)) + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -204,11 +208,13 @@ static inline u16 readw(const volatile void __iomem *addr) { u16 val; - log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_read)) + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); __io_ar(val); - log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_read)) + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -219,11 +225,13 @@ static inline u32 readl(const volatile void __iomem *addr) { u32 val; - log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_read)) + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); __io_ar(val); - log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_read)) + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -235,11 +243,13 @@ static inline u64 readq(const volatile void __iomem *addr) { u64 val; - log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_read)) + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); __io_br(); val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); __io_ar(val); - log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_read)) + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ 
-249,11 +259,13 @@ static inline u64 readq(const volatile void __iomem *addr) #define writeb writeb static inline void writeb(u8 value, volatile void __iomem *addr) { - log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_write)) + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writeb(value, addr); __io_aw(); - log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_write)) + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); } #endif @@ -261,11 +273,13 @@ static inline void writeb(u8 value, volatile void __iomem *addr) #define writew writew static inline void writew(u16 value, volatile void __iomem *addr) { - log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_write)) + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writew((u16 __force)cpu_to_le16(value), addr); __io_aw(); - log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_write)) + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -273,11 +287,13 @@ static inline void writew(u16 value, volatile void __iomem *addr) #define writel writel static inline void writel(u32 value, volatile void __iomem *addr) { - log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_write)) + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writel((u32 __force)__cpu_to_le32(value), addr); __io_aw(); - log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_write)) + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -286,11 +302,13 @@ static inline void writel(u32 value, volatile void __iomem *addr) #define writeq writeq static inline void writeq(u64 value, volatile void __iomem *addr) { - log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + if 
(rwmmio_tracepoint_enabled(rwmmio_write)) + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __io_bw(); __raw_writeq((u64 __force)__cpu_to_le64(value), addr); __io_aw(); - log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_write)) + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif #endif /* CONFIG_64BIT */ @@ -306,9 +324,11 @@ static inline u8 readb_relaxed(const volatile void __iomem *addr) { u8 val; - log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_read)) + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); val = __raw_readb(addr); - log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_read)) + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -319,9 +339,11 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr) { u16 val; - log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_read)) + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); - log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_read)) + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -332,9 +354,11 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr) { u32 val; - log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_read)) + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); - log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_read)) + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -345,9 +369,11 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) { u64 val; - log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_read)) + 
log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); - log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_read)) + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } #endif @@ -356,9 +382,11 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) #define writeb_relaxed writeb_relaxed static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) { - log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_write)) + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); __raw_writeb(value, addr); - log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_write)) + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); } #endif @@ -366,9 +394,11 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) #define writew_relaxed writew_relaxed static inline void writew_relaxed(u16 value, volatile void __iomem *addr) { - log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_write)) + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); __raw_writew((u16 __force)cpu_to_le16(value), addr); - log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_write)) + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -376,9 +406,11 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr) #define writel_relaxed writel_relaxed static inline void writel_relaxed(u32 value, volatile void __iomem *addr) { - log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_write)) + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); __raw_writel((u32 __force)__cpu_to_le32(value), addr); - log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_write)) + 
log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -386,9 +418,11 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr) #define writeq_relaxed writeq_relaxed static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) { - log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_write)) + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __raw_writeq((u64 __force)__cpu_to_le64(value), addr); - log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + if (rwmmio_tracepoint_enabled(rwmmio_post_write)) + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif -- cgit v1.2.3 From 1c1658058c99bcfd3b2347e587a556986037f80a Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 17 Sep 2025 20:10:35 +0200 Subject: hwmon: (dell-smm) Add support for automatic fan mode Many machines treat fan state 3 as some sort of automatic mode, which is superior to the separate SMM calls for switching to automatic fan mode for two reasons: - the fan control mode can be controlled for each fan separately - the current fan control mode can be retrieved from the BIOS On some machines however, this special fan state does not exist. Fan state 3 acts like a regular fan state on such machines or does not exist at all. Such machines usually use separate SMM calls for enabling/disabling automatic fan control. Add support for it. If the machine supports separate SMM calls for changing the fan control mode, then the other interface is ignored. 
Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20250917181036.10972-4-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- include/uapi/linux/i8k.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/i8k.h b/include/uapi/linux/i8k.h index 268e6268f6c8..a16e4049710f 100644 --- a/include/uapi/linux/i8k.h +++ b/include/uapi/linux/i8k.h @@ -36,6 +36,8 @@ #define I8K_FAN_LOW 1 #define I8K_FAN_HIGH 2 #define I8K_FAN_TURBO 3 +/* Many machines treat this mode as some sort of automatic mode */ +#define I8K_FAN_AUTO 3 #define I8K_FAN_MAX I8K_FAN_TURBO #define I8K_VOL_UP 1 -- cgit v1.2.3 From 23049938605bda390f875ce20e0704252c2e5c3d Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:37:10 +0900 Subject: can: populate the minimum and maximum MTU values By populating: net_device->min_mtu and net_device->max_mtu the net core infrastructure will automatically: 1. validate that the user's inputs are in range. 2. report those min and max MTU values through the netlink interface. Add can_set_default_mtu() which sets the default mtu value as well as the minimum and maximum values. The logic for the default mtu value remains unchanged: - CANFD_MTU if the device has a static CAN_CTRLMODE_FD. - CAN_MTU otherwise. Call can_set_default_mtu() each time the CAN_CTRLMODE_FD is modified. This will guarantee that the MTU value is always consistent with the control mode flags. With this, the checks done in can_change_mtu() become fully redundant and will be removed in an upcoming change and it is now possible to confirm the minimum and maximum MTU values on a physical CAN interface by doing: $ ip --details link show can0 The virtual interfaces (vcan and vxcan) are not impacted by this change. 
Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-can-fix-mtu-v3-3-581bde113f52@kernel.org [mkl: squashed https://patch.msgid.link/20250924143644.17622-2-mailhol@kernel.org] Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5dc58360c2d7..3354f70ed2c6 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -166,6 +166,7 @@ struct can_priv *safe_candev_priv(struct net_device *dev); int open_candev(struct net_device *dev); void close_candev(struct net_device *dev); +void can_set_default_mtu(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); int __must_check can_set_static_ctrlmode(struct net_device *dev, u32 static_mode); -- cgit v1.2.3 From cc470fcf1d59f9d6186810ea5253da49a4f85f83 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:26 +0900 Subject: can: dev: move struct data_bittiming_params to linux/can/bittiming.h In commit b803c4a4f788 ("can: dev: add struct data_bittiming_params to group FD parameters"), struct data_bittiming_params was put into linux/can/dev.h. This structure being a collection of bittiming parameters, on second thought, bittiming.h is actually a better location. This way, users of struct data_bittiming_params will not have to forcefully include linux/can/dev.h thus removing some complexity and reducing the risk of circular dependencies in headers. Move struct data_bittiming_params from linux/can/dev.h to linux/can/bittiming.h. 
Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-1-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 11 +++++++++++ include/linux/can/dev.h | 11 ----------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 5dfdbb63b1d5..6572ec1712ca 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -114,6 +114,17 @@ struct can_tdc_const { u32 tdcf_max; }; +struct data_bittiming_params { + const struct can_bittiming_const *data_bittiming_const; + struct can_bittiming data_bittiming; + const struct can_tdc_const *tdc_const; + struct can_tdc tdc; + const u32 *data_bitrate_const; + unsigned int data_bitrate_const_cnt; + int (*do_set_data_bittiming)(struct net_device *dev); + int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); +}; + #ifdef CONFIG_CAN_CALC_BITTIMING int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, const struct can_bittiming_const *btc, struct netlink_ext_ack *extack); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 3354f70ed2c6..c2fe956ab776 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -38,17 +38,6 @@ enum can_termination_gpio { CAN_TERMINATION_GPIO_MAX, }; -struct data_bittiming_params { - const struct can_bittiming_const *data_bittiming_const; - struct can_bittiming data_bittiming; - const struct can_tdc_const *tdc_const; - struct can_tdc tdc; - const u32 *data_bitrate_const; - unsigned int data_bitrate_const_cnt; - int (*do_set_data_bittiming)(struct net_device *dev); - int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); -}; - /* * CAN common private data */ -- cgit v1.2.3 From 7208385df7846d30e29febc6c6280cb32e91ee82 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:27 +0900 Subject: can: dev: make 
can_get_relative_tdco() FD agnostic and move it to bittiming.h can_get_relative_tdco() needs to access can_priv->fd making it specific to CAN FD. Change the function parameter from struct can_priv to struct data_bittiming_params. This way, the function becomes CAN FD agnostic and can be reused later on for the CAN XL TDC. Now that we dropped the dependency on struct can_priv, also move can_get_relative_tdco() back to bittiming.h where it was meant to belong. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-2-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 29 +++++++++++++++++++++++++++++ include/linux/can/dev.h | 29 ----------------------------- 2 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 6572ec1712ca..4d5f7794194a 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -160,6 +160,35 @@ int can_get_bittiming(const struct net_device *dev, struct can_bittiming *bt, const unsigned int bitrate_const_cnt, struct netlink_ext_ack *extack); +/* + * can_get_relative_tdco() - TDCO relative to the sample point + * + * struct can_tdc::tdco represents the absolute offset from TDCV.
Some + * controllers use instead an offset relative to the Sample Point (SP) + * such that: + * + * SSP = TDCV + absolute TDCO + * = TDCV + SP + relative TDCO + * + * -+----------- one bit ----------+-- TX pin + * |<--- Sample Point --->| + * + * --+----------- one bit ----------+-- RX pin + * |<-------- TDCV -------->| + * |<------------------------>| absolute TDCO + * |<--- Sample Point --->| + * | |<->| relative TDCO + * |<------------- Secondary Sample Point ------------>| + */ +static inline s32 can_get_relative_tdco(const struct data_bittiming_params *dbt_params) +{ + const struct can_bittiming *dbt = &dbt_params->data_bittiming; + s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + + dbt->phase_seg1) * dbt->brp; + + return (s32)dbt_params->tdc.tdco - sample_point_in_tc; +} + /* * can_bit_time() - Duration of one bit * diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c2fe956ab776..8e75e9b3830a 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -85,35 +85,6 @@ static inline bool can_fd_tdc_is_enabled(const struct can_priv *priv) return !!(priv->ctrlmode & CAN_CTRLMODE_FD_TDC_MASK); } -/* - * can_get_relative_tdco() - TDCO relative to the sample point - * - * struct can_tdc::tdco represents the absolute offset from TDCV. 
Some - * controllers use instead an offset relative to the Sample Point (SP) - * such that: - * - * SSP = TDCV + absolute TDCO - * = TDCV + SP + relative TDCO - * - * -+----------- one bit ----------+-- TX pin - * |<--- Sample Point --->| - * - * --+----------- one bit ----------+-- RX pin - * |<-------- TDCV -------->| - * |<------------------------>| absolute TDCO - * |<--- Sample Point --->| - * | |<->| relative TDCO - * |<------------- Secondary Sample Point ------------>| - */ -static inline s32 can_get_relative_tdco(const struct can_priv *priv) -{ - const struct can_bittiming *dbt = &priv->fd.data_bittiming; - s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + - dbt->phase_seg1) * dbt->brp; - - return (s32)priv->fd.tdc.tdco - sample_point_in_tc; -} - static inline u32 can_get_static_ctrlmode(struct can_priv *priv) { return priv->ctrlmode & ~priv->ctrlmode_supported; -- cgit v1.2.3 From 94040a8f484576cb1b7df3b2e93118c3b3e3aff4 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:28 +0900 Subject: can: netlink: document which symbols are FD specific The CAN XL netlink interface will also have data bitrate and TDC parameters. The current FD parameters do not have a prefix in their names to differentiate them. Because the netlink interface is part of the UAPI, it is unfortunately not feasible to rename the existing symbols to add an FD_ prefix. The best alternative is to add a comment for each of the symbols to notify the reader of which parts are CAN FD specific. While at it, fix a typo: transiver -> transceiver. 
Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-3-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 02ec32d69474..ef62f56eaaef 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -101,8 +101,8 @@ struct can_ctrlmode { #define CAN_CTRLMODE_PRESUME_ACK 0x40 /* Ignore missing CAN ACKs */ #define CAN_CTRLMODE_FD_NON_ISO 0x80 /* CAN FD in non-ISO mode */ #define CAN_CTRLMODE_CC_LEN8_DLC 0x100 /* Classic CAN DLC option */ -#define CAN_CTRLMODE_TDC_AUTO 0x200 /* CAN transiver automatically calculates TDCV */ -#define CAN_CTRLMODE_TDC_MANUAL 0x400 /* TDCV is manually set up by user */ +#define CAN_CTRLMODE_TDC_AUTO 0x200 /* FD transceiver automatically calculates TDCV */ +#define CAN_CTRLMODE_TDC_MANUAL 0x400 /* FD TDCV is manually set up by user */ /* * CAN device statistics @@ -129,14 +129,14 @@ enum { IFLA_CAN_RESTART_MS, IFLA_CAN_RESTART, IFLA_CAN_BERR_COUNTER, - IFLA_CAN_DATA_BITTIMING, - IFLA_CAN_DATA_BITTIMING_CONST, + IFLA_CAN_DATA_BITTIMING, /* FD */ + IFLA_CAN_DATA_BITTIMING_CONST, /* FD */ IFLA_CAN_TERMINATION, IFLA_CAN_TERMINATION_CONST, IFLA_CAN_BITRATE_CONST, - IFLA_CAN_DATA_BITRATE_CONST, + IFLA_CAN_DATA_BITRATE_CONST, /* FD */ IFLA_CAN_BITRATE_MAX, - IFLA_CAN_TDC, + IFLA_CAN_TDC, /* FD */ IFLA_CAN_CTRLMODE_EXT, /* add new constants above here */ @@ -145,7 +145,7 @@ enum { }; /* - * CAN FD Transmitter Delay Compensation (TDC) + * CAN FD/XL Transmitter Delay Compensation (TDC) * * Please refer to struct can_tdc_const and can_tdc in * include/linux/can/bittiming.h for further details. 
-- cgit v1.2.3 From b23a8425cba5d7908d69f3bce8f3c697362b50ae Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:30 +0900 Subject: can: netlink: add can_validate_tdc() Factorise the TDC validation out of can_validate() and move it into the new can_validate_tdc() function. This is a preparation patch for the introduction of CAN XL because this TDC validation will be reused later on. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-5-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 4d5f7794194a..71f839c3f032 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -16,6 +16,10 @@ #define CAN_CTRLMODE_FD_TDC_MASK \ (CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL) +#define CAN_CTRLMODE_TDC_AUTO_MASK \ + (CAN_CTRLMODE_TDC_AUTO) +#define CAN_CTRLMODE_TDC_MANUAL_MASK \ + (CAN_CTRLMODE_TDC_MANUAL) /* * struct can_tdc - CAN FD Transmission Delay Compensation parameters -- cgit v1.2.3 From 6ffc1230d3a728e07d7d2464f388ad4bbefe90c2 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:43 +0900 Subject: can: calc_bittiming: make can_calc_tdco() FD agnostic can_calc_tdco() uses the CAN_CTRLMODE_FD_TDC_MASK and CAN_CTRLMODE_TDC_AUTO macros making it specific to CAN FD. Add the tdc mask to the function parameter list. The value of the tdc auto flag can then be derived from that mask and stored in a local variable. This way, the function becomes CAN FD agnostic and can be reused later on for the CAN XL TDC.
Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-18-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/bittiming.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h index 71f839c3f032..d30816dd93c7 100644 --- a/include/linux/can/bittiming.h +++ b/include/linux/can/bittiming.h @@ -135,7 +135,7 @@ int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, const struct can_bittiming *dbt, - u32 *ctrlmode, u32 ctrlmode_supported); + u32 tdc_mask, u32 *ctrlmode, u32 ctrlmode_supported); #else /* !CONFIG_CAN_CALC_BITTIMING */ static inline int can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, @@ -148,7 +148,7 @@ can_calc_bittiming(const struct net_device *dev, struct can_bittiming *bt, static inline void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const, const struct can_bittiming *dbt, - u32 *ctrlmode, u32 ctrlmode_supported) + u32 tdc_mask, u32 *ctrlmode, u32 ctrlmode_supported) { } #endif /* CONFIG_CAN_CALC_BITTIMING */ -- cgit v1.2.3 From 7de54546fff11cb0a53f47847d62f7b1a5792d17 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Sep 2025 15:58:44 +0900 Subject: can: dev: add can_get_ctrlmode_str() In an effort to give more human readable messages when errors occur because of conflicting options, it can be useful to convert the CAN control mode flags into text. Add a function which converts the first set CAN control mode into a human readable string. The reason to only convert the first one is to simplify edge cases: imagine that there are several invalid control modes, we would just return the first invalid one to the user, thus not having to handle complex string concatenation. 
The user can then solve the first problem, call the netlink interface again and see the next issue. People who wish to enumerate all the control modes can still do so by, for example, using this new function in a for_each_set_bit() loop. Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20250923-canxl-netlink-prep-v4-19-e720d28f66fe@kernel.org Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 8e75e9b3830a..a2229a61ccde 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -141,6 +141,8 @@ int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); const char *can_get_state_str(const enum can_state state); +const char *can_get_ctrlmode_str(u32 ctrlmode); + void can_state_get_by_berr_counter(const struct net_device *dev, const struct can_berr_counter *bec, enum can_state *tx_state, -- cgit v1.2.3 From 3e86e4d74c0490e5fc5a7f8de8f29e7579c9ffe5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Sep 2025 10:05:47 +0200 Subject: kbuild: keep .modinfo section in vmlinux.unstripped Keep the .modinfo section during linking, but strip it from the final vmlinux. Adjust scripts/mksysmap to exclude modinfo symbols from kallsyms. This change will allow the next commit to extract the .modinfo section from the vmlinux.unstripped intermediate. 
Signed-off-by: Masahiro Yamada Signed-off-by: Alexey Gladkov Reviewed-by: Nicolas Schier Link: https://patch.msgid.link/aaf67c07447215463300fccaa758904bac42f992.1758182101.git.legion@kernel.org Signed-off-by: Nathan Chancellor --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ae2d2359b79e..cfa63860dfd4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -831,6 +831,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) /* Required sections not related to debugging. */ #define ELF_DETAILS \ + .modinfo : { *(.modinfo) } \ .comment 0 : { *(.comment) } \ .symtab 0 : { *(.symtab) } \ .strtab 0 : { *(.strtab) } \ @@ -1044,7 +1045,6 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) *(.discard.*) \ *(.export_symbol) \ *(.no_trim_symbol) \ - *(.modinfo) \ /* ld.bfd warns about .gnu.version* even when not emitted */ \ *(.gnu.version*) \ -- cgit v1.2.3 From 83fb49389bbe07defb85b063f7ff0fd016f06b35 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 18 Sep 2025 10:05:50 +0200 Subject: modpost: Add modname to mod_device_table alias At this point, if a symbol is compiled as part of the kernel, information about which module the symbol belongs to is lost. To save this it is possible to add the module name to the alias name. It's not very pretty, but it's possible for now. 
Cc: Miguel Ojeda Cc: Andreas Hindborg Cc: Danilo Krummrich Cc: Alex Gaynor Cc: rust-for-linux@vger.kernel.org Signed-off-by: Alexey Gladkov Acked-by: Danilo Krummrich Acked-by: Nicolas Schier Link: https://patch.msgid.link/1a0d0bd87a4981d465b9ed21e14f4e78eaa03ded.1758182101.git.legion@kernel.org Signed-off-by: Nathan Chancellor --- include/linux/module.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 3319a5269d28..e31ee29fac6b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -244,10 +244,22 @@ struct module_kobject *lookup_or_create_module_kobject(const char *name); /* What your module does. */ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) +/* + * Format: __mod_device_table__kmod_<modname>__<type>__<name> + * Parts of the string `__kmod_` and `__` are used as delimiters when parsing + * a symbol in file2alias.c + */ +#define __mod_device_table(type, name) \ + __PASTE(__mod_device_table__, \ + __PASTE(__KBUILD_MODNAME, \ + __PASTE(__, \ + __PASTE(type, \ + __PASTE(__, name))))) + #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ -static typeof(name) __mod_device_table__##type##__##name \ +static typeof(name) __mod_device_table(type, name) \ __attribute__ ((used, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) -- cgit v1.2.3 From 5ab23c7923a1d2ae1890026866a2d8506b010a4a Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 18 Sep 2025 10:05:51 +0200 Subject: modpost: Create modalias for builtin modules For some modules, modalias is generated using the modpost utility and the section is added to the module file. When a module is added inside vmlinux, modpost does not generate modalias for such modules and the information is lost.
As a result kmod (which uses modules.builtin.modinfo in userspace) cannot determine that modalias is handled by a builtin kernel module. $ cat /sys/devices/pci0000:00/0000:00:14.0/modalias pci:v00008086d0000A36Dsv00001043sd00008694bc0Csc03i30 $ modinfo xhci_pci name: xhci_pci filename: (builtin) license: GPL file: drivers/usb/host/xhci-pci description: xHCI PCI Host Controller Driver Missing modalias "pci:v*d*sv*sd*bc0Csc03i30*" which will be generated by modpost if the module is built separately. To fix this it is necessary to generate the same modalias for vmlinux as for the individual modules. Fortunately '.vmlinux.export.o' is already generated from which '.modinfo' can be extracted in the same way as for vmlinux.o. Signed-off-by: Masahiro Yamada Signed-off-by: Alexey Gladkov Tested-by: Stephen Rothwell Reviewed-by: Nicolas Schier Link: https://patch.msgid.link/28d4da3b0e3fc8474142746bcf469e03752c3208.1758182101.git.legion@kernel.org Signed-off-by: Nathan Chancellor --- include/linux/module.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index e31ee29fac6b..e135cc79acee 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -256,14 +256,10 @@ struct module_kobject *lookup_or_create_module_kobject(const char *name); __PASTE(type, \ __PASTE(__, name))))) -#ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ static typeof(name) __mod_device_table(type, name) \ __attribute__ ((used, alias(__stringify(name)))) -#else /* !MODULE */ -#define MODULE_DEVICE_TABLE(type, name) -#endif /* Version of form [<epoch>:]<version>[-<extra-version>]. * Or for CVS/RCS ID version, everything but the number is stripped.
-- cgit v1.2.3 From 23ef9d439769d5f35353650e771c63d13824235b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Sep 2025 14:34:19 -0700 Subject: kcfi: Rename CONFIG_CFI_CLANG to CONFIG_CFI The kernel's CFI implementation uses the KCFI ABI specifically, and is not strictly tied to a particular compiler. In preparation for GCC supporting KCFI, rename CONFIG_CFI_CLANG to CONFIG_CFI (along with associated options). Use new "transitional" Kconfig option for old CONFIG_CFI_CLANG that will enable CONFIG_CFI during olddefconfig. Reviewed-by: Linus Walleij Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250923213422.1105654-3-kees@kernel.org Signed-off-by: Kees Cook --- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/cfi.h | 6 +++--- include/linux/cfi_types.h | 8 ++++---- include/linux/compiler.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ae2d2359b79e..a65a87366c48 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -157,7 +157,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define PATCHABLE_DISCARDS *(__patchable_function_entries) #endif -#ifndef CONFIG_ARCH_SUPPORTS_CFI_CLANG +#ifndef CONFIG_ARCH_SUPPORTS_CFI /* * Simply points to ftrace_stub, but with the proper protocol. 
* Defined by the linker script in linux/vmlinux.lds.h diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 52a98886a455..1fd22ea6eba4 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,7 +11,7 @@ #include #include -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI extern bool cfi_warn; enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, @@ -52,7 +52,7 @@ static inline u32 cfi_get_func_hash(void *func) extern u32 cfi_bpf_hash; extern u32 cfi_bpf_subprog_hash; -#else /* CONFIG_CFI_CLANG */ +#else /* CONFIG_CFI */ static inline int cfi_get_offset(void) { return 0; } static inline u32 cfi_get_func_hash(void *func) { return 0; } @@ -60,7 +60,7 @@ static inline u32 cfi_get_func_hash(void *func) { return 0; } #define cfi_bpf_hash 0U #define cfi_bpf_subprog_hash 0U -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS bool is_cfi_trap(unsigned long addr); diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h index 685f7181780f..a86af9bc8bdc 100644 --- a/include/linux/cfi_types.h +++ b/include/linux/cfi_types.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ #include -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI /* * Use the __kcfi_typeid_ type identifier symbol to * annotate indirectly called assembly functions. The compiler emits @@ -29,12 +29,12 @@ #define SYM_TYPED_START(name, linkage, align...) \ SYM_TYPED_ENTRY(name, linkage, align) -#else /* CONFIG_CFI_CLANG */ +#else /* CONFIG_CFI */ #define SYM_TYPED_START(name, linkage, align...) 
\ SYM_START(name, linkage, align) -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #ifndef SYM_TYPED_FUNC_START #define SYM_TYPED_FUNC_START(name) \ @@ -43,7 +43,7 @@ #else /* __ASSEMBLY__ */ -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI #define DEFINE_CFI_TYPE(name, func) \ /* \ * Force a reference to the function so the compiler generates \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..fb27da2221ee 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -248,7 +248,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif /* __KERNEL__ */ -#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_CFI) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) /* * Force a reference to the external symbol so the compiler generates * __kcfi_typid. -- cgit v1.2.3 From d0ca0df179c4b21e2a6c4a4fb637aa8fa14575cb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 24 Sep 2025 13:18:22 -0700 Subject: crypto: af_alg - Fix incorrect boolean values in af_alg_ctx Commit 1b34cbbf4f01 ("crypto: af_alg - Disallow concurrent writes in af_alg_sendmsg") changed some fields from bool to 1-bit bitfields of type u32. However, some assignments to these fields, specifically 'more' and 'merge', assign values greater than 1. These relied on C's implicit conversion to bool, such that zero becomes false and nonzero becomes true. With a 1-bit bitfields of type u32 instead, mod 2 of the value is taken instead, resulting in 0 being assigned in some cases when 1 was intended. Fix this by restoring the bool type. 
Fixes: 1b34cbbf4f01 ("crypto: af_alg - Disallow concurrent writes in af_alg_sendmsg") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Linus Torvalds --- include/crypto/if_alg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 0c70f3a55575..107b797c33ec 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -152,7 +152,7 @@ struct af_alg_ctx { size_t used; atomic_t rcvused; - u32 more:1, + bool more:1, merge:1, enc:1, write:1, -- cgit v1.2.3 From 340974c4f709ce8142a672ab11f1889dff94d6dc Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:00 +0530 Subject: mailbox: Add common header for RPMI messages sent via mailbox The RPMI based mailbox controller drivers and mailbox clients need to share defines related to RPMI messages over mailbox interface so add a common header for this purpose. Acked-by: Jassi Brar Co-developed-by: Rahul Pathak Signed-off-by: Rahul Pathak Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250818040920.272664-5-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/mailbox/riscv-rpmi-message.h | 214 +++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 include/linux/mailbox/riscv-rpmi-message.h (limited to 'include') diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h new file mode 100644 index 000000000000..c3a98fc12c0a --- /dev/null +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Ventana Micro Systems Inc. 
*/ + +#ifndef _LINUX_RISCV_RPMI_MESSAGE_H_ +#define _LINUX_RISCV_RPMI_MESSAGE_H_ + +#include +#include +#include +#include + +/* RPMI version encode/decode macros */ +#define RPMI_VER_MAJOR(__ver) upper_16_bits(__ver) +#define RPMI_VER_MINOR(__ver) lower_16_bits(__ver) +#define RPMI_MKVER(__maj, __min) (((u32)(__maj) << 16) | (u16)(__min)) + +/* RPMI message header */ +struct rpmi_message_header { + __le16 servicegroup_id; + u8 service_id; + u8 flags; + __le16 datalen; + __le16 token; +}; + +/* RPMI message */ +struct rpmi_message { + struct rpmi_message_header header; + u8 data[]; +}; + +/* RPMI notification event */ +struct rpmi_notification_event { + __le16 event_datalen; + u8 event_id; + u8 reserved; + u8 event_data[]; +}; + +/* RPMI error codes */ +enum rpmi_error_codes { + RPMI_SUCCESS = 0, + RPMI_ERR_FAILED = -1, + RPMI_ERR_NOTSUPP = -2, + RPMI_ERR_INVALID_PARAM = -3, + RPMI_ERR_DENIED = -4, + RPMI_ERR_INVALID_ADDR = -5, + RPMI_ERR_ALREADY = -6, + RPMI_ERR_EXTENSION = -7, + RPMI_ERR_HW_FAULT = -8, + RPMI_ERR_BUSY = -9, + RPMI_ERR_INVALID_STATE = -10, + RPMI_ERR_BAD_RANGE = -11, + RPMI_ERR_TIMEOUT = -12, + RPMI_ERR_IO = -13, + RPMI_ERR_NO_DATA = -14, + RPMI_ERR_RESERVED_START = -15, + RPMI_ERR_RESERVED_END = -127, + RPMI_ERR_VENDOR_START = -128, +}; + +static inline int rpmi_to_linux_error(int rpmi_error) +{ + switch (rpmi_error) { + case RPMI_SUCCESS: + return 0; + case RPMI_ERR_INVALID_PARAM: + case RPMI_ERR_BAD_RANGE: + case RPMI_ERR_INVALID_STATE: + return -EINVAL; + case RPMI_ERR_DENIED: + return -EPERM; + case RPMI_ERR_INVALID_ADDR: + case RPMI_ERR_HW_FAULT: + return -EFAULT; + case RPMI_ERR_ALREADY: + return -EALREADY; + case RPMI_ERR_BUSY: + return -EBUSY; + case RPMI_ERR_TIMEOUT: + return -ETIMEDOUT; + case RPMI_ERR_IO: + return -ECOMM; + case RPMI_ERR_FAILED: + case RPMI_ERR_NOTSUPP: + case RPMI_ERR_NO_DATA: + case RPMI_ERR_EXTENSION: + default: + return -EOPNOTSUPP; + } +} + +/* RPMI Linux mailbox attribute IDs */ +enum rpmi_mbox_attribute_id { + 
RPMI_MBOX_ATTR_SPEC_VERSION, + RPMI_MBOX_ATTR_MAX_MSG_DATA_SIZE, + RPMI_MBOX_ATTR_SERVICEGROUP_ID, + RPMI_MBOX_ATTR_SERVICEGROUP_VERSION, + RPMI_MBOX_ATTR_IMPL_ID, + RPMI_MBOX_ATTR_IMPL_VERSION, + RPMI_MBOX_ATTR_MAX_ID +}; + +/* RPMI Linux mailbox message types */ +enum rpmi_mbox_message_type { + RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE, + RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE, + RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE, + RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE, + RPMI_MBOX_MSG_TYPE_NOTIFICATION_EVENT, + RPMI_MBOX_MSG_MAX_TYPE +}; + +/* RPMI Linux mailbox message instance */ +struct rpmi_mbox_message { + enum rpmi_mbox_message_type type; + union { + struct { + enum rpmi_mbox_attribute_id id; + u32 value; + } attr; + + struct { + u32 service_id; + void *request; + unsigned long request_len; + void *response; + unsigned long max_response_len; + unsigned long out_response_len; + } data; + + struct { + u16 event_datalen; + u8 event_id; + u8 *event_data; + } notif; + }; + int error; +}; + +/* RPMI Linux mailbox message helper routines */ +static inline void rpmi_mbox_init_get_attribute(struct rpmi_mbox_message *msg, + enum rpmi_mbox_attribute_id id) +{ + msg->type = RPMI_MBOX_MSG_TYPE_GET_ATTRIBUTE; + msg->attr.id = id; + msg->attr.value = 0; + msg->error = 0; +} + +static inline void rpmi_mbox_init_set_attribute(struct rpmi_mbox_message *msg, + enum rpmi_mbox_attribute_id id, + u32 value) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SET_ATTRIBUTE; + msg->attr.id = id; + msg->attr.value = value; + msg->error = 0; +} + +static inline void rpmi_mbox_init_send_with_response(struct rpmi_mbox_message *msg, + u32 service_id, + void *request, + unsigned long request_len, + void *response, + unsigned long max_response_len) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SEND_WITH_RESPONSE; + msg->data.service_id = service_id; + msg->data.request = request; + msg->data.request_len = request_len; + msg->data.response = response; + msg->data.max_response_len = max_response_len; + msg->data.out_response_len = 0; + 
msg->error = 0; +} + +static inline void rpmi_mbox_init_send_without_response(struct rpmi_mbox_message *msg, + u32 service_id, + void *request, + unsigned long request_len) +{ + msg->type = RPMI_MBOX_MSG_TYPE_SEND_WITHOUT_RESPONSE; + msg->data.service_id = service_id; + msg->data.request = request; + msg->data.request_len = request_len; + msg->data.response = NULL; + msg->data.max_response_len = 0; + msg->data.out_response_len = 0; + msg->error = 0; +} + +static inline void *rpmi_mbox_get_msg_response(struct rpmi_mbox_message *msg) +{ + return msg ? msg->data.response : NULL; +} + +static inline int rpmi_mbox_send_message(struct mbox_chan *chan, + struct rpmi_mbox_message *msg) +{ + int ret; + + /* Send message for the underlying mailbox channel */ + ret = mbox_send_message(chan, msg); + if (ret < 0) + return ret; + + /* Explicitly signal txdone for mailbox channel */ + ret = msg->error; + mbox_client_txdone(chan, ret); + return ret; +} + +#endif /* _LINUX_RISCV_RPMI_MESSAGE_H_ */ -- cgit v1.2.3 From f32a26fab3672e60f622bd7461bf978fc72f29ec Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Wed, 24 Sep 2025 16:24:41 -0700 Subject: hfs/hfsplus: rework debug output subsystem Currently, HFS/HFS+ has very obsolete and inconvenient debug output subsystem. Also, the code is duplicated in HFS and HFS+ driver. This patch introduces linux/hfs_common.h for gathering common declarations, inline functions, and common short methods. Currently, this file contains only hfs_dbg() function that employs pr_debug() with the goal to print a debug-level messages conditionally. 
So, now, it is possible to enable the debug output by means of: echo 'file extent.c +p' > /proc/dynamic_debug/control echo 'func hfsplus_evict_inode +p' > /proc/dynamic_debug/control And debug output looks like this: hfs: pid 5831:fs/hfs/catalog.c:228 hfs_cat_delete(): delete_cat: 00,48 hfs: pid 5831:fs/hfs/extent.c:484 hfs_file_truncate(): truncate: 48, 409600 -> 0 hfs: pid 5831:fs/hfs/extent.c:212 hfs_dump_extent(): hfs: pid 5831:fs/hfs/extent.c:214 hfs_dump_extent(): 78:4 hfs: pid 5831:fs/hfs/extent.c:214 hfs_dump_extent(): 0:0 hfs: pid 5831:fs/hfs/extent.c:214 hfs_dump_extent(): 0:0 v4 Debug messages have been reworked and information about new HFS/HFS+ shared declarations file has been added to MAINTAINERS file. v5 Yangtao Li suggested to clean up debug output and fix several typos. Signed-off-by: Viacheslav Dubeyko cc: John Paul Adrian Glaubitz cc: Yangtao Li cc: linux-fsdevel@vger.kernel.org cc: Johannes Thumshirn Signed-off-by: Viacheslav Dubeyko --- include/linux/hfs_common.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/hfs_common.h (limited to 'include') diff --git a/include/linux/hfs_common.h b/include/linux/hfs_common.h new file mode 100644 index 000000000000..8838ca2f3d08 --- /dev/null +++ b/include/linux/hfs_common.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HFS/HFS+ common definitions, inline functions, + * and shared functionality. + */ + +#ifndef _HFS_COMMON_H_ +#define _HFS_COMMON_H_ + +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define hfs_dbg(fmt, ...) 
\ + pr_debug("pid %d:%s:%d %s(): " fmt, \ + current->pid, __FILE__, __LINE__, __func__, ##__VA_ARGS__) \ + +#endif /* _HFS_COMMON_H_ */ -- cgit v1.2.3 From ba879dfc0574878f3e08f217b2b4fdf845c426c0 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:01 +0530 Subject: mailbox: Allow controller specific mapping using fwnode Introduce optional fw_xlate() callback which allows a mailbox controller driver to provide controller specific mapping using fwnode. The Linux OF framework already implements fwnode operations for the Linux DD framework so the fw_xlate() callback works fine with device tree as well. Acked-by: Jassi Brar Reviewed-by: Andy Shevchenko Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250818040920.272664-6-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/mailbox_controller.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index ad01c4082358..80a427c7ca29 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -66,6 +66,7 @@ struct mbox_chan_ops { * no interrupt rises. Ignored if 'txdone_irq' is set. * @txpoll_period: If 'txdone_poll' is in effect, the API polls for * last TX's status after these many millisecs + * @fw_xlate: Controller driver specific mapping of channel via fwnode * @of_xlate: Controller driver specific mapping of channel via DT * @poll_hrt: API private. hrtimer used to poll for TXDONE on all * channels.
@@ -79,6 +80,8 @@ struct mbox_controller { bool txdone_irq; bool txdone_poll; unsigned txpoll_period; + struct mbox_chan *(*fw_xlate)(struct mbox_controller *mbox, + const struct fwnode_reference_args *sp); struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, const struct of_phandle_args *sp); /* Internal to API */ -- cgit v1.2.3 From 6f01c24f3a7525e953d514367a6d91b39c8f1f6a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:02 +0530 Subject: byteorder: Add memcpy_to_le32() and memcpy_from_le32() Add common memcpy APIs for copying u32 array to/from __le32 array. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Anup Patel Reviewed-by: Linus Walleij Acked-by: Jassi Brar Link: https://lore.kernel.org/r/20250818040920.272664-7-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/byteorder/generic.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index c9a4c96c9943..b3705e8bbe2b 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -173,6 +173,22 @@ static inline void cpu_to_le32_array(u32 *buf, unsigned int words) } } +static inline void memcpy_from_le32(u32 *dst, const __le32 *src, size_t words) +{ + size_t i; + + for (i = 0; i < words; i++) + dst[i] = le32_to_cpu(src[i]); +} + +static inline void memcpy_to_le32(__le32 *dst, const u32 *src, size_t words) +{ + size_t i; + + for (i = 0; i < words; i++) + dst[i] = cpu_to_le32(src[i]); +} + static inline void be16_add_cpu(__be16 *var, u16 val) { *var = cpu_to_be16(be16_to_cpu(*var) + val); -- cgit v1.2.3 From 253757797973c54ea967f8fd8f40d16e4a78e6d4 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 24 Sep 2025 17:16:19 +0800 Subject: scsi: ufs: core: Change MCQ interrupt enable flow Move the MCQ interrupt enable process to ufshcd_mcq_make_queues_operational() to ensure that interrupts are set correctly 
when making queues operational, similar to ufshcd_make_hba_operational(). This change addresses the issue where ufshcd_mcq_make_queues_operational() was not fully operational due to missing interrupt enablement. This change only affects host drivers that call ufshcd_mcq_make_queues_operational(), i.e. ufs-mediatek. Signed-off-by: Peter Wang Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index ea0021f067c9..d8e06de0afbb 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1292,6 +1292,7 @@ static inline void ufshcd_rmwl(struct ufs_hba *hba, u32 mask, u32 val, u32 reg) void ufshcd_enable_irq(struct ufs_hba *hba); void ufshcd_disable_irq(struct ufs_hba *hba); +void ufshcd_enable_intr(struct ufs_hba *hba, u32 intrs); int ufshcd_alloc_host(struct device *, struct ufs_hba **); int ufshcd_hba_enable(struct ufs_hba *hba); int ufshcd_init(struct ufs_hba *, void __iomem *, unsigned int); -- cgit v1.2.3 From 79dde5f7dc7c038eec903745dc1550cd4139980e Mon Sep 17 00:00:00 2001 From: Zhongqiu Han Date: Wed, 17 Sep 2025 17:41:43 +0800 Subject: scsi: ufs: core: Fix data race in CPU latency PM QoS request handling The cpu_latency_qos_add/remove/update_request interfaces lack internal synchronization by design, requiring the caller to ensure thread safety. The current implementation relies on the 'pm_qos_enabled' flag, which is insufficient to prevent concurrent access and cannot serve as a proper synchronization mechanism. This has led to data races and list corruption issues. 
A typical race condition call trace is: [Thread A] ufshcd_pm_qos_exit() --> cpu_latency_qos_remove_request() --> cpu_latency_qos_apply(); --> pm_qos_update_target() --> plist_del <--(1) delete plist node --> memset(req, 0, sizeof(*req)); --> hba->pm_qos_enabled = false; [Thread B] ufshcd_devfreq_target --> ufshcd_devfreq_scale --> ufshcd_scale_clks --> ufshcd_pm_qos_update <--(2) pm_qos_enabled is true --> cpu_latency_qos_update_request --> pm_qos_update_target --> plist_del <--(3) plist node use-after-free Introduces a dedicated mutex to serialize PM QoS operations, preventing data races and ensuring safe access to PM QoS resources, including sysfs interface reads. Fixes: 2777e73fc154 ("scsi: ufs: core: Add CPU latency QoS support for UFS driver") Signed-off-by: Zhongqiu Han Reviewed-by: Bart Van Assche Tested-by: Huan Tang Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d8e06de0afbb..9425cfd9d00e 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -938,6 +938,7 @@ enum ufshcd_mcq_opr { * @ufs_rtc_update_work: A work for UFS RTC periodic update * @pm_qos_req: PM QoS request handle * @pm_qos_enabled: flag to check if pm qos is enabled + * @pm_qos_mutex: synchronizes PM QoS request and status updates * @critical_health_count: count of critical health exceptions * @dev_lvl_exception_count: count of device level exceptions since last reset * @dev_lvl_exception_id: vendor specific information about the @@ -1110,6 +1111,8 @@ struct ufs_hba { struct delayed_work ufs_rtc_update_work; struct pm_qos_request pm_qos_req; bool pm_qos_enabled; + /* synchronizes PM QoS request and status updates */ + struct mutex pm_qos_mutex; int critical_health_count; atomic_t dev_lvl_exception_count; -- cgit v1.2.3 From edcc8a38b5ac1a3dbd05e113a38a25b937ebefe5 Mon Sep 17 00:00:00 2001 From: Qi Xi Date: Tue, 9 Sep 2025 19:29:10 +0800 Subject: 
once: fix race by moving DO_ONCE to separate section The commit c2c60ea37e5b ("once: use __section(".data.once")") moved DO_ONCE's ___done variable to .data.once section, which conflicts with DO_ONCE_LITE() that also uses the same section. This creates a race condition when clear_warn_once is used: Thread 1 (DO_ONCE) Thread 2 (DO_ONCE) __do_once_start read ___done (false) acquire once_lock execute func __do_once_done write ___done (true) __do_once_start release once_lock // Thread 3 clear_warn_once reset ___done read ___done (false) acquire once_lock execute func schedule once_work __do_once_done once_deferred: OK write ___done (true) static_branch_disable release once_lock schedule once_work once_deferred: BUG_ON(!static_key_enabled) DO_ONCE_LITE() in once_lite.h is used by WARN_ON_ONCE() and other warning macros. Keep its ___done flag in the .data..once section and allow resetting by clear_warn_once, as originally intended. In contrast, DO_ONCE() is used for functions like get_random_once() and relies on its ___done flag for internal synchronization. We should not reset DO_ONCE() by clear_warn_once. Fix it by isolating DO_ONCE's ___done into a separate .data..do_once section, shielding it from clear_warn_once. 
Fixes: c2c60ea37e5b ("once: use __section(".data.once")") Reported-by: Hulk Robot Signed-off-by: Qi Xi Signed-off-by: Arnd Bergmann --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/once.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ae2d2359b79e..8efbe8c4874e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -361,6 +361,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) __start_once = .; \ *(.data..once) \ __end_once = .; \ + *(.data..do_once) \ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ diff --git a/include/linux/once.h b/include/linux/once.h index 30346fcdc799..449a0e34ad5a 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -46,7 +46,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, #define DO_ONCE(func, ...) \ ({ \ bool ___ret = false; \ - static bool __section(".data..once") ___done = false; \ + static bool __section(".data..do_once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ unsigned long ___flags; \ @@ -64,7 +64,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, #define DO_ONCE_SLEEPABLE(func, ...) 
\ ({ \ bool ___ret = false; \ - static bool __section(".data..once") ___done = false; \ + static bool __section(".data..do_once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ ___ret = __do_once_sleepable_start(&___done); \ -- cgit v1.2.3 From 5295be6c4ea4e1ffd38ab0ab131a65afc6b78e9f Mon Sep 17 00:00:00 2001 From: Vitaly Margolin Date: Sun, 23 Jun 2024 09:19:15 +0300 Subject: accel/habanalabs: add generic message type to get error counters Add a new CPUCP generic message type to retrieve HBM, SRAM and critical error counters from the device. Signed-off-by: Vitaly Margolin Reviewed-by: Koby Elbaz Signed-off-by: Koby Elbaz --- include/linux/habanalabs/cpucp_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 7ed3fdd55dda..29c50e7427d1 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1425,9 +1425,11 @@ struct cpucp_monitor_dump { * from "pkt_subidx" field in struct cpucp_packet. * * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. + * HL_GET_ERR_COUNTERS_CMD - Command to get error counters */ enum hl_passthrough_type { HL_PASSTHROUGH_VERSIONS, + HL_GET_ERR_COUNTERS_CMD, }; #endif /* CPUCP_IF_H */ -- cgit v1.2.3 From cade027efa9b358719dcb9305b845d905c3eafce Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 31 Jul 2024 13:56:34 +0300 Subject: accel/habanalabs: fix typo in trace output (cms -> cmd) Fix a typo in TP_printk format string of habanalabs tracepoint: replace "cms" with "cmd". 
Signed-off-by: Tomer Tayar Reviewed-by: Koby Elbaz Signed-off-by: Koby Elbaz --- include/trace/events/habanalabs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/habanalabs.h b/include/trace/events/habanalabs.h index 4a2bb2c896d1..fa0d2c6bace4 100644 --- a/include/trace/events/habanalabs.h +++ b/include/trace/events/habanalabs.h @@ -145,7 +145,7 @@ DECLARE_EVENT_CLASS(habanalabs_comms_template, __entry->op_str = op_str; ), - TP_printk("%s: cms: %s", + TP_printk("%s: cmd: %s", __get_str(dname), __entry->op_str) ); -- cgit v1.2.3 From 65a3f5bc331ca7384900641b46cadff63805c10e Mon Sep 17 00:00:00 2001 From: Ariel Aviad Date: Tue, 24 Sep 2024 11:02:49 +0300 Subject: accel/habanalabs: add HL_GET_P_STATE passthrough type Add a new passthrough type HL_GET_P_STATE to the cpucp generic ioctl to allow userspace to read the device performance state via firmware. Signed-off-by: Ariel Aviad Reviewed-by: Koby Elbaz Signed-off-by: Koby Elbaz --- include/linux/habanalabs/cpucp_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 29c50e7427d1..45f181bcf890 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1426,10 +1426,12 @@ struct cpucp_monitor_dump { * * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. * HL_GET_ERR_COUNTERS_CMD - Command to get error counters + * HL_GET_P_STATE - get performance state */ enum hl_passthrough_type { HL_PASSTHROUGH_VERSIONS, HL_GET_ERR_COUNTERS_CMD, + HL_GET_P_STATE, }; #endif /* CPUCP_IF_H */ -- cgit v1.2.3 From a19239ba14525c26ad097d59fd52cd9198b5bcdb Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 24 Sep 2025 13:49:49 +0100 Subject: afs: Add support for RENAME_NOREPLACE and RENAME_EXCHANGE Add support for RENAME_NOREPLACE and RENAME_EXCHANGE, if the server supports them. 
The default is translated to YFS.Rename_Replace, falling back to YFS.Rename; RENAME_NOREPLACE is translated to YFS.Rename_NoReplace and RENAME_EXCHANGE to YFS.Rename_Exchange, both of which fall back to reporting EINVAL. Signed-off-by: David Howells Link: https://lore.kernel.org/740476.1758718189@warthog.procyon.org.uk cc: Marc Dionne cc: Dan Carpenter cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner --- include/trace/events/afs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 7f83d242c8e9..1b3c48b5591d 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -69,6 +69,9 @@ enum afs_fs_operation { yfs_FS_RemoveACL = 64171, yfs_FS_RemoveFile2 = 64173, yfs_FS_StoreOpaqueACL2 = 64174, + yfs_FS_Rename_Replace = 64176, + yfs_FS_Rename_NoReplace = 64177, + yfs_FS_Rename_Exchange = 64187, yfs_FS_InlineBulkStatus = 64536, /* YFS Fetch multiple file statuses with errors */ yfs_FS_FetchData64 = 64537, /* YFS Fetch file data */ yfs_FS_StoreData64 = 64538, /* YFS Store file data */ @@ -300,6 +303,9 @@ enum yfs_cm_operation { EM(yfs_FS_RemoveACL, "YFS.RemoveACL") \ EM(yfs_FS_RemoveFile2, "YFS.RemoveFile2") \ EM(yfs_FS_StoreOpaqueACL2, "YFS.StoreOpaqueACL2") \ + EM(yfs_FS_Rename_Replace, "YFS.Rename_Replace") \ + EM(yfs_FS_Rename_NoReplace, "YFS.Rename_NoReplace") \ + EM(yfs_FS_Rename_Exchange, "YFS.Rename_Exchange") \ EM(yfs_FS_InlineBulkStatus, "YFS.InlineBulkStatus") \ EM(yfs_FS_FetchData64, "YFS.FetchData64") \ EM(yfs_FS_StoreData64, "YFS.StoreData64") \ -- cgit v1.2.3 From 10cdfcd37ade7ce736bc4a1927680f390a6b1f7b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 24 Sep 2025 13:33:58 +0200 Subject: nstree: make struct ns_tree private Don't expose it directly. There's no need to do that. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/nstree.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/nstree.h b/include/linux/nstree.h index 29ad6402260c..8b8636690473 100644 --- a/include/linux/nstree.h +++ b/include/linux/nstree.h @@ -9,19 +9,6 @@ #include #include -/** - * struct ns_tree - Namespace tree - * @ns_tree: Rbtree of namespaces of a particular type - * @ns_list: Sequentially walkable list of all namespaces of this type - * @ns_tree_lock: Seqlock to protect the tree and list - */ -struct ns_tree { - struct rb_root ns_tree; - struct list_head ns_list; - seqlock_t ns_tree_lock; - int type; -}; - extern struct ns_tree cgroup_ns_tree; extern struct ns_tree ipc_ns_tree; extern struct ns_tree mnt_ns_tree; -- cgit v1.2.3 From 4055526d35746ce8b04bfa5e14e14f28bb163186 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 24 Sep 2025 13:33:59 +0200 Subject: ns: move ns type into struct ns_common It's misplaced in struct proc_ns_operations and ns->ops might be NULL if the namespace is compiled out but we still want to know the type of the namespace for the initial namespace struct. 
Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 30 +++++++++++++++++++++++++----- include/linux/proc_ns.h | 1 - 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 56492cd9ff8d..f5b68b8abb54 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -4,6 +4,7 @@ #include #include +#include struct proc_ns_operations; @@ -37,6 +38,7 @@ extern const struct proc_ns_operations timens_operations; extern const struct proc_ns_operations timens_for_children_operations; struct ns_common { + u32 ns_type; struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; @@ -51,7 +53,7 @@ struct ns_common { }; }; -int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum); +int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum); void __ns_common_free(struct ns_common *ns); #define to_ns_common(__ns) \ @@ -106,10 +108,28 @@ void __ns_common_free(struct ns_common *ns); struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \ struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL)) -#define ns_common_init(__ns) \ - __ns_common_init(to_ns_common(__ns), to_ns_operations(__ns), (((__ns) == ns_init_ns(__ns)) ? 
ns_init_inum(__ns) : 0)) - -#define ns_common_init_inum(__ns, __inum) __ns_common_init(to_ns_common(__ns), to_ns_operations(__ns), __inum) +#define ns_common_type(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: CLONE_NEWCGROUP, \ + struct ipc_namespace *: CLONE_NEWIPC, \ + struct mnt_namespace *: CLONE_NEWNS, \ + struct net *: CLONE_NEWNET, \ + struct pid_namespace *: CLONE_NEWPID, \ + struct time_namespace *: CLONE_NEWTIME, \ + struct user_namespace *: CLONE_NEWUSER, \ + struct uts_namespace *: CLONE_NEWUTS) + +#define ns_common_init(__ns) \ + __ns_common_init(to_ns_common(__ns), \ + ns_common_type(__ns), \ + to_ns_operations(__ns), \ + (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) + +#define ns_common_init_inum(__ns, __inum) \ + __ns_common_init(to_ns_common(__ns), \ + ns_common_type(__ns), \ + to_ns_operations(__ns), \ + __inum) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 08016f6e0e6f..e81b8e596e4f 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -17,7 +17,6 @@ struct inode; struct proc_ns_operations { const char *name; const char *real_ns_name; - int type; struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); int (*install)(struct nsset *nsset, struct ns_common *ns); -- cgit v1.2.3 From 4ae8d9aa9f9dc7137ea5e564d79c5aa5af1bc45c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Sep 2025 23:02:41 +0200 Subject: sched/deadline: Fix dl_server getting stuck John found it was easy to hit lockup warnings when running locktorture on a 2 CPU VM, which he bisected down to: commit cccb45d7c429 ("sched/deadline: Less agressive dl_server handling"). While debugging it seems there is a chance where we end up with the dl_server dequeued, with dl_se->dl_server_active. This causes dl_server_start() to return without enqueueing the dl_server, thus it fails to run when RT tasks starve the cpu. 
When this happens, dl_server_timer() catches the '!dl_se->server_has_tasks(dl_se)' case, which then calls replenish_dl_entity() and dl_server_stopped() and finally return HRTIMER_NO_RESTART. This ends in no new timer and also no enqueue, leaving the dl_server 'dead', allowing starvation. What should have happened is for the bandwidth timer to start the zero-laxity timer, which in turn would enqueue the dl_server and cause dl_se->server_pick_task() to be called -- which will stop the dl_server if no fair tasks are observed for a whole period. IOW, it is totally irrelevant if there are fair tasks at the moment of bandwidth refresh. This removes all dl_se->server_has_tasks() users, so remove the whole thing. Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") Reported-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Peter Zijlstra (Intel) Tested-by: John Stultz --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8188b833350..f89313b150e6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -733,7 +733,6 @@ struct sched_dl_entity { * runnable task. */ struct rq *rq; - dl_server_has_tasks_f server_has_tasks; dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES -- cgit v1.2.3 From a3a70caf7906708bf9bbc80018752a6b36543808 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Sep 2025 12:03:20 +0200 Subject: sched/deadline: Fix dl_server behaviour John reported undesirable behaviour with the dl_server since commit: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling"). When starving fair tasks on purpose (starting spinning FIFO tasks), his fair workload, which often goes (briefly) idle, would delay fair invocations for a second, running one invocation per second was both unexpected and terribly slow. 
The reason this happens is that when dl_se->server_pick_task() returns NULL, indicating no runnable tasks, it would yield, pushing any later jobs out a whole period (1 second). Instead simply stop the server. This should restore behaviour in that a later wakeup (which restarts the server) will be able to continue running (subject to the CBS wakeup rules). Notably, this does not re-introduce the behaviour cccb45d7c4295 set out to solve, any start/stop cycle is naturally throttled by the timer period (no active cancel). Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") Reported-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Peter Zijlstra (Intel) Tested-by: John Stultz --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f89313b150e6..e4ce0a76831e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -706,7 +706,6 @@ struct sched_dl_entity { unsigned int dl_defer : 1; unsigned int dl_defer_armed : 1; unsigned int dl_defer_running : 1; - unsigned int dl_server_idle : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit v1.2.3 From 88a90315a99a9120cd471bf681515cc77cd7cdb8 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 17 Sep 2025 14:09:14 +0800 Subject: rcu: Replace preempt.h with sched.h in include/linux/rcupdate.h In the next commit, we will move the definition of migrate_enable() and migrate_disable() to linux/sched.h. However, migrate_enable/migrate_disable will be used in commit 1b93c03fb319 ("rcu: add rcu_read_lock_dont_migrate()") in bpf-next tree. In order to fix potential compiling error, replace linux/preempt.h with linux/sched.h in include/linux/rcupdate.h. 
Signed-off-by: Menglong Dong Signed-off-by: Peter Zijlstra (Intel) --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 120536f4c6eb..8f346c847ee5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 378b7708194fff77c9020392067329931c3fcc04 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 17 Sep 2025 14:09:15 +0800 Subject: sched: Make migrate_{en,dis}able() inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now, migrate_enable and migrate_disable are global, which makes them become hotspots in some cases. Take BPF for example, the calls to migrate_enable and migrate_disable in the BPF trampoline can introduce significant overhead, and the following is the 'perf top' of FENTRY's benchmark (./tools/testing/selftests/bpf/bench trig-fentry): 54.63% bpf_prog_2dcccf652aac1793_bench_trigger_fentry [k] bpf_prog_2dcccf652aac1793_bench_trigger_fentry 10.43% [kernel] [k] migrate_enable 10.07% bpf_trampoline_6442517037 [k] bpf_trampoline_6442517037 8.06% [kernel] [k] __bpf_prog_exit_recur 4.11% libc.so.6 [.] syscall 2.15% [kernel] [k] entry_SYSCALL_64 1.48% [kernel] [k] memchr_inv 1.32% [kernel] [k] fput 1.16% [kernel] [k] _copy_to_user 0.73% [kernel] [k] bpf_prog_test_run_raw_tp So in this commit, we make migrate_enable/migrate_disable inline to obtain better performance. The struct rq is defined internally in kernel/sched/sched.h, and the field "nr_pinned" is accessed in migrate_enable/migrate_disable, which makes it hard to make them inline. Alexei Starovoitov suggested generating the offset of "nr_pinned" in [1], so we can define the migrate_enable/migrate_disable in include/linux/sched.h and access "this_rq()->nr_pinned" with "(void *)this_rq() + RQ_nr_pinned".
The offset of "nr_pinned" is generated in include/generated/rq-offsets.h by kernel/sched/rq-offsets.c. Generally speaking, we move the definition of migrate_enable and migrate_disable to include/linux/sched.h from kernel/sched/core.c. The call to __set_cpus_allowed_ptr() is left in ___migrate_enable(). The "struct rq" is not available in include/linux/sched.h, so we can't access the "runqueues" with this_cpu_ptr(), as the compilation will fail in this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr(): typeof((ptr) + 0) So we introduce the this_rq_raw() and access the runqueues with arch_raw_cpu_ptr/PERCPU_PTR directly. The variable "runqueues" is not visible in the kernel modules, and exporting it is not a good idea. As Peter Zijlstra advised in [2], we define and export migrate_enable/migrate_disable in kernel/sched/core.c too, and use them for the modules. Before this patch, the performance of BPF FENTRY is: fentry : 113.030 ± 0.149M/s fentry : 112.501 ± 0.187M/s fentry : 112.828 ± 0.267M/s fentry : 115.287 ± 0.241M/s After this patch, the performance of BPF FENTRY increases to: fentry : 143.644 ± 0.670M/s fentry : 149.764 ± 0.362M/s fentry : 149.642 ± 0.156M/s fentry : 145.263 ± 0.221M/s Signed-off-by: Menglong Dong Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/bpf/CAADnVQ+5sEDKHdsJY5ZsfGDO_1SEhhQWHrt2SMBG5SYyQ+jt7w@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/20250819123214.GH4067720@noisy.programming.kicks-ass.net/ [2] --- include/linux/preempt.h | 3 -- include/linux/sched.h | 113 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1fad1c8a4c76..92237c319035 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * work-conserving schedulers.
* */ -extern void migrate_disable(void); -extern void migrate_enable(void); /** * preempt_disable_nested - Disable preemption inside a normally preempt disabled section @@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void) DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) -DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) #ifdef CONFIG_PREEMPT_DYNAMIC diff --git a/include/linux/sched.h b/include/linux/sched.h index 644a01bdae70..d60ecaccdffc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -49,6 +49,9 @@ #include #include #include +#ifndef COMPILE_OFFSETS +#include +#endif /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -2317,4 +2320,114 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo #define alloc_tag_restore(_tag, _old) do {} while (0) #endif +#ifndef MODULE +#ifndef COMPILE_OFFSETS + +extern void ___migrate_enable(void); + +struct rq; +DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + +/* + * The "struct rq" is not available here, so we can't access the + * "runqueues" with this_cpu_ptr(), as the compilation will fail in + * this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr(): + * typeof((ptr) + 0) + * + * So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here. + */ +#ifdef CONFIG_SMP +#define this_rq_raw() arch_raw_cpu_ptr(&runqueues) +#else +#define this_rq_raw() PERCPU_PTR(&runqueues) +#endif +#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned)) + +static inline void __migrate_enable(void) +{ + struct task_struct *p = current; + +#ifdef CONFIG_DEBUG_PREEMPT + /* + * Check both overflow from migrate_disable() and superfluous + * migrate_enable(). 
+ */ + if (WARN_ON_ONCE((s16)p->migration_disabled <= 0)) + return; +#endif + + if (p->migration_disabled > 1) { + p->migration_disabled--; + return; + } + + /* + * Ensure stop_task runs either before or after this, and that + * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). + */ + guard(preempt)(); + if (unlikely(p->cpus_ptr != &p->cpus_mask)) + ___migrate_enable(); + /* + * Mustn't clear migration_disabled() until cpus_ptr points back at the + * regular cpus_mask, otherwise things that race (eg. + * select_fallback_rq) get confused. + */ + barrier(); + p->migration_disabled = 0; + this_rq_pinned()--; +} + +static inline void __migrate_disable(void) +{ + struct task_struct *p = current; + + if (p->migration_disabled) { +#ifdef CONFIG_DEBUG_PREEMPT + /* + *Warn about overflow half-way through the range. + */ + WARN_ON_ONCE((s16)p->migration_disabled < 0); +#endif + p->migration_disabled++; + return; + } + + guard(preempt)(); + this_rq_pinned()++; + p->migration_disabled = 1; +} +#else /* !COMPILE_OFFSETS */ +static inline void __migrate_disable(void) { } +static inline void __migrate_enable(void) { } +#endif /* !COMPILE_OFFSETS */ + +/* + * So that it is possible to not export the runqueues variable, define and + * export migrate_enable/migrate_disable in kernel/sched/core.c too, and use + * them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will + * be defined in kernel/sched/core.c. 
+ */ +#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE +static inline void migrate_disable(void) +{ + __migrate_disable(); +} + +static inline void migrate_enable(void) +{ + __migrate_enable(); +} +#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ +extern void migrate_disable(void); +extern void migrate_enable(void); +#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */ + +#else /* MODULE */ +extern void migrate_disable(void); +extern void migrate_enable(void); +#endif /* MODULE */ + +DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) + #endif -- cgit v1.2.3 From 45b7f780739a3145aeef24d2dfa02517a6c82ed6 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 17 Sep 2025 14:09:16 +0800 Subject: sched: Fix some typos in include/linux/preempt.h There are some typos in the comments of migrate in include/linux/preempt.h: elegible -> eligible it's -> its migirate_disable -> migrate_disable abritrary -> arbitrary Just fix them. Signed-off-by: Menglong Dong Signed-off-by: Peter Zijlstra (Intel) --- include/linux/preempt.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 92237c319035..102202185d7a 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -372,7 +372,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, /* * Migrate-Disable and why it is undesired. * - * When a preempted task becomes elegible to run under the ideal model (IOW it + * When a preempted task becomes eligible to run under the ideal model (IOW it * becomes one of the M highest priority tasks), it might still have to wait * for the preemptee's migrate_disable() section to complete. 
Thereby suffering * a reduction in bandwidth in the exact duration of the migrate_disable() @@ -387,7 +387,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * - a lower priority tasks; which under preempt_disable() could've instantly * migrated away when another CPU becomes available, is now constrained * by the ability to push the higher priority task away, which might itself be - * in a migrate_disable() section, reducing it's available bandwidth. + * in a migrate_disable() section, reducing its available bandwidth. * * IOW it trades latency / moves the interference term, but it stays in the * system, and as long as it remains unbounded, the system is not fully @@ -399,7 +399,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a * number of primitives into becoming preemptible, they would also allow * migration. This turns out to break a bunch of per-cpu usage. To this end, - * all these primitives employ migirate_disable() to restore this implicit + * all these primitives employ migrate_disable() to restore this implicit * assumption. * * This is a 'temporary' work-around at best. The correct solution is getting @@ -407,7 +407,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, * per-cpu locking or short preempt-disable regions. * * The end goal must be to get rid of migrate_disable(), alternatively we need - * a schedulability theory that does not depend on abritrary migration. + * a schedulability theory that does not depend on arbitrary migration. * * * Notes on the implementation. -- cgit v1.2.3 From 25c550464acd40803d63868dfa4a42506df48b88 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 23 Sep 2025 10:59:04 +0200 Subject: net: gro: remove is_ipv6 from napi_gro_cb Remove is_ipv6 from napi_gro_cb and use sk->sk_family instead. 
This frees up space for another ip_fixedid bit that will be added in the next commit. udp_sock_create always creates either an AF_INET or an AF_INET6 socket, so using sk->sk_family is reliable. In IPv6-FOU, cfg->ipv6_v6only is always enabled. Signed-off-by: Richard Gobert Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250923085908.4687-2-richardbgobert@gmail.com Signed-off-by: Paolo Abeni --- include/net/gro.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index a0fca7ac6e7e..87c68007f949 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -71,9 +71,6 @@ struct napi_gro_cb { /* Free the skb? */ u8 free:2; - /* Used in foo-over-udp, set in udp[46]_gro_receive */ - u8 is_ipv6:1; - /* Used in GRE, set in fou/gue_gro_receive */ u8 is_fou:1; -- cgit v1.2.3 From 21f7484220ace6c355cb0023d14d83da6fe5843d Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 23 Sep 2025 10:59:05 +0200 Subject: net: gro: only merge packets with incrementing or fixed outer ids Only merge encapsulated packets if their outer IDs are either incrementing or fixed, just like for inner IDs and IDs of non-encapsulated packets. Add another ip_fixedid bit for a total of two bits: one for outer IDs (and for unencapsulated packets) and one for inner IDs. This commit preserves the current behavior of GSO where only the IDs of the inner-most headers are restored correctly.
Signed-off-by: Richard Gobert Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250923085908.4687-3-richardbgobert@gmail.com Signed-off-by: Paolo Abeni --- include/net/gro.h | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index 87c68007f949..e7997a9fb30b 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -75,7 +75,7 @@ struct napi_gro_cb { u8 is_fou:1; /* Used to determine if ipid_offset can be ignored */ - u8 ip_fixedid:1; + u8 ip_fixedid:2; /* Number of gro_receive callbacks this packet already went through */ u8 recursion_counter:4; @@ -442,29 +442,26 @@ static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, } static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2, - struct sk_buff *p, bool outer) + struct sk_buff *p, bool inner) { const u32 id = ntohl(*(__be32 *)&iph->id); const u32 id2 = ntohl(*(__be32 *)&iph2->id); const u16 ipid_offset = (id >> 16) - (id2 >> 16); const u16 count = NAPI_GRO_CB(p)->count; const u32 df = id & IP_DF; - int flush; /* All fields must match except length and checksum. */ - flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF)); - - if (flush | (outer && df)) - return flush; + if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF))) + return true; /* When we receive our second frame we can make a decision on if we * continue this flow as an atomic flow with a fixed ID or if we use * an incrementing ID. 
*/ if (count == 1 && df && !ipid_offset) - NAPI_GRO_CB(p)->ip_fixedid = true; + NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner; - return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid); + return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner))); } static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2) @@ -479,7 +476,7 @@ static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr static inline int __gro_receive_network_flush(const void *th, const void *th2, struct sk_buff *p, const u16 diff, - bool outer) + bool inner) { const void *nh = th - diff; const void *nh2 = th2 - diff; @@ -487,19 +484,18 @@ static inline int __gro_receive_network_flush(const void *th, const void *th2, if (((struct iphdr *)nh)->version == 6) return ipv6_gro_flush(nh, nh2); else - return inet_gro_flush(nh, nh2, p, outer); + return inet_gro_flush(nh, nh2, p, inner); } static inline int gro_receive_network_flush(const void *th, const void *th2, struct sk_buff *p) { - const bool encap_mark = NAPI_GRO_CB(p)->encap_mark; int off = skb_transport_offset(p); int flush; - flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark); - if (encap_mark) - flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false); + flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, false); + if (NAPI_GRO_CB(p)->encap_mark) + flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, true); return flush; } -- cgit v1.2.3 From 3271f19bf7b9df665549666d789b9f126b4420c7 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 23 Sep 2025 10:59:06 +0200 Subject: net: gso: restore ids of outer ip headers correctly Currently, NETIF_F_TSO_MANGLEID indicates that the inner-most ID can be mangled. Outer IDs can always be mangled. 
Make GSO preserve outer IDs by default, with NETIF_F_TSO_MANGLEID allowing both inner and outer IDs to be mangled. This commit also modifies a few drivers that use SKB_GSO_FIXEDID directly. Signed-off-by: Richard Gobert Reviewed-by: Edward Cree # for sfc Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250923085908.4687-4-richardbgobert@gmail.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 9 +++++++-- include/linux/skbuff.h | 8 +++++++- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1c54d44805fa..1b85454116f6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5320,13 +5320,18 @@ void skb_warn_bad_offload(const struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { - netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; + netdev_features_t feature; + + if (gso_type & (SKB_GSO_TCP_FIXEDID | SKB_GSO_TCP_FIXEDID_INNER)) + gso_type |= __SKB_GSO_TCP_FIXEDID; + + feature = ((netdev_features_t)gso_type << NETIF_F_GSO_SHIFT) & NETIF_F_GSO_MASK; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(__SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 78ecfa7d00d0..fb3fec9affaa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -674,7 +674,7 @@ enum { /* This indicates the tcp 
segment has CWR set. */ SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 3, + __SKB_GSO_TCP_FIXEDID = 1 << 3, SKB_GSO_TCPV6 = 1 << 4, @@ -707,6 +707,12 @@ enum { SKB_GSO_FRAGLIST = 1 << 18, SKB_GSO_TCP_ACCECN = 1 << 19, + + /* These indirectly map onto the same netdev feature. + * If NETIF_F_TSO_MANGLEID is set it may mangle both inner and outer IDs. + */ + SKB_GSO_TCP_FIXEDID = 1 << 30, + SKB_GSO_TCP_FIXEDID_INNER = 1 << 31, }; #if BITS_PER_LONG > 32 -- cgit v1.2.3 From f095a358faf263bf1d8ae712bd38e13b71286819 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 23 Sep 2025 10:59:07 +0200 Subject: net: gro: remove unnecessary df checks Currently, packets with fixed IDs will be merged only if their don't-fragment bit is set. This restriction is unnecessary since packets without the don't-fragment bit will be forwarded as-is even if they were merged together. The merged packets will be segmented into their original forms before being forwarded, either by GSO or by TSO. The IDs will also remain identical unless NETIF_F_TSO_MANGLEID is set, in which case the IDs can become incrementing, which is also fine. Clean up the code by removing the unnecessary don't-fragment checks. Signed-off-by: Richard Gobert Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250923085908.4687-5-richardbgobert@gmail.com Signed-off-by: Paolo Abeni --- include/net/gro.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/gro.h b/include/net/gro.h index e7997a9fb30b..e3affb2e2ca8 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -448,17 +448,16 @@ static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *ip const u32 id2 = ntohl(*(__be32 *)&iph2->id); const u16 ipid_offset = (id >> 16) - (id2 >> 16); const u16 count = NAPI_GRO_CB(p)->count; - const u32 df = id & IP_DF; /* All fields must match except length and checksum. 
*/ - if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF))) + if ((iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | ((id ^ id2) & IP_DF)) return true; /* When we receive our second frame we can make a decision on if we * continue this flow as an atomic flow with a fixed ID or if we use * an incrementing ID. */ - if (count == 1 && df && !ipid_offset) + if (count == 1 && !ipid_offset) NAPI_GRO_CB(p)->ip_fixedid |= 1 << inner; return ipid_offset ^ (count * !(NAPI_GRO_CB(p)->ip_fixedid & (1 << inner))); -- cgit v1.2.3 From 60ac65a31041b0e5dfd736a79027314b9d533ef5 Mon Sep 17 00:00:00 2001 From: Sung-Chi Li Date: Thu, 11 Sep 2025 06:56:34 +0000 Subject: platform/chrome: update pwm fan control host commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update cros_ec_commands.h to include definitions for getting PWM fan duty, getting and setting the fan control mode. Signed-off-by: Sung-Chi Li Acked-by: Tzung-Bi Shih Reviewed-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250911-cros_ec_fan-v6-1-a1446cc098af@google.com Signed-off-by: Guenter Roeck --- include/linux/platform_data/cros_ec_commands.h | 29 +++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index c19b404e3d8d..69294f79cc88 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1825,6 +1825,16 @@ struct ec_response_pwm_get_duty { uint16_t duty; /* Duty cycle, EC_PWM_MAX_DUTY = 100% */ } __ec_align2; +#define EC_CMD_PWM_GET_FAN_DUTY 0x0027 + +struct ec_params_pwm_get_fan_duty { + uint8_t fan_idx; +} __ec_align1; + +struct ec_response_pwm_get_fan_duty { + uint32_t percent; /* Percentage of duty cycle, ranging from 0 ~ 100 */ +} __ec_align4; + /*****************************************************************************/ /* * 
Lightbar commands. This looks worse than it is. Since we only use one HOST @@ -3127,14 +3137,31 @@ struct ec_params_thermal_set_threshold_v1 { /****************************************************************************/ -/* Toggle automatic fan control */ +/* Set or get fan control mode */ #define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x0052 +enum ec_auto_fan_ctrl_cmd { + EC_AUTO_FAN_CONTROL_CMD_SET = 0, + EC_AUTO_FAN_CONTROL_CMD_GET, +}; + /* Version 1 of input params */ struct ec_params_auto_fan_ctrl_v1 { uint8_t fan_idx; } __ec_align1; +/* Version 2 of input params */ +struct ec_params_auto_fan_ctrl_v2 { + uint8_t fan_idx; + uint8_t cmd; /* enum ec_auto_fan_ctrl_cmd */ + uint8_t set_auto; /* only used with EC_AUTO_FAN_CONTROL_CMD_SET - bool + */ +} __ec_align4; + +struct ec_response_auto_fan_control { + uint8_t is_auto; /* bool */ +} __ec_align1; + /* Get/Set TMP006 calibration data */ #define EC_CMD_TMP006_GET_CALIBRATION 0x0053 #define EC_CMD_TMP006_SET_CALIBRATION 0x0054 -- cgit v1.2.3 From 5ba9f520f41a33c99fd5d1eb81b5650ed3517b88 Mon Sep 17 00:00:00 2001 From: Rahul Pathak Date: Mon, 18 Aug 2025 09:39:06 +0530 Subject: clk: Add clock driver for the RISC-V RPMI clock service group The RPMI specification defines a clock service group which can be accessed via SBI MPXY extension or dedicated S-mode RPMI transport. Add mailbox client based clock driver for the RISC-V RPMI clock service group. 
Reviewed-by: Stephen Boyd Reviewed-by: Andy Shevchenko Co-developed-by: Anup Patel Signed-off-by: Anup Patel Signed-off-by: Rahul Pathak Link: https://lore.kernel.org/r/20250818040920.272664-11-apatel@ventanamicro.com [pjw@kernel.org: converted rpmi_clkrate_u64 macro to a function; replaced bare constant with a macro] Signed-off-by: Paul Walmsley --- include/linux/mailbox/riscv-rpmi-message.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h index c3a98fc12c0a..8176d33747fe 100644 --- a/include/linux/mailbox/riscv-rpmi-message.h +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -90,6 +90,22 @@ static inline int rpmi_to_linux_error(int rpmi_error) } } +/* RPMI service group IDs */ +#define RPMI_SRVGRP_CLOCK 0x00008 + +/* RPMI clock service IDs */ +enum rpmi_clock_service_id { + RPMI_CLK_SRV_ENABLE_NOTIFICATION = 0x01, + RPMI_CLK_SRV_GET_NUM_CLOCKS = 0x02, + RPMI_CLK_SRV_GET_ATTRIBUTES = 0x03, + RPMI_CLK_SRV_GET_SUPPORTED_RATES = 0x04, + RPMI_CLK_SRV_SET_CONFIG = 0x05, + RPMI_CLK_SRV_GET_CONFIG = 0x06, + RPMI_CLK_SRV_SET_RATE = 0x07, + RPMI_CLK_SRV_GET_RATE = 0x08, + RPMI_CLK_SRV_ID_MAX_COUNT +}; + /* RPMI Linux mailbox attribute IDs */ enum rpmi_mbox_attribute_id { RPMI_MBOX_ATTR_SPEC_VERSION, -- cgit v1.2.3 From 495c8d35035edb66e3284113bef01f3b1b843832 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 25 Sep 2025 13:51:07 -0500 Subject: PM: hibernate: Add pm_hibernation_mode_is_suspend() Some drivers have different flows for hibernation and suspend. If the driver opportunistically will skip thaw() then it needs a hint to know what is happening after the hibernate. Introduce a new symbol pm_hibernation_mode_is_suspend() that drivers can call to determine if suspending the system for this purpose. 
Tested-by: Ionut Nechita Tested-by: Kenneth Crudup Acked-by: Alex Deucher Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 317ae31e89b3..0664c685f0b2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -276,6 +276,7 @@ extern void arch_suspend_enable_irqs(void); extern int pm_suspend(suspend_state_t state); extern bool sync_on_suspend_enabled; +bool pm_hibernation_mode_is_suspend(void); #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL @@ -288,6 +289,7 @@ static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } static inline bool pm_suspend_no_platform(void) { return false; } static inline bool pm_suspend_default_s2idle(void) { return false; } +static inline bool pm_hibernation_mode_is_suspend(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } -- cgit v1.2.3 From 10e1dcb62a7e874af43e7dfbef13ef8e3a2ad4a9 Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Thu, 28 Aug 2025 08:51:02 +0300 Subject: dt-bindings: thermal: add Tegra114 soctherm header This adds header for the Tegra114 SOCTHERM device tree node. 
Signed-off-by: Svyatoslav Ryhel Acked-by: Conor Dooley Reviewed-by: Mikko Perttunen Link: https://lore.kernel.org/r/20250828055104.8073-5-clamor95@gmail.com Signed-off-by: Daniel Lezcano --- include/dt-bindings/thermal/tegra114-soctherm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/dt-bindings/thermal/tegra114-soctherm.h (limited to 'include') diff --git a/include/dt-bindings/thermal/tegra114-soctherm.h b/include/dt-bindings/thermal/tegra114-soctherm.h new file mode 100644 index 000000000000..b766a61cd1ce --- /dev/null +++ b/include/dt-bindings/thermal/tegra114-soctherm.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * This header provides constants for binding nvidia,tegra114-soctherm. + */ + +#ifndef _DT_BINDINGS_THERMAL_TEGRA114_SOCTHERM_H +#define _DT_BINDINGS_THERMAL_TEGRA114_SOCTHERM_H + +#define TEGRA114_SOCTHERM_SENSOR_CPU 0 +#define TEGRA114_SOCTHERM_SENSOR_MEM 1 +#define TEGRA114_SOCTHERM_SENSOR_GPU 2 +#define TEGRA114_SOCTHERM_SENSOR_PLLX 3 + +#define TEGRA114_SOCTHERM_THROT_LEVEL_NONE 0 +#define TEGRA114_SOCTHERM_THROT_LEVEL_LOW 1 +#define TEGRA114_SOCTHERM_THROT_LEVEL_MED 2 +#define TEGRA114_SOCTHERM_THROT_LEVEL_HIGH 3 + +#endif -- cgit v1.2.3 From aa43953e862c031ff66e44353c88beb7a449e80d Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 18 Aug 2025 09:39:09 +0530 Subject: irqchip: Add driver for the RPMI system MSI service group The RPMI specification defines a system MSI service group which allows application processors to receive MSIs upon system events such as graceful shutdown/reboot request, CPU hotplug event, memory hotplug event, etc. Add an irqchip driver for the RISC-V RPMI system MSI service group to directly receive system MSIs in Linux kernel. 
Reviewed-by: Thomas Gleixner Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250818040920.272664-14-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- include/linux/mailbox/riscv-rpmi-message.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/mailbox/riscv-rpmi-message.h b/include/linux/mailbox/riscv-rpmi-message.h index 8176d33747fe..e135c6564d0c 100644 --- a/include/linux/mailbox/riscv-rpmi-message.h +++ b/include/linux/mailbox/riscv-rpmi-message.h @@ -91,6 +91,7 @@ static inline int rpmi_to_linux_error(int rpmi_error) } /* RPMI service group IDs */ +#define RPMI_SRVGRP_SYSTEM_MSI 0x00002 #define RPMI_SRVGRP_CLOCK 0x00008 /* RPMI clock service IDs */ @@ -106,6 +107,18 @@ enum rpmi_clock_service_id { RPMI_CLK_SRV_ID_MAX_COUNT }; +/* RPMI system MSI service IDs */ +enum rpmi_sysmsi_service_id { + RPMI_SYSMSI_SRV_ENABLE_NOTIFICATION = 0x01, + RPMI_SYSMSI_SRV_GET_ATTRIBUTES = 0x02, + RPMI_SYSMSI_SRV_GET_MSI_ATTRIBUTES = 0x03, + RPMI_SYSMSI_SRV_SET_MSI_STATE = 0x04, + RPMI_SYSMSI_SRV_GET_MSI_STATE = 0x05, + RPMI_SYSMSI_SRV_SET_MSI_TARGET = 0x06, + RPMI_SYSMSI_SRV_GET_MSI_TARGET = 0x07, + RPMI_SYSMSI_SRV_ID_MAX_COUNT +}; + /* RPMI Linux mailbox attribute IDs */ enum rpmi_mbox_attribute_id { RPMI_MBOX_ATTR_SPEC_VERSION, -- cgit v1.2.3 From 1a2b423be6a89dd07d5fc27ea042be68697a6a49 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 14 Sep 2025 22:24:13 +0200 Subject: i2c: boardinfo: Annotate code used in init phase only Annotate two places in boardinfo code: - __i2c_first_dynamic_bus_num is set in init phase. Annotate it as __ro_after_init to prevent later changes. - i2c_register_board_info() is used in init phase only, so annotate it as __init, allowing to free the memory after init phase. 
This is safe, see comment: "done in board-specific init code near arch_initcall() time" Signed-off-by: Heiner Kallweit Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 20fd41b51d5c..11a19241e360 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -499,7 +499,7 @@ static inline struct i2c_client *i2c_verify_client(struct device *dev) * Modules for add-on boards must use other calls. */ #ifdef CONFIG_I2C_BOARDINFO -int +int __init i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); #else -- cgit v1.2.3 From 14967a9c7d247841b0312c48dcf8cd29e55a4cc8 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 15 Sep 2025 18:45:20 -0600 Subject: mm/hugetlb: fix copy_hugetlb_page_range() to use ->pt_share_count commit 59d9094df3d79 ("mm: hugetlb: independent PMD page table shared count") introduced ->pt_share_count dedicated to hugetlb PMD share count tracking, but omitted fixing copy_hugetlb_page_range(), leaving the function relying on page_count() for tracking that no longer works. When lazy page table copy for hugetlb is disabled, that is, revert commit bcd51a3c679d ("hugetlb: lazy page table copies in fork()") fork()'ing with hugetlb PMD sharing quickly lockup - [ 239.446559] watchdog: BUG: soft lockup - CPU#75 stuck for 27s! [ 239.446611] RIP: 0010:native_queued_spin_lock_slowpath+0x7e/0x2e0 [ 239.446631] Call Trace: [ 239.446633] [ 239.446636] _raw_spin_lock+0x3f/0x60 [ 239.446639] copy_hugetlb_page_range+0x258/0xb50 [ 239.446645] copy_page_range+0x22b/0x2c0 [ 239.446651] dup_mmap+0x3e2/0x770 [ 239.446654] dup_mm.constprop.0+0x5e/0x230 [ 239.446657] copy_process+0xd17/0x1760 [ 239.446660] kernel_clone+0xc0/0x3e0 [ 239.446661] __do_sys_clone+0x65/0xa0 [ 239.446664] do_syscall_64+0x82/0x930 [ 239.446668] ? count_memcg_events+0xd2/0x190 [ 239.446671] ? 
syscall_trace_enter+0x14e/0x1f0 [ 239.446676] ? syscall_exit_work+0x118/0x150 [ 239.446677] ? arch_exit_to_user_mode_prepare.constprop.0+0x9/0xb0 [ 239.446681] ? clear_bhb_loop+0x30/0x80 [ 239.446684] ? clear_bhb_loop+0x30/0x80 [ 239.446686] entry_SYSCALL_64_after_hwframe+0x76/0x7e There are two options to resolve the potential latent issue: 1. warn against PMD sharing in copy_hugetlb_page_range(), 2. fix it. This patch opts for the second option. While at it, simplify the comment, the details are not actually relevant anymore. Link: https://lkml.kernel.org/r/20250916004520.1604530-1-jane.chu@oracle.com Fixes: 59d9094df3d7 ("mm: hugetlb: independent PMD page table shared count") Signed-off-by: Jane Chu Reviewed-by: Harry Yoo Acked-by: Oscar Salvador Acked-by: David Hildenbrand Cc: Jann Horn Cc: Liu Shixin Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 08bc2442db93..a643fae8a349 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -631,6 +631,11 @@ static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc) { return atomic_read(&ptdesc->pt_share_count); } + +static inline bool ptdesc_pmd_is_shared(struct ptdesc *ptdesc) +{ + return !!ptdesc_pmd_pts_count(ptdesc); +} #else static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc) { -- cgit v1.2.3 From 7e89979f6695fb56e8739b7d19614256e637131d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 13 Sep 2025 17:03:39 -0700 Subject: include/linux/pgtable.h: convert arch_enter_lazy_mmu_mode() and friends to static inlines commit c519c3c0a113 ("mm/kasan: avoid lazy MMU mode hazards") introduced the use of arch_enter_lazy_mmu_mode(), which results in the compiler complaining about "statement has no effect", when __HAVE_ARCH_LAZY_MMU_MODE is not defined in include/linux/pgtable.h The exact warning/error is: In file included from 
./include/linux/kasan.h:37, from mm/kasan/shadow.c:14: mm/kasan/shadow.c: In function kasan_populate_vmalloc_pte: ./include/linux/pgtable.h:247:41: error: statement with no effect [-Werror=unused-value] 247 | #define arch_enter_lazy_mmu_mode() (LAZY_MMU_DEFAULT) | ^ mm/kasan/shadow.c:322:9: note: in expansion of macro arch_enter_lazy_mmu_mode> 322 | arch_enter_lazy_mmu_mode(); | ^~~~~~~~~~~~~~~~~~~~~~~~ switching these "functions" to static inlines fixes this up. Fixes: c519c3c0a113 ("mm/kasan: avoid lazy MMU mode hazards") Reported-by: Balbir Singh Closes: https://lkml.kernel.org/r/20250912235515.367061-1-balbirs@nvidia.com Cc: Alexander Gordeev Cc: Andrey Ryabinin Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2b80fd456c8b..25a7257052ff 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -232,9 +232,9 @@ static inline int pmd_dirty(pmd_t pmd) * and the mode cannot be used in interrupt context. */ #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define arch_enter_lazy_mmu_mode() do {} while (0) -#define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_flush_lazy_mmu_mode() do {} while (0) +static inline void arch_enter_lazy_mmu_mode(void) {} +static inline void arch_leave_lazy_mmu_mode(void) {} +static inline void arch_flush_lazy_mmu_mode(void) {} #endif #ifndef pte_batch_hint -- cgit v1.2.3 From 17f0d1f6321caa95699b8f96baf12e654d7b8d60 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Fri, 26 Sep 2025 01:50:28 +0800 Subject: bpf: Add lookup_and_delete_elem for BPF_MAP_STACK_TRACE The stacktrace map can be easily full, which will lead to failure in obtaining the stack. In addition to increasing the size of the map, another solution is to delete the stack_id after looking it up from the user, so extend the existing bpf_map_lookup_and_delete_elem() functionality to stacktrace map types. 
Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250925175030.1615837-1-chen.dylane@linux.dev --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ea2ed6771cc6..6338e54a9b1f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2724,7 +2724,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, u64 flags); -int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); +int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, bool delete); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); -- cgit v1.2.3 From 212b0f07cf021575ec25e0b2336df77c7a4d2e68 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 3 Sep 2025 14:59:43 +0200 Subject: locking/local_lock: Expose dep_map in local_trylock_t. lockdep_is_held() macro assumes that "struct lockdep_map dep_map;" is a top level field of any lock that participates in LOCKDEP. Make it so for local_trylock_t. 
Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Alexei Starovoitov Reviewed-by: Harry Yoo Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- include/linux/local_lock_internal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index d80b5306a2c0..949de37700db 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -17,7 +17,10 @@ typedef struct { /* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ typedef struct { - local_lock_t llock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + struct task_struct *owner; +#endif u8 acquired; } local_trylock_t; @@ -31,7 +34,7 @@ typedef struct { .owner = NULL, # define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ - .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + LOCAL_LOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { @@ -81,7 +84,7 @@ do { \ local_lock_debug_init(lock); \ } while (0) -#define __local_trylock_init(lock) __local_lock_init(lock.llock) +#define __local_trylock_init(lock) __local_lock_init((local_lock_t *)lock) #define __spinlock_nested_bh_init(lock) \ do { \ -- cgit v1.2.3 From 2d517aa09bbc4203f10cdee7e1d42f3bbdc1b1cd Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 3 Sep 2025 14:59:45 +0200 Subject: slab: add opt-in caching layer of percpu sheaves Specifying a non-zero value for a new struct kmem_cache_args field sheaf_capacity will setup a caching layer of percpu arrays called sheaves of given capacity for the created cache. Allocations from the cache will allocate via the percpu sheaves (main or spare) as long as they have no NUMA node preference. Frees will also put the object back into one of the sheaves. When both percpu sheaves are found empty during an allocation, an empty sheaf may be replaced with a full one from the per-node barn. 
If none are available and the allocation is allowed to block, an empty sheaf is refilled from slab(s) by an internal bulk alloc operation. When both percpu sheaves are full during freeing, the barn can replace a full one with an empty one, unless over a full sheaves limit. In that case a sheaf is flushed to slab(s) by an internal bulk free operation. Flushing sheaves and barns is also wired to the existing cpu flushing and cache shrinking operations. The sheaves do not distinguish NUMA locality of the cached objects. If an allocation is requested with kmem_cache_alloc_node() (or a mempolicy with strict_numa mode enabled) with a specific node (not NUMA_NO_NODE), the sheaves are bypassed. The bulk operations exposed to slab users also try to utilize the sheaves as long as the necessary (full or empty) sheaves are available on the cpu or in the barn. Once depleted, they will fallback to bulk alloc/free to slabs directly to avoid double copying. The sheaf_capacity value is exported in sysfs for observability. Sysfs CONFIG_SLUB_STATS counters alloc_cpu_sheaf and free_cpu_sheaf count objects allocated or freed using the sheaves (and thus not counting towards the other alloc/free path counters). Counters sheaf_refill and sheaf_flush count objects filled or flushed from or to slab pages, and can be used to assess how effective the caching is. The refill and flush operations will also count towards the usual alloc_fastpath/slowpath, free_fastpath/slowpath and other counters for the backing slabs. For barn operations, barn_get and barn_put count how many full sheaves were get from or put to the barn, the _fail variants count how many such requests could not be satisfied mainly because the barn was either empty or full. While the barn also holds empty sheaves to make some operations easier, these are not as critical to mandate own counters. Finally, there are sheaf_alloc/sheaf_free counters. 
Access to the percpu sheaves is protected by local_trylock() when potential callers include irq context, and local_lock() otherwise (such as when we already know the gfp flags allow blocking). The trylock failures should be rare and we can easily fallback. Each per-NUMA-node barn has a spin_lock. When slub_debug is enabled for a cache with sheaf_capacity also specified, the latter is ignored so that allocations and frees reach the slow path where debugging hooks are processed. Similarly, we ignore it with CONFIG_SLUB_TINY which prefers low memory usage to performance. [boot failure: https://lore.kernel.org/all/583eacf5-c971-451a-9f76-fed0e341b815@linux.ibm.com/ ] Reported-and-tested-by: Venkat Rao Bagalkote Reviewed-by: Harry Yoo Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index d5a8ab98035c..49acbcdc6696 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -335,6 +335,37 @@ struct kmem_cache_args { * %NULL means no constructor. */ void (*ctor)(void *); + /** + * @sheaf_capacity: Enable sheaves of given capacity for the cache. + * + * With a non-zero value, allocations from the cache go through caching + * arrays called sheaves. Each cpu has a main sheaf that's always + * present, and a spare sheaf that may be not present. When both become + * empty, there's an attempt to replace an empty sheaf with a full sheaf + * from the per-node barn. + * + * When no full sheaf is available, and gfp flags allow blocking, a + * sheaf is allocated and filled from slab(s) using bulk allocation. + * Otherwise the allocation falls back to the normal operation + * allocating a single object from a slab. + * + * Analogically when freeing and both percpu sheaves are full, the barn + * may replace it with an empty sheaf, unless it's over capacity. 
In + * that case a sheaf is bulk freed to slab pages. + * + * The sheaves do not enforce NUMA placement of objects, so allocations + * via kmem_cache_alloc_node() with a node specified other than + * NUMA_NO_NODE will bypass them. + * + * Bulk allocation and free operations also try to use the cpu sheaves + * and barn, but fallback to using slab pages directly. + * + * When slub_debug is enabled for the cache, the sheaf_capacity argument + * is ignored. + * + * %0 means no sheaves will be created. + */ + unsigned int sheaf_capacity; }; struct kmem_cache *__kmem_cache_create_args(const char *name, -- cgit v1.2.3 From 04a91570ac67760301e5458d65eaf1342ecca314 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 16 Sep 2025 23:22:49 -0400 Subject: ext4: implement new ioctls to set and get superblock parameters Implement the EXT4_IOC_GET_TUNE_SB_PARAM and EXT4_IOC_SET_TUNE_SB_PARAM ioctls, which allow certain superblock parameters to be set while the file system is mounted, without needing write access to the block device. Reviewed-by: Darrick J. 
Wong Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o Message-ID: <20250916-tune2fs-v2-3-d594dc7486f0@mit.edu> Signed-off-by: Theodore Ts'o --- include/uapi/linux/ext4.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ext4.h b/include/uapi/linux/ext4.h index 1c4c2dd29112..411dcc1e4a35 100644 --- a/include/uapi/linux/ext4.h +++ b/include/uapi/linux/ext4.h @@ -33,6 +33,8 @@ #define EXT4_IOC_CHECKPOINT _IOW('f', 43, __u32) #define EXT4_IOC_GETFSUUID _IOR('f', 44, struct fsuuid) #define EXT4_IOC_SETFSUUID _IOW('f', 44, struct fsuuid) +#define EXT4_IOC_GET_TUNE_SB_PARAM _IOR('f', 45, struct ext4_tune_sb_params) +#define EXT4_IOC_SET_TUNE_SB_PARAM _IOW('f', 46, struct ext4_tune_sb_params) #define EXT4_IOC_SHUTDOWN _IOR('X', 125, __u32) @@ -108,6 +110,57 @@ struct ext4_new_group_input { __u16 unused; }; +struct ext4_tune_sb_params { + __u32 set_flags; + __u32 checkinterval; + __u16 errors_behavior; + __u16 mnt_count; + __u16 max_mnt_count; + __u16 raid_stride; + __u64 last_check_time; + __u64 reserved_blocks; + __u64 blocks_count; + __u32 default_mnt_opts; + __u32 reserved_uid; + __u32 reserved_gid; + __u32 raid_stripe_width; + __u16 encoding; + __u16 encoding_flags; + __u8 def_hash_alg; + __u8 pad_1; + __u16 pad_2; + __u32 feature_compat; + __u32 feature_incompat; + __u32 feature_ro_compat; + __u32 set_feature_compat_mask; + __u32 set_feature_incompat_mask; + __u32 set_feature_ro_compat_mask; + __u32 clear_feature_compat_mask; + __u32 clear_feature_incompat_mask; + __u32 clear_feature_ro_compat_mask; + __u8 mount_opts[64]; + __u8 pad[64]; +}; + +#define EXT4_TUNE_FL_ERRORS_BEHAVIOR 0x00000001 +#define EXT4_TUNE_FL_MNT_COUNT 0x00000002 +#define EXT4_TUNE_FL_MAX_MNT_COUNT 0x00000004 +#define EXT4_TUNE_FL_CHECKINTRVAL 0x00000008 +#define EXT4_TUNE_FL_LAST_CHECK_TIME 0x00000010 +#define EXT4_TUNE_FL_RESERVED_BLOCKS 0x00000020 +#define EXT4_TUNE_FL_RESERVED_UID 0x00000040 +#define 
EXT4_TUNE_FL_RESERVED_GID 0x00000080 +#define EXT4_TUNE_FL_DEFAULT_MNT_OPTS 0x00000100 +#define EXT4_TUNE_FL_DEF_HASH_ALG 0x00000200 +#define EXT4_TUNE_FL_RAID_STRIDE 0x00000400 +#define EXT4_TUNE_FL_RAID_STRIPE_WIDTH 0x00000800 +#define EXT4_TUNE_FL_MOUNT_OPTS 0x00001000 +#define EXT4_TUNE_FL_FEATURES 0x00002000 +#define EXT4_TUNE_FL_EDIT_FEATURES 0x00004000 +#define EXT4_TUNE_FL_FORCE_FSCK 0x00008000 +#define EXT4_TUNE_FL_ENCODING 0x00010000 +#define EXT4_TUNE_FL_ENCODING_FLAGS 0x00020000 + /* * Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag. * It indicates that the entry in extent status cache is for a hole. -- cgit v1.2.3 From bbfe987c5a2854705393ad79813074e5eadcbde6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 13:10:22 +0200 Subject: PM: hibernate: Fix pm_hibernation_mode_is_suspend() build breakage Commit 495c8d35035e ("PM: hibernate: Add pm_hibernation_mode_is_suspend()") that introduced pm_hibernation_mode_is_suspend() did not define it in the case when CONFIG_HIBERNATION is unset, but CONFIG_SUSPEND is set. Subsequent commit 0a6e9e098fcc ("drm/amd: Fix hybrid sleep") made the amdgpu driver use that function which led to kernel build breakage in the case mentioned above [1]. Address this by using appropriate #ifdeffery around the definition of pm_hibernation_mode_is_suspend(). Fixes: 0a6e9e098fcc ("drm/amd: Fix hybrid sleep") Reported-by: KernelCI bot Closes: https://groups.io/g/kernelci-results/topic/regression_pm_testing/115439919 [1] Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Mario Limonciello (AMD) --- include/linux/suspend.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0664c685f0b2..b02876f1ae38 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -276,7 +276,6 @@ extern void arch_suspend_enable_irqs(void); extern int pm_suspend(suspend_state_t state); extern bool sync_on_suspend_enabled; -bool pm_hibernation_mode_is_suspend(void); #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL @@ -289,7 +288,6 @@ static inline bool pm_suspend_via_firmware(void) { return false; } static inline bool pm_resume_via_firmware(void) { return false; } static inline bool pm_suspend_no_platform(void) { return false; } static inline bool pm_suspend_default_s2idle(void) { return false; } -static inline bool pm_hibernation_mode_is_suspend(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } @@ -420,6 +418,12 @@ static inline int hibernate_quiet_exec(int (*func)(void *data), void *data) { } #endif /* CONFIG_HIBERNATION */ +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_SUSPEND) +bool pm_hibernation_mode_is_suspend(void); +#else +static inline bool pm_hibernation_mode_is_suspend(void) { return false; } +#endif + int arch_resume_nosmt(void); #ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV -- cgit v1.2.3 From a036bb0e60ad2828c3498bff7465bcbb247b7436 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 25 Sep 2025 14:56:30 -0500 Subject: of: base: Add of_get_next_child_with_prefix() stub 1fcc67e3a354 ("of: base: Add for_each_child_of_node_with_prefix()") added of_get_next_child_with_prefix() but did not add a stub for the !CONFIG_OF case. Add a of_get_next_child_with_prefix() stub so users of for_each_child_of_node_with_prefix() can be built for compile testing even when !CONFIG_OF. 
Fixes: 1fcc67e3a354 ("of: base: Add for_each_child_of_node_with_prefix()") Signed-off-by: Bjorn Helgaas Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index a62154aeda1b..5e2c6ed9370a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -550,6 +550,13 @@ static inline struct device_node *of_get_next_child( return NULL; } +static inline struct device_node *of_get_next_child_with_prefix( + const struct device_node *node, struct device_node *prev, + const char *prefix) +{ + return NULL; +} + static inline struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev) { -- cgit v1.2.3 From 4e9510f16218802b5fc0d593d8707d4e7ebf9774 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Sep 2025 01:27:07 -0400 Subject: ptr_ring: drop duplicated tail zeroing code We have some rather subtle code around zeroing tail entries, minimizing cache bouncing. Let's put it all in one place. Doing this also reduces the text size slightly, e.g. for drivers/vhost/net.o Before: text: 15,114 bytes After: text: 15,082 bytes Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Link: https://patch.msgid.link/adb9d941de4a2b619ddb2be271a9939849e70687.1758690291.git.mst@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/ptr_ring.h | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 551329220e4f..a736b16859a6 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -243,6 +243,24 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r) return ret; } +/* Zero entries from tail to specified head. + * NB: if consumer_head can be >= r->size need to fixup tail later. 
+ */ +static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head) +{ + int head = consumer_head - 1; + + /* Zero out entries in the reverse order: this way we touch the + * cache line that producer might currently be reading the last; + * producer won't make progress and touch other cache lines + * besides the first one until we write out all entries. + */ + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + + r->consumer_tail = consumer_head; +} + /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { @@ -261,8 +279,7 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty * to work correctly. */ - int consumer_head = r->consumer_head; - int head = consumer_head++; + int consumer_head = r->consumer_head + 1; /* Once we have processed enough entries invalidate them in * the ring all at once so producer can reuse their space in the ring. @@ -270,16 +287,9 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r) * but helps keep the implementation simple. */ if (unlikely(consumer_head - r->consumer_tail >= r->batch || - consumer_head >= r->size)) { - /* Zero out entries in the reverse order: this way we touch the - * cache line that producer might currently be reading the last; - * producer won't make progress and touch other cache lines - * besides the first one until we write out all entries. 
- */ - while (likely(head >= r->consumer_tail)) - r->queue[head--] = NULL; - r->consumer_tail = consumer_head; - } + consumer_head >= r->size)) + __ptr_ring_zero_tail(r, consumer_head); + if (unlikely(consumer_head >= r->size)) { consumer_head = 0; r->consumer_tail = 0; @@ -513,7 +523,6 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, void (*destroy)(void *)) { unsigned long flags; - int head; spin_lock_irqsave(&r->consumer_lock, flags); spin_lock(&r->producer_lock); @@ -525,17 +534,14 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, * Clean out buffered entries (for simplicity). This way following code * can test entries for NULL and if not assume they are valid. */ - head = r->consumer_head - 1; - while (likely(head >= r->consumer_tail)) - r->queue[head--] = NULL; - r->consumer_tail = r->consumer_head; + __ptr_ring_zero_tail(r, r->consumer_head); /* * Go over entries in batch, start moving head back and copy entries. * Stop when we run into previously unconsumed entries. */ while (n) { - head = r->consumer_head - 1; + int head = r->consumer_head - 1; if (head < 0) head = r->size - 1; if (r->queue[head]) { -- cgit v1.2.3 From cc2f08129925b437bf28f7f7822f20dac083a87c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 24 Sep 2025 12:40:33 +0000 Subject: ethtool: add FEC bins histogram report IEEE 802.3ck-2022 defines counters for FEC bins and 802.3df-2024 clarifies it a bit further. Implement the reporting interface as an addition to the FEC stats available in ethtool. Drivers can leave bin counter uninitialized if per-lane values are provided. In this case the core will recalculate the sum for the bin. 
Signed-off-by: Vadim Fedorenko Reviewed-by: Aleksandr Loktionov Link: https://patch.msgid.link/20250924124037.1508846-2-vadim.fedorenko@linux.dev Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 25 ++++++++++++++++++++++++- include/uapi/linux/ethtool_netlink_generated.h | 12 ++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c869b7f8bce8..c2d8b4ec62eb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -492,7 +492,29 @@ struct ethtool_pause_stats { }; #define ETHTOOL_MAX_LANES 8 +/** + * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for + * the end-of-list marker, total 17 items + */ +#define ETHTOOL_FEC_HIST_MAX 17 +/** + * struct ethtool_fec_hist_range - error bits range for FEC histogram + * statistics + * @low: low bound of the bin (inclusive) + * @high: high bound of the bin (inclusive) + */ +struct ethtool_fec_hist_range { + u16 low; + u16 high; +}; +struct ethtool_fec_hist { + struct ethtool_fec_hist_value { + u64 sum; + u64 per_lane[ETHTOOL_MAX_LANES]; + } values[ETHTOOL_FEC_HIST_MAX]; + const struct ethtool_fec_hist_range *ranges; +}; /** * struct ethtool_fec_stats - statistics for IEEE 802.3 FEC * @corrected_blocks: number of received blocks corrected by FEC @@ -1214,7 +1236,8 @@ struct ethtool_ops { int (*set_link_ksettings)(struct net_device *, const struct ethtool_link_ksettings *); void (*get_fec_stats)(struct net_device *dev, - struct ethtool_fec_stats *fec_stats); + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist); int (*get_fecparam)(struct net_device *, struct ethtool_fecparam *); int (*set_fecparam)(struct net_device *, diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index e3b8813465d7..0e8ac0d974e2 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ 
-561,12 +561,24 @@ enum { ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) }; +enum { + ETHTOOL_A_FEC_HIST_PAD = 1, + ETHTOOL_A_FEC_HIST_BIN_LOW, + ETHTOOL_A_FEC_HIST_BIN_HIGH, + ETHTOOL_A_FEC_HIST_BIN_VAL, + ETHTOOL_A_FEC_HIST_BIN_VAL_PER_LANE, + + __ETHTOOL_A_FEC_HIST_CNT, + ETHTOOL_A_FEC_HIST_MAX = (__ETHTOOL_A_FEC_HIST_CNT - 1) +}; + enum { ETHTOOL_A_FEC_STAT_UNSPEC, ETHTOOL_A_FEC_STAT_PAD, ETHTOOL_A_FEC_STAT_CORRECTED, ETHTOOL_A_FEC_STAT_UNCORR, ETHTOOL_A_FEC_STAT_CORR_BITS, + ETHTOOL_A_FEC_STAT_HIST, __ETHTOOL_A_FEC_STAT_CNT, ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) -- cgit v1.2.3 From 105ce7ad57e492b75ab40f2dc591db645fadbaa2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 24 Sep 2025 23:14:53 +0200 Subject: net: airoha: npu: Add a NPU callback to initialize flow stats Introduce a NPU callback to initialize flow stats and remove NPU stats initialization from airoha_npu_get routine. Add num_stats_entries to airoha_npu_ppe_stats_setup routine. This patch makes the code more readable since NPU statistics are now initialized on demand by the NPU consumer (at the moment NPU statistics are configured just by the airoha_eth driver). Moreover this patch allows the NPU consumer (PPE module) to explicitly enable/disable NPU flow stats.
Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250924-airoha-npu-init-stats-callback-v1-1-88bdf3c941b2@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/soc/airoha/airoha_offload.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/soc/airoha/airoha_offload.h b/include/linux/soc/airoha/airoha_offload.h index 1dc5b4e35ef9..6f66eb339b3f 100644 --- a/include/linux/soc/airoha/airoha_offload.h +++ b/include/linux/soc/airoha/airoha_offload.h @@ -181,6 +181,8 @@ struct airoha_npu { struct { int (*ppe_init)(struct airoha_npu *npu); int (*ppe_deinit)(struct airoha_npu *npu); + int (*ppe_init_stats)(struct airoha_npu *npu, + dma_addr_t addr, u32 num_stats_entries); int (*ppe_flush_sram_entries)(struct airoha_npu *npu, dma_addr_t foe_addr, int sram_num_entries); @@ -206,7 +208,7 @@ struct airoha_npu { }; #if (IS_BUILTIN(CONFIG_NET_AIROHA_NPU) || IS_MODULE(CONFIG_NET_AIROHA_NPU)) -struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); +struct airoha_npu *airoha_npu_get(struct device *dev); void airoha_npu_put(struct airoha_npu *npu); static inline int airoha_npu_wlan_init_reserved_memory(struct airoha_npu *npu) @@ -256,8 +258,7 @@ static inline void airoha_npu_wlan_disable_irq(struct airoha_npu *npu, int q) npu->ops.wlan_disable_irq(npu, q); } #else -static inline struct airoha_npu *airoha_npu_get(struct device *dev, - dma_addr_t *foe_stats_addr) +static inline struct airoha_npu *airoha_npu_get(struct device *dev) { return NULL; } -- cgit v1.2.3 From c5273f6ca166c4edfaa6a87570e111453a0576ad Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 25 Sep 2025 12:32:40 +0200 Subject: mptcp: pm: rename 'subflows' to 'extra_subflows' A few variables linked to the Path-Managers are confusing, and it would help current and future developers, to clarify them. 
One of them is 'subflows', which in fact represents the number of extra subflows: all the additional subflows created after the initial one, and not the total number of subflows. While at it, add an additional name for the corresponding variable in MPTCP INFO: mptcpi_extra_subflows. Not to break the current uAPI, the new name is added as a 'define' pointing to the former name. This will then also help userspace devs. No functional changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250925-net-next-mptcp-c-flag-laminar-v1-5-ad126cc47c6b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 15eef878690b..f807c8dba56e 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -42,6 +42,7 @@ struct mptcp_info { __u8 mptcpi_subflows; + #define mptcpi_extra_subflows mptcpi_subflows __u8 mptcpi_add_addr_signal; __u8 mptcpi_add_addr_accepted; __u8 mptcpi_subflows_max; -- cgit v1.2.3 From 3eb3c9a9596a53880f7d7eff28ac5622f3e0ba37 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 25 Sep 2025 12:32:41 +0200 Subject: mptcp: pm: in-kernel: rename 'subflows_max' to 'limit_extra_subflows' A few variables linked to the in-kernel Path-Manager are confusing, and it would help current and future developers, to clarify them. One of them is 'subflows_max', which in fact represents the limit of extra subflows: the limit set via 'ip mptcp limit subflows X' for example. It is not linked to the maximum number of created / possible subflows. While at it, add an additional name for the corresponding variable in MPTCP INFO: mptcpi_limit_extra_subflows. Not to break the current uAPI, the new name is added as a 'define' pointing to the former name. This will then also help userspace devs. No functional changes intended. 
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250925-net-next-mptcp-c-flag-laminar-v1-6-ad126cc47c6b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index f807c8dba56e..314200c61f15 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -46,6 +46,7 @@ struct mptcp_info { __u8 mptcpi_add_addr_signal; __u8 mptcpi_add_addr_accepted; __u8 mptcpi_subflows_max; + #define mptcpi_limit_extra_subflows mptcpi_subflows_max __u8 mptcpi_add_addr_signal_max; __u8 mptcpi_add_addr_accepted_max; __u32 mptcpi_flags; -- cgit v1.2.3 From 45cae570664d58c562e21a3c7409fc02147bba46 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 25 Sep 2025 12:32:42 +0200 Subject: mptcp: pm: in-kernel: rename 'add_addr_signal_max' to 'endp_signal_max' A few variables linked to the in-kernel Path-Manager are confusing, and it would help current and future developers, to clarify them. One of them is 'add_addr_signal_max', which in fact represents the maximum number of 'signal' endpoints that can be used to announce addresses, and not the number of ADD_ADDR that can be signalled. While at it, add an additional name for the corresponding variable in MPTCP INFO: mptcpi_endp_signal_max. Not to break the current uAPI, the new name is added as a 'define' pointing to the former name. This will then also help userspace devs. No functional changes intended.
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250925-net-next-mptcp-c-flag-laminar-v1-7-ad126cc47c6b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 314200c61f15..69fc20db1c2f 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -48,6 +48,7 @@ struct mptcp_info { __u8 mptcpi_subflows_max; #define mptcpi_limit_extra_subflows mptcpi_subflows_max __u8 mptcpi_add_addr_signal_max; + #define mptcpi_endp_signal_max mptcpi_add_addr_signal_max __u8 mptcpi_add_addr_accepted_max; __u32 mptcpi_flags; __u32 mptcpi_token; -- cgit v1.2.3 From 37712d84dfc2e80d4d218ff9be490c86e604aa69 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 25 Sep 2025 12:32:43 +0200 Subject: mptcp: pm: in-kernel: rename 'add_addr_accept_max' to 'limit_add_addr_accepted' A few variables linked to the in-kernel Path-Manager are confusing, and it would help current and future developers, to clarify them. One of them is 'add_addr_accept_max', which in fact represents the limit of ADD_ADDR that can be accepted: the limit set via 'ip mptcp limit add_addr_accepted X' for example. It is not linked to the maximum number of accepted ADD_ADDR. While at it, add an additional name for the corresponding variable in MPTCP INFO: mptcpi_limit_add_addr_accepted. Not to break the current uAPI, the new name is added as a 'define' pointing to the former name. This will then also help userspace devs. No functional changes intended. 
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250925-net-next-mptcp-c-flag-laminar-v1-8-ad126cc47c6b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 69fc20db1c2f..1c275ce96b52 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -50,6 +50,7 @@ struct mptcp_info { __u8 mptcpi_add_addr_signal_max; #define mptcpi_endp_signal_max mptcpi_add_addr_signal_max __u8 mptcpi_add_addr_accepted_max; + #define mptcpi_limit_add_addr_accepted mptcpi_add_addr_accepted_max __u32 mptcpi_flags; __u32 mptcpi_token; __u64 mptcpi_write_seq; -- cgit v1.2.3 From e7757b6d3a623671705388be24851af7360b54ba Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 25 Sep 2025 12:32:44 +0200 Subject: mptcp: pm: in-kernel: rename 'local_addr_max' to 'endp_subflow_max' A few variables linked to the in-kernel Path-Manager are confusing, and it would help current and future developers, to clarify them. One of them is 'local_addr_max', which in fact represents the maximum number of 'subflow' endpoints that can be used to create new subflows, and not the number of local addresses that have been used to create subflows. While at it, add an additional name for the corresponding variable in MPTCP INFO: mptcpi_endp_subflow_max. Not to break the current uAPI, the new name is added as a 'define' pointing to the former name. This will then also help userspace devs. Also move the variable and function next to the other 'endp_X_max' ones. No functional changes intended. 
Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250925-net-next-mptcp-c-flag-laminar-v1-9-ad126cc47c6b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 1c275ce96b52..5ec996977b3f 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -58,6 +58,7 @@ struct mptcp_info { __u64 mptcpi_rcv_nxt; __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; + #define mptcpi_endp_subflow_max mptcpi_local_addr_max __u8 mptcpi_csum_enabled; __u32 mptcpi_retransmits; __u64 mptcpi_bytes_retrans; -- cgit v1.2.3 From 539f6b9de39ec5d827b16f6f5c8f3cfd58669e93 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 25 Sep 2025 12:32:50 +0200 Subject: mptcp: pm: in-kernel: add laminar endpoints Currently, upon the reception of an ADD_ADDR (and when the fullmesh flag is not used), the in-kernel PM will create new subflows using the local address the routing configuration will pick. It would be easier to pick local addresses from a selected list of endpoints, and use it only once, than relying on routing rules. Use case: both the client (C) and the server (S) have two addresses (a and b). The client establishes the connection between C(a) and S(a). Once established, the server announces its additional address S(b). Once received, the client connects to it using its second address C(b). Compared to a situation without the 'laminar' endpoint for C(b), the client didn't use this address C(b) to establish a subflow to the server's primary address S(a). So at the end, we have: C S C(a) --- S(a) C(b) --- S(b) In case of a 3rd address on each side (C(c) and S(c)), upon the reception of an ADD_ADDR with S(c), the client should not pick C(b) because it has already been used. C(c) should then be used. 
Note that this situation is currently possible if C doesn't add any endpoint, but configures the routing in order to pick C(b) for the route to S(b), and pick C(c) for the route to S(c). That doesn't sound very practical because it means knowing in advance the IP addresses that will be used and announced by the server. 'laminar', like the idea of laminar flows: the different subflows don't mix with each other on an endpoint, unlike the "turbulent" way traffic is mixed by 'fullmesh'. In the code, the new endpoint type is added. Similar to the other subflow types, an MPTCP_INFO counter is added. While at it, holes are now commented in struct mptcp_info, to remember next time that these holes can no longer be used. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/503 Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250925-net-next-mptcp-c-flag-laminar-v1-15-ad126cc47c6b@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 5ec996977b3f..87cfab874e24 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -39,6 +39,7 @@ #define MPTCP_PM_ADDR_FLAG_BACKUP _BITUL(2) #define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3) #define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4) +#define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5) struct mptcp_info { __u8 mptcpi_subflows; @@ -51,6 +52,7 @@ struct mptcp_info { #define mptcpi_endp_signal_max mptcpi_add_addr_signal_max __u8 mptcpi_add_addr_accepted_max; #define mptcpi_limit_add_addr_accepted mptcpi_add_addr_accepted_max + /* 16-bit hole that can no longer be filled */ __u32 mptcpi_flags; __u32 mptcpi_token; __u64 mptcpi_write_seq; @@ -60,13 +62,15 @@ struct mptcp_info { __u8 mptcpi_local_addr_max; #define mptcpi_endp_subflow_max mptcpi_local_addr_max __u8 mptcpi_csum_enabled; + /* 8-bit hole that can no
longer be filled */ __u32 mptcpi_retransmits; __u64 mptcpi_bytes_retrans; __u64 mptcpi_bytes_sent; __u64 mptcpi_bytes_received; __u64 mptcpi_bytes_acked; __u8 mptcpi_subflows_total; - __u8 reserved[3]; + __u8 mptcpi_endp_laminar_max; + __u8 reserved[2]; __u32 mptcpi_last_data_sent; __u32 mptcpi_last_data_recv; __u32 mptcpi_last_ack_recv; -- cgit v1.2.3 From 87608c2a7718dcac5deef801fb3c18cf36fb0233 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 26 Sep 2025 17:52:39 +0800 Subject: bpf: Remove duplicate crypto/sha2.h header ./include/linux/bpf.h: crypto/sha2.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=25501 Signed-off-by: Jiapeng Chong Acked-by: Quentin Monnet Link: https://lore.kernel.org/r/20250926095240.3397539-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6338e54a9b1f..fe2a396d8ac6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -32,7 +32,6 @@ #include #include #include -#include struct bpf_verifier_env; struct bpf_verifier_log; -- cgit v1.2.3 From fed7eaa4f037361fe4f3d4170649d6849a25998d Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 25 Sep 2025 12:42:16 -0700 Subject: PM: runtime: Update kerneldoc return codes APIs based on __pm_runtime_idle() (pm_runtime_idle(), pm_request_idle()) do not return 1 when already suspended. They return -EAGAIN. This is already covered in the docs, so the entry for "1" is redundant and conflicting. (pm_runtime_put() and pm_runtime_put_sync() were previously incorrect, but that's fixed in "PM: runtime: pm_runtime_put{,_sync}() returns 1 when already suspended", to ensure consistency with APIs like pm_runtime_put_autosuspend().) 
RPM_GET_PUT APIs based on __pm_runtime_suspend() do return 1 when already suspended, but the language is a little unclear -- it's not really an "error", so it seems better to list as a clarification before the 0/success case. Additionally, they only actually return 1 when the refcount makes it to 0; if the usage counter is still non-zero, we return 0. pm_runtime_put(), etc., also don't appear at first like they can ever see "-EAGAIN: Runtime PM usage_count non-zero", because in non-racy conditions, pm_runtime_put() would drop its reference count, see it's non-zero, and return early (in __pm_runtime_idle()). However, it's possible to race with another actor that increments the usage_count afterward, since rpm_idle() is protected by a separate lock; in such a case, we may see -EAGAIN. Because this case is only seen in the presence of concurrent actors, it makes sense to clarify that this is when "usage_count **became** non-zero", by way of some racing actor. Lastly, pm_runtime_put_sync_suspend() duplicated some -EAGAIN language. Fix that. Fixes: 271ff96d6066 ("PM: runtime: Document return values of suspend-related API functions") Link: https://lore.kernel.org/linux-pm/aJ5pkEJuixTaybV4@google.com/ Signed-off-by: Brian Norris Reviewed-by: Sakari Ailus Cc: 6.17+ # 6.17+ Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 56 +++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d88d6b6ccf5b..d1ff76e0e2d0 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -350,13 +350,12 @@ static inline int pm_runtime_force_resume(struct device *dev) { return -ENXIO; } * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing - * or device not in %RPM_ACTIVE state. 
+ * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change + * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM idle and suspend callbacks. */ @@ -370,14 +369,15 @@ static inline int pm_runtime_idle(struct device *dev) * @dev: Target device. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -396,14 +396,15 @@ static inline int pm_runtime_suspend(struct device *dev) * engaging its "idle check" callback. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -433,13 +434,12 @@ static inline int pm_runtime_resume(struct device *dev) * * 0: Success. * * -EINVAL: Runtime PM error. 
* * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero, Runtime PM status change ongoing - * or device not in %RPM_ACTIVE state. + * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change + * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_request_idle(struct device *dev) { @@ -464,15 +464,16 @@ static inline int pm_request_resume(struct device *dev) * equivalent pm_runtime_autosuspend() for @dev asynchronously. * * Return: + * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change + * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_request_autosuspend(struct device *dev) { @@ -540,15 +541,16 @@ static inline int pm_runtime_resume_and_get(struct device *dev) * equal to 0, queue up a work item for @dev like in pm_request_idle(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. 
*/ static inline int pm_runtime_put(struct device *dev) { @@ -565,15 +567,16 @@ DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int __pm_runtime_put_autosuspend(struct device *dev) { @@ -590,15 +593,16 @@ static inline int __pm_runtime_put_autosuspend(struct device *dev) * in pm_request_autosuspend(). * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. */ static inline int pm_runtime_put_autosuspend(struct device *dev) { @@ -619,14 +623,15 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. 
+ * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -646,15 +651,15 @@ static inline int pm_runtime_put_sync(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. - * * -EAGAIN: usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ @@ -677,15 +682,16 @@ static inline int pm_runtime_put_sync_suspend(struct device *dev) * if it returns an error code. * * Return: + * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. - * * -EAGAIN: Runtime PM usage_count non-zero or Runtime PM status change ongoing. + * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status + * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. - * * 1: Device already suspended. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. 
*/ -- cgit v1.2.3 From 4540aed51b12bc13364149bf95f6ecef013197c0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Sep 2025 19:12:00 +0200 Subject: bpf: Enforce expected_attach_type for tailcall compatibility Yinhao et al. recently reported: Our fuzzer tool discovered an uninitialized pointer issue in the bpf_prog_test_run_xdp() function within the Linux kernel's BPF subsystem. This leads to a NULL pointer dereference when a BPF program attempts to dereference the txq member of struct xdp_buff object. The test initializes two programs of BPF_PROG_TYPE_XDP: progA acts as the entry point for bpf_prog_test_run_xdp() and its expected_attach_type can be neither BPF_XDP_DEVMAP nor BPF_XDP_CPUMAP. progA calls into a slot of a tailcall map it owns. progB's expected_attach_type must be BPF_XDP_DEVMAP to pass xdp_is_valid_access() validation. The program returns struct xdp_md's egress_ifindex, and the latter is only allowed to be accessed under mentioned expected_attach_type. progB is then inserted into the tailcall which progA calls. The underlying issue goes beyond XDP though. Another example are programs of type BPF_PROG_TYPE_CGROUP_SOCK_ADDR. sock_addr_is_valid_access() as well as sock_addr_func_proto() have different logic depending on the programs' expected_attach_type. Similarly, a program attached to BPF_CGROUP_INET4_GETPEERNAME should not be allowed doing a tailcall into a program which calls bpf_bind() out of BPF which is only enabled for BPF_CGROUP_INET4_CONNECT. In short, specifying expected_attach_type allows to open up additional functionality or restrictions beyond what the basic bpf_prog_type enables. The use of tailcalls must not violate these constraints. Fix it by enforcing expected_attach_type in __bpf_prog_map_compatible().
Note that we only enforce this for tailcall maps, but not for BPF devmaps or cpumaps: There, the programs are invoked through dev_map_bpf_prog_run*() and cpu_map_bpf_prog_run*() which set up a new environment / context and therefore these situations are not prone to this issue. Fixes: 5e43f899b03a ("bpf: Check attach type at prog load time") Reported-by: Yinhao Hu Reported-by: Kaiyan Mei Reviewed-by: Dongliang Mu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20250926171201.188490-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fe2a396d8ac6..a98c83346134 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -289,6 +289,7 @@ struct bpf_map_owner { bool xdp_has_frags; u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; const struct btf_type *attach_func_proto; + enum bpf_attach_type expected_attach_type; }; struct bpf_map { -- cgit v1.2.3 From d4e99db3d942c8099006f3b7536bc52f766b475a Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 10 Aug 2025 23:53:20 +0200 Subject: Bluetooth: Annotate struct hci_drv_rp_read_info with __counted_by_le() Add the __counted_by_le() compiler attribute to the flexible array member 'supported_commands' to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. 
Signed-off-by: Thorsten Blum Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_drv.h b/include/net/bluetooth/hci_drv.h index 2f01c44f05ec..3fd6fdbdb02e 100644 --- a/include/net/bluetooth/hci_drv.h +++ b/include/net/bluetooth/hci_drv.h @@ -47,7 +47,7 @@ struct hci_drv_ev_cmd_complete { struct hci_drv_rp_read_info { __u8 driver_name[HCI_DRV_MAX_DRIVER_NAME_LENGTH]; __le16 num_supported_commands; - __le16 supported_commands[]; + __le16 supported_commands[] __counted_by_le(num_supported_commands); } __packed; /* Driver specific OGF (Opcode Group Field) -- cgit v1.2.3 From 339a87883a14d6a818ca436fed41aa5d10e0f4bd Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 13 Aug 2025 15:21:19 -0400 Subject: Bluetooth: ISO: Use sk_sndtimeo as conn_timeout This aligns the usage of socket sk_sndtimeo as conn_timeout when initiating a connection and then use it when scheduling the resulting HCI command, similar to what has been done in bf98feea5b65 ("Bluetooth: hci_conn: Always use sk_timeo as conn_timeout"). 
Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6560b32f3125..a068beae9318 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1587,16 +1587,18 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, __u16 setting, struct bt_codec *codec, u16 timeout); struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos); + __u8 dst_type, struct bt_iso_qos *qos, + u16 timeout); struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, - __u8 base_len, __u8 *base); + __u8 base_len, __u8 *base, u16 timeout); struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos); + __u8 dst_type, struct bt_iso_qos *qos, + u16 timeout); struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos, - __u8 data_len, __u8 *data); + __u8 data_len, __u8 *data, u16 timeout); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, -- cgit v1.2.3 From c9beb36c14660713b948e289b1e352cc3d386d44 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 13 Aug 2025 15:57:39 -0400 Subject: Bluetooth: hci_core: Detect if an ISO link has stalled This attempts to detect if an ISO link has been waiting for an ISO buffer for longer than the maximum allowed transport latency then proceed to use hci_link_tx_to which prints an error and disconnects. 
Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index df1847b74e55..9ecc70baaca9 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -488,6 +488,7 @@ enum { #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ #define HCI_ACL_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ +#define HCI_ISO_TX_TIMEOUT usecs_to_jiffies(0x7fffff) /* 8388607 usecs */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a068beae9318..2924c2bf2a98 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -487,6 +487,7 @@ struct hci_dev { unsigned long acl_last_tx; unsigned long le_last_tx; + unsigned long iso_last_tx; __u8 le_tx_def_phys; __u8 le_rx_def_phys; -- cgit v1.2.3 From 9eb14331885b09adfa7fe69a5a4603e24909c88f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 18 Aug 2025 16:43:53 -0400 Subject: Bluetooth: Add function and line information to bt_dbg When enabling debug via CONFIG_BT_FEATURE_DEBUG include function and line information by default, otherwise it is hard to make any sense of which function the logs come from. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index e5751f3070b8..d46ed9011ee5 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -272,7 +272,8 @@ void bt_err_ratelimited(const char *fmt, ...); #define BT_ERR(fmt, ...)
bt_err(fmt "\n", ##__VA_ARGS__) #if IS_ENABLED(CONFIG_BT_FEATURE_DEBUG) -#define BT_DBG(fmt, ...) bt_dbg(fmt "\n", ##__VA_ARGS__) +#define BT_DBG(fmt, ...) \ + bt_dbg("%s:%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) #else #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) #endif -- cgit v1.2.3 From be812ace0378a9db86344ad637c5ed2a5d11f216 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 9 Sep 2025 14:13:35 +0200 Subject: Bluetooth: Avoid a couple dozen -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Use the __struct_group() helper to fix 31 instances of the following type of warnings: 30 net/bluetooth/mgmt_config.c:16:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 1 net/bluetooth/mgmt_config.c:22:33: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Simon Horman Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 3575cd16049a..74edea06985b 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -53,10 +53,15 @@ struct mgmt_hdr { } __packed; struct mgmt_tlv { - __le16 type; - __u8 length; + /* New members MUST be added within the __struct_group() macro below. 
*/ + __struct_group(mgmt_tlv_hdr, __hdr, __packed, + __le16 type; + __u8 length; + ); __u8 value[]; } __packed; +static_assert(offsetof(struct mgmt_tlv, value) == sizeof(struct mgmt_tlv_hdr), + "struct member likely outside of __struct_group()"); struct mgmt_addr_info { bdaddr_t bdaddr; -- cgit v1.2.3 From 720a485d12c590750f40f4ffbe41e36725f43f3d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 9 Aug 2025 10:19:41 -0700 Subject: KEYS: trusted_tpm1: Move private functionality out of public header Move functionality used only by trusted_tpm1.c out of the public header . Specifically, change the exported functions into static functions, since they are not used outside trusted_tpm1.c, and move various other definitions and inline functions to trusted_tpm1.c. Signed-off-by: Eric Biggers Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/keys/trusted_tpm.h | 79 ---------------------------------------------- 1 file changed, 79 deletions(-) (limited to 'include') diff --git a/include/keys/trusted_tpm.h b/include/keys/trusted_tpm.h index a088b33fd0e3..0fadc6a4f166 100644 --- a/include/keys/trusted_tpm.h +++ b/include/keys/trusted_tpm.h @@ -5,41 +5,8 @@ #include #include -/* implementation specific TPM constants */ -#define TPM_SIZE_OFFSET 2 -#define TPM_RETURN_OFFSET 6 -#define TPM_DATA_OFFSET 10 - -#define LOAD32(buffer, offset) (ntohl(*(uint32_t *)&buffer[offset])) -#define LOAD32N(buffer, offset) (*(uint32_t *)&buffer[offset]) -#define LOAD16(buffer, offset) (ntohs(*(uint16_t *)&buffer[offset])) - extern struct trusted_key_ops trusted_key_tpm_ops; -struct osapsess { - uint32_t handle; - unsigned char secret[SHA1_DIGEST_SIZE]; - unsigned char enonce[TPM_NONCE_SIZE]; -}; - -/* discrete values, but have to store in uint16_t for TPM use */ -enum { - SEAL_keytype = 1, - SRK_keytype = 4 -}; - -int TSS_authhmac(unsigned char *digest, const unsigned char *key, - unsigned int keylen, unsigned char *h1, - unsigned char *h2, unsigned int h3, ...); -int 
TSS_checkhmac1(unsigned char *buffer, - const uint32_t command, - const unsigned char *ononce, - const unsigned char *key, - unsigned int keylen, ...); - -int trusted_tpm_send(unsigned char *cmd, size_t buflen); -int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce); - int tpm2_seal_trusted(struct tpm_chip *chip, struct trusted_key_payload *payload, struct trusted_key_options *options); @@ -47,50 +14,4 @@ int tpm2_unseal_trusted(struct tpm_chip *chip, struct trusted_key_payload *payload, struct trusted_key_options *options); -#define TPM_DEBUG 0 - -#if TPM_DEBUG -static inline void dump_options(struct trusted_key_options *o) -{ - pr_info("sealing key type %d\n", o->keytype); - pr_info("sealing key handle %0X\n", o->keyhandle); - pr_info("pcrlock %d\n", o->pcrlock); - pr_info("pcrinfo %d\n", o->pcrinfo_len); - print_hex_dump(KERN_INFO, "pcrinfo ", DUMP_PREFIX_NONE, - 16, 1, o->pcrinfo, o->pcrinfo_len, 0); -} - -static inline void dump_sess(struct osapsess *s) -{ - print_hex_dump(KERN_INFO, "trusted-key: handle ", DUMP_PREFIX_NONE, - 16, 1, &s->handle, 4, 0); - pr_info("secret:\n"); - print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, - 16, 1, &s->secret, SHA1_DIGEST_SIZE, 0); - pr_info("trusted-key: enonce:\n"); - print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, - 16, 1, &s->enonce, SHA1_DIGEST_SIZE, 0); -} - -static inline void dump_tpm_buf(unsigned char *buf) -{ - int len; - - pr_info("\ntpm buffer\n"); - len = LOAD32(buf, TPM_SIZE_OFFSET); - print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, buf, len, 0); -} -#else -static inline void dump_options(struct trusted_key_options *o) -{ -} - -static inline void dump_sess(struct osapsess *s) -{ -} - -static inline void dump_tpm_buf(unsigned char *buf) -{ -} -#endif #endif -- cgit v1.2.3 From 2f7d98f10b8f64525b2c74cae7d70ae5278eb654 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 20 Jul 2025 15:32:31 -0400 Subject: Have cc(1) catch attempts to modify ->f_path There are very few places that have cause to 
do that - all in core VFS now, and all done to files that are not yet opened (or visible to anybody else, for that matter). Let's turn f_path into a union of struct path __f_path and const struct path f_path. It's C, not C++ - 6.5.2.3[4] in C99 and later explicitly allows that kind of type-punning. That way any attempts to bypass these checks will be either very easy to catch, or (if the bastards get sufficiently creative to make it hard to spot with grep alone) very clearly malicious - and still catchable with a bit of instrumentation for sparse. Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- include/linux/fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index af514fae4e2d..1fb02c76ae09 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1082,6 +1082,8 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_cred: stashed credentials of creator/opener * @f_owner: file owner * @f_path: path of the file + * @__f_path: writable alias for @f_path; *ONLY* for core VFS and only before + * the file gets open * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position @@ -1107,7 +1109,10 @@ struct file { const struct cred *f_cred; struct fown_struct *f_owner; /* --- cacheline 1 boundary (64 bytes) --- */ - struct path f_path; + union { + const struct path f_path; + struct path __f_path; + }; union { /* regular files (with FMODE_ATOMIC_POS) and directories */ struct mutex f_pos_lock; -- cgit v1.2.3 From ab91835e61ab56d3964f51480955c9661678c269 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 23 Sep 2025 14:03:25 +0100 Subject: ASoC: cs35l56: Set fw_regs table after getting REVID Defer setting the cs35l56_base.fw_regs pointer until after the REVID has been read in cs35l56_hw_init(). Also make the corresponding change to the cs35l56_hda drivers to prevent a build break. 
This is preparing for firmware registers that change address between revisions of the same device. Signed-off-by: Richard Fitzgerald Signed-off-by: Takashi Iwai --- include/sound/cs35l56.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 7c8bbe8ad1e2..20dc3ee6378d 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -337,9 +337,6 @@ extern const struct regmap_config cs35l56_regmap_sdw; extern const struct regmap_config cs35l63_regmap_i2c; extern const struct regmap_config cs35l63_regmap_sdw; -extern const struct cs35l56_fw_reg cs35l56_fw_reg; -extern const struct cs35l56_fw_reg cs35l63_fw_reg; - extern const struct cirrus_amp_cal_controls cs35l56_calibration_controls; extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; -- cgit v1.2.3 From 33da2d892b6241a3e71f96acdd0e64de5d70b7f3 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 23 Sep 2025 14:03:26 +0100 Subject: ASoC: cs35l56: Add support for CS35L56 B2 silicon This adds support for changed firmware addresses on the B2 revision of CS35L56 silicon. 
Signed-off-by: Richard Fitzgerald Signed-off-by: Takashi Iwai --- include/sound/cs35l56.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 20dc3ee6378d..ab044ce2aa8b 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -85,7 +85,9 @@ #define CS35L56_DSP1_XMEM_UNPACKED24_0 0x2800000 #define CS35L56_DSP1_FW_VER 0x2800010 #define CS35L56_DSP1_HALO_STATE 0x28021E0 +#define CS35L56_B2_DSP1_HALO_STATE 0x2803D20 #define CS35L56_DSP1_PM_CUR_STATE 0x2804308 +#define CS35L56_B2_DSP1_PM_CUR_STATE 0x2804678 #define CS35L56_DSP1_XMEM_UNPACKED24_8191 0x2807FFC #define CS35L56_DSP1_CORE_BASE 0x2B80000 #define CS35L56_DSP1_SCRATCH1 0x2B805C0 -- cgit v1.2.3 From fa7d16734f9606c396681648618dd76a5af861e6 Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Sat, 27 Sep 2025 14:27:08 +0000 Subject: ALSA: compress: document 'chan_map' member in snd_dec_opus When building kernel docs, the following warning appeared: WARNING: ./include/uapi/sound/compress_params.h:364 struct member 'chan_map' not described in 'snd_dec_opus' The inline struct 'snd_dec_opus_ch_map' inside 'snd_dec_opus' was not properly documented. This patch documents the 'chan_map' member and its fields (stream_count, coupled_count, channel_map), resolving the warning. Fixes: 5d36370f3431 ("ALSA: compress: add raw opus codec define and opus decoder structs") Suggested-by: Bagas Sanjaya Signed-off-by: Kriish Sharma Signed-off-by: Takashi Iwai --- include/uapi/sound/compress_params.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h index faf4fa911f7f..d7db6b4e1166 100644 --- a/include/uapi/sound/compress_params.h +++ b/include/uapi/sound/compress_params.h @@ -336,16 +336,14 @@ struct snd_dec_ape { * @mapping_family: Order and meaning of output channels. 
Only values 0 and 1 * are expected; values 2..255 are not recommended for playback. * - * Optional channel mapping table. Describes mapping of opus streams to decoded - * channels. - * @struct snd_dec_opus_ch_map - * @stream_count: Number of streams encoded in each Ogg packet. - * @coupled_count: Number of streams whose decoders are used for two - * channels. - * @channel_map: describes which decoded channel to be used for each one. - * See RFC doc for details. - * This supports only mapping families 0 and 1, therefore max - * number of channels is 8. + * @chan_map: Optional channel mapping table. Describes mapping of opus streams + * to decoded channels. Fields: + * @chan_map.stream_count: Number of streams encoded in each Ogg packet. + * @chan_map.coupled_count: Number of streams whose decoders are used + * for two channels. + * @chan_map.channel_map: Which decoded channel to be used for each one. + * Supports only mapping families 0 and 1, + * max number of channels is 8. * * These options were extracted from RFC7845 Section 5. */ -- cgit v1.2.3 From 1ddf1636e0e058adf2231486da0419243eb49539 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 22 Sep 2025 09:06:30 +0300 Subject: net/mlx5: Add IFC bit for TIR/SQ order capability Before this cap, firmware requested a certain creation order between TIR objects and SQs of the same transport domain to properly support the self loopback prevention feature. If order is not preserved, explicit modify_tir operations are necessary after the opening of the SQs. When set, this cap bit indicates that this firmware requirement / limitation no longer holds. 
Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1758521191-814350-2-git-send-email-tariqt@nvidia.com Reviewed-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0cf187e13def..c0f5fee7a4a5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1895,7 +1895,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2a0[0x7]; u8 mkey_pcie_tph[0x1]; - u8 reserved_at_2a8[0x2]; + u8 reserved_at_2a8[0x1]; + u8 tis_tir_td_order[0x1]; u8 psp[0x1]; u8 shampo[0x1]; -- cgit v1.2.3 From 137d1a6355131457723b51a34192320d93d15654 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 22 Sep 2025 09:06:31 +0300 Subject: net/mlx5: IFC add balance ID and LAG per MP group bits Add interface definitions for load balance ID and LAG per multiplane group functionality. This patch introduces the hardware capability bits needed to support balance ID in multiplane LAG configurations. The new fields include: - load_balance_id: 4-bit field for balance identifier. - lag_per_mp_group: capability bit for LAG per multiplane group support. These interface additions are prerequisites for implementing balance ID support in the MLX5 driver. 
Signed-off-by: Mark Bloch Reviewed-by: Shay Drori Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1758521191-814350-3-git-send-email-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c0f5fee7a4a5..07614cd95bed 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2235,12 +2235,16 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_440[0x8]; u8 max_num_eqs_24b[0x18]; - u8 reserved_at_460[0x160]; + u8 reserved_at_460[0x144]; + u8 load_balance_id[0x4]; + u8 reserved_at_5a8[0x18]; u8 query_adjacent_functions_id[0x1]; u8 ingress_egress_esw_vport_connect[0x1]; u8 function_id_type_vhca_id[0x1]; - u8 reserved_at_5c3[0xd]; + u8 reserved_at_5c3[0x1]; + u8 lag_per_mp_group[0x1]; + u8 reserved_at_5c5[0xb]; u8 delegate_vhca_management_profiles[0x10]; u8 delegated_vhca_max[0x10]; -- cgit v1.2.3 From 2db579838296239545554443234fafb8f485cca0 Mon Sep 17 00:00:00 2001 From: Kiryl Shutsemau Date: Tue, 23 Sep 2025 12:07:06 +0100 Subject: mm/page_vma_mapped: track if the page is mapped across page table boundary Patch series "mm: Improve mlock tracking for large folios", v3. The patchset includes several fixes and improvements related to mlock tracking of large folios. The main objective is to reduce the undercount of Mlocked memory in /proc/meminfo and improve the accuracy of the statistics. Patches 1-2: These patches address a minor race condition in folio_referenced_one() related to mlock_vma_folio(). Currently, mlock_vma_folio() is called on large folio without the page table lock, which can result in a race condition with unmap (i.e. MADV_DONTNEED). This can lead to partially mapped folios on the unevictable LRU list. While not a significant issue, I do not believe backporting is necessary. 
Patch 3: This patch adds mlocking logic similar to folio_referenced_one() to try_to_unmap_one(), allowing for mlocking of large folios where possible. Patch 4-5: These patches modify finish_fault() and faultaround to map in the entire folio when possible, enabling efficient mlocking upon addition to the rmap. Patch 6: This patch makes rmap mlock large folios if they are fully mapped, addressing the primary source of mlock undercount for large folios. This patch (of 6): Add a PVMW_PGTABLE_CROSSED flag that page_vma_mapped_walk() will set if the page is mapped across a page table boundary. Unlike other PVMW_* flags, this one is a result of page_vma_mapped_walk() and not set by the caller. folio_referenced_one() will use it to detect if it is safe to mlock the folio. [akpm@linux-foundation.org: s/CROSSSED/CROSSED/] Link: https://lkml.kernel.org/r/20250923110711.690639-1-kirill@shutemov.name Link: https://lkml.kernel.org/r/20250923110711.690639-2-kirill@shutemov.name Signed-off-by: Kiryl Shutsemau Reviewed-by: Shakeel Butt Cc: Baolin Wang Cc: David Hildenbrand Cc: Johannes Weiner Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- include/linux/rmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index e8aff6d2deda..daa92a58585d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -922,6 +922,11 @@ struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, /* Look for migration entries rather than present PTEs */ #define PVMW_MIGRATION (1 << 1) +/* Result flags */ + +/* The page is mapped across page table boundary */ +#define PVMW_PGTABLE_CROSSED (1 << 16) + struct page_vma_mapped_walk { unsigned long pfn; unsigned long nr_pages; -- cgit v1.2.3 From 4d6fc29f36341d7795db1d1819b4c15fe9be7b23 Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Wed, 24 Sep 2025 00:16:59 +0530 Subject: mm/ksm: fix incorrect KSM counter handling in mm_struct during fork Patch series "mm/ksm: Fix 
incorrect accounting of KSM counters during fork", v3. The first patch in this series fixes the incorrect accounting of KSM counters such as ksm_merging_pages, ksm_rmap_items, and the global ksm_zero_pages during fork. The following patch adds a selftest to verify the ksm_merging_pages counter was updated correctly during fork. Test Results ============ Without the first patch ----------------------- # [RUN] test_fork_ksm_merging_page_count not ok 10 ksm_merging_page in child: 32 With the first patch -------------------- # [RUN] test_fork_ksm_merging_page_count ok 10 ksm_merging_pages is not inherited after fork This patch (of 2): Currently, the KSM-related counters in `mm_struct`, such as `ksm_merging_pages`, `ksm_rmap_items`, and `ksm_zero_pages`, are inherited by the child process during fork. This results in inconsistent accounting. When a process uses KSM, identical pages are merged and an rmap item is created for each merged page. The `ksm_merging_pages` and `ksm_rmap_items` counters are updated accordingly. However, after a fork, these counters are copied to the child while the corresponding rmap items are not. As a result, when the child later triggers an unmerge, there are no rmap items present in the child, so the counters remain stale, leading to incorrect accounting. A similar issue exists with `ksm_zero_pages`, which maintains both a global counter and a per-process counter. During fork, the per-process counter is inherited by the child, but the global counter is not incremented. Since the child also references zero pages, the global counter should be updated as well. Otherwise, during zero-page unmerge, both the global and per-process counters are decremented, causing the global counter to become inconsistent. To fix this, ksm_merging_pages and ksm_rmap_items are reset to 0 during fork, and the global ksm_zero_pages counter is updated with the per-process ksm_zero_pages value inherited by the child. 
This ensures that KSM statistics remain accurate and reflect the activity of each process correctly. Link: https://lkml.kernel.org/r/cover.1758648700.git.donettom@linux.ibm.com Link: https://lkml.kernel.org/r/7b9870eb67ccc0d79593940d9dbd4a0b39b5d396.1758648700.git.donettom@linux.ibm.com Fixes: 7609385337a4 ("ksm: count ksm merging pages for each process") Fixes: cb4df4cae4f2 ("ksm: count allocated ksm rmap_items for each process") Fixes: e2942062e01d ("ksm: count all zero pages placed by KSM") Signed-off-by: Donet Tom Reviewed-by: Chengming Zhou Acked-by: David Hildenbrand Cc: Aboorva Devarajan Cc: David Hildenbrand Cc: Donet Tom Cc: "Ritesh Harjani (IBM)" Cc: Wei Yang Cc: xu xin Cc: [6.6+] Signed-off-by: Andrew Morton --- include/linux/ksm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 22e67ca7cba3..067538fc4d58 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -56,8 +56,14 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm) static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { /* Adding mm to ksm is best effort on fork. */ - if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) + if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) { + long nr_ksm_zero_pages = atomic_long_read(&mm->ksm_zero_pages); + + mm->ksm_merging_pages = 0; + mm->ksm_rmap_items = 0; + atomic_long_add(nr_ksm_zero_pages, &ksm_zero_pages); __ksm_enter(mm); + } } static inline int ksm_execve(struct mm_struct *mm) -- cgit v1.2.3 From 989c2f55ca4839121cbf23b5802f8513dbd54e1e Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 26 Sep 2025 17:24:26 +0800 Subject: mm: silence data-race in update_hiwater_rss KCSAN reports a data race on mm_cluster.hiwater_rss, which can be accessed concurrently from various paths like page migration and memory unmapping without synchronization. Since hiwater_rss is a statistical field for accounting purposes, this data race is benign. 
Annotate both the read and write accesses with data_race() to make KCSAN happy. Link: https://lkml.kernel.org/r/20250926092426.43312-1-lance.yang@linux.dev Signed-off-by: Lance Yang Reported-by: syzbot+60192c8877d0bc92a92b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/68d6364e.050a0220.3390a8.000d.GAE@google.com Acked-by: Vlastimil Babka Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Marco Elver Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index fcb1e72eea40..06978b4dbeb8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2742,7 +2742,7 @@ static inline void update_hiwater_rss(struct mm_struct *mm) unsigned long _rss = get_mm_rss(mm); if (data_race(mm->hiwater_rss) < _rss) - (mm)->hiwater_rss = _rss; + data_race(mm->hiwater_rss = _rss); } static inline void update_hiwater_vm(struct mm_struct *mm) -- cgit v1.2.3 From 81e78b7ec61e89e8bab9736551839f79b063614c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 23 Sep 2025 16:00:58 +0200 Subject: mm: convert folio_page() back to a macro In commit 73b3294b1152 ("mm: simplify folio_page() and folio_page_idx()") we converted folio_page() into a static inline function. However briefly afterwards in commit a847b17009ec ("mm: constify highmem related functions for improved const-correctness") we had to add some nasty const-away casting to make the compiler happy when checking const correctness. So let's just convert it back to a simple macro so the compiler can check const correctness properly. There is the alternative of using a _Generic() similar to page_folio(), but there is not a lot of benefit compared to just using a simple macro. 
Link: https://lkml.kernel.org/r/20250923140058.2020023-1-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Kiryl Shutsemau Reviewed-by: SeongJae Park Reviewed-by: Vishal Moola (Oracle) Reviewed-by: Dev Jain Reviewed-by: Suren Baghdasaryan Reviewed-by: Lance Yang Reviewed-by: Wei Yang Cc: Lorenzo Stoakes Cc: "Liam R. Howlett" Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 568011930e35..48e27768e7ba 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -316,10 +316,7 @@ static __always_inline unsigned long _compound_head(const struct page *page) * check that the page number lies within @folio; the caller is presumed * to have a reference to the page. */ -static inline struct page *folio_page(const struct folio *folio, unsigned long n) -{ - return (struct page *)(&folio->page + n); -} +#define folio_page(folio, n) (&(folio)->page + (n)) static __always_inline int PageTail(const struct page *page) { -- cgit v1.2.3 From c8a935a31bc787db52296944890f300ba9479088 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 23 Sep 2025 10:52:29 +0100 Subject: lib/string_choices: Add str_assert_deassert() helper Add str_assert_deassert() helper to return "assert" or "deassert" string literal depending on the boolean argument. Also add the inversed variant str_deassert_assert(). 
Suggested-by: Philipp Zabel Signed-off-by: Lad Prabhakar Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250923095229.2149740-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Kees Cook --- include/linux/string_choices.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index f3ba4f52ff26..6c4077be7742 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -17,6 +17,12 @@ #include +static inline const char *str_assert_deassert(bool v) +{ + return v ? "assert" : "deassert"; +} +#define str_deassert_assert(v) str_assert_deassert(!(v)) + static inline const char *str_enable_disable(bool v) { return v ? "enable" : "disable"; -- cgit v1.2.3 From 3c1ea5c5019ff197aca7e886a3a240c38f6c6f0d Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 3 Sep 2025 14:59:47 +0200 Subject: slab: sheaf prefilling for guaranteed allocations Add functions for efficient guaranteed allocations e.g. in a critical section that cannot sleep, when the exact number of allocations is not known beforehand, but an upper limit can be calculated. kmem_cache_prefill_sheaf() returns a sheaf containing at least given number of objects. kmem_cache_alloc_from_sheaf() will allocate an object from the sheaf and is guaranteed not to fail until depleted. kmem_cache_return_sheaf() is for giving the sheaf back to the slab allocator after the critical section. This will also attempt to refill it to cache's sheaf capacity for better efficiency of sheaves handling, but it's not stricly necessary to succeed. kmem_cache_refill_sheaf() can be used to refill a previously obtained sheaf to requested size. If the current size is sufficient, it does nothing. If the requested size exceeds cache's sheaf_capacity and the sheaf's current capacity, the sheaf will be replaced with a new one, hence the indirect pointer parameter. 
kmem_cache_sheaf_size() can be used to query the current size. The implementation supports requesting sizes that exceed cache's sheaf_capacity, but it is not efficient - such "oversize" sheaves are allocated fresh in kmem_cache_prefill_sheaf() and flushed and freed immediately by kmem_cache_return_sheaf(). kmem_cache_refill_sheaf() might be especially ineffective when replacing a sheaf with a new one of a larger capacity. It is therefore better to size cache's sheaf_capacity accordingly to make oversize sheaves exceptional. CONFIG_SLUB_STATS counters are added for sheaf prefill and return operations. A prefill or return is considered _fast when it is able to grab or return a percpu spare sheaf (even if the sheaf needs a refill to satisfy the request, as those should amortize over time), and _slow otherwise (when the barn or even sheaf allocation/freeing has to be involved). sheaf_prefill_oversize is provided to determine how many prefills were oversize (counter for oversize returns is not necessary as all oversize refills result in oversize returns). When slub_debug is enabled for a cache with sheaves, no percpu sheaves exist for it, but the prefill functionality is still provided simply by all prefilled sheaves becoming oversize. If percpu sheaves are not created for a cache due to not passing the sheaf_capacity argument on cache creation, the prefills also work through oversize sheaves, but there's a WARN_ON_ONCE() to indicate the omission. Reviewed-by: Suren Baghdasaryan Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 49acbcdc6696..680193356ac7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -829,6 +829,22 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) 
alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); + +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size); + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); + +void *kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *cachep, gfp_t gfp, + struct slab_sheaf *sheaf) __assume_slab_alignment __malloc; +#define kmem_cache_alloc_from_sheaf(...) \ + alloc_hooks(kmem_cache_alloc_from_sheaf_noprof(__VA_ARGS__)) + +unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf); + /* * These macros allow declaring a kmem_buckets * parameter alongside size, which * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call -- cgit v1.2.3 From 9b05890a25d9197e39fcf5b2298f0b911c323306 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 15:00:01 +0200 Subject: maple_tree: Prefilled sheaf conversion and testing Use prefilled sheaves instead of bulk allocations. This should speed up the allocations and the return path of unused allocations. Remove the push and pop of nodes from the maple state as this is now handled by the slab layer with sheaves. Testing has been removed as necessary since the features of the tree have been reduced. Signed-off-by: Liam R. 
Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- include/linux/maple_tree.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index bafe143b1f78..0e31b191e3be 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -442,7 +442,8 @@ struct ma_state { struct maple_enode *node; /* The node containing this entry */ unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ - struct maple_alloc *alloc; /* Allocated nodes for this operation */ + struct slab_sheaf *sheaf; /* Allocated nodes for this operation */ + unsigned long node_request; /* The number of nodes to allocate for this operation */ enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; @@ -490,7 +491,8 @@ struct ma_wr_state { .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ - .alloc = NULL, \ + .node_request = 0, \ + .sheaf = NULL, \ .mas_flags = 0, \ .store_type = wr_invalid, \ } -- cgit v1.2.3 From 6bf377b06c08049d0f4042493df302285e45165e Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 15:00:02 +0200 Subject: maple_tree: Add single node allocation support to maple state The fast path through a write will require replacing a single node in the tree. Using a sheaf (32 nodes) is too heavy for the fast path, so special case the node store operation by just allocating one node in the maple state. Signed-off-by: Liam R. 
Howlett Signed-off-by: Vlastimil Babka --- include/linux/maple_tree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 0e31b191e3be..51a64ff23b88 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -443,6 +443,7 @@ struct ma_state { unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ struct slab_sheaf *sheaf; /* Allocated nodes for this operation */ + struct maple_node *alloc; /* A single allocated node for fast path writes */ unsigned long node_request; /* The number of nodes to allocate for this operation */ enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ @@ -491,8 +492,9 @@ struct ma_wr_state { .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ - .node_request = 0, \ .sheaf = NULL, \ + .alloc = NULL, \ + .node_request = 0, \ .mas_flags = 0, \ .store_type = wr_invalid, \ } -- cgit v1.2.3 From 4957089a23f41f31f8e7e22802a8ef9f5789c191 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 8 Sep 2025 18:00:02 -0700 Subject: locking/local_lock: Introduce local_lock_is_locked(). Introduce local_lock_is_locked() that returns true when given local_lock is locked by current cpu (in !PREEMPT_RT) or by current task (in PREEMPT_RT). The goal is to detect a deadlock by the caller. 
Reviewed-by: Sebastian Andrzej Siewior Signed-off-by: Alexei Starovoitov Signed-off-by: Vlastimil Babka --- include/linux/local_lock.h | 2 ++ include/linux/local_lock_internal.h | 7 +++++++ include/linux/rtmutex.h | 10 ++++++++++ 3 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 2ba846419524..0d91d060e3e9 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -66,6 +66,8 @@ */ #define local_trylock(lock) __local_trylock(this_cpu_ptr(lock)) +#define local_lock_is_locked(lock) __local_lock_is_locked(lock) + /** * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable * interrupts if acquired diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 949de37700db..a4dc479157b5 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -165,6 +165,9 @@ do { \ !!tl; \ }) +/* preemption or migration must be disabled before calling __local_lock_is_locked */ +#define __local_lock_is_locked(lock) READ_ONCE(this_cpu_ptr(lock)->acquired) + #define __local_lock_release(lock) \ do { \ local_trylock_t *tl; \ @@ -285,4 +288,8 @@ do { \ __local_trylock(lock); \ }) +/* migration must be disabled before calling __local_lock_is_locked */ +#define __local_lock_is_locked(__lock) \ + (rt_mutex_owner(&this_cpu_ptr(__lock)->lock) == current) + #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index fa9f1021541e..ede4c6bf6f22 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -44,6 +44,16 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock) return READ_ONCE(lock->owner) != NULL; } +#ifdef CONFIG_RT_MUTEXES +#define RT_MUTEX_HAS_WAITERS 1UL + +static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock) +{ + unsigned long owner = (unsigned long) READ_ONCE(lock->owner); + + return (struct task_struct 
*) (owner & ~RT_MUTEX_HAS_WAITERS); +} +#endif extern void rt_mutex_base_init(struct rt_mutex_base *rtb); /** -- cgit v1.2.3 From 99253de51f80acccc528a9c94e2f4d5f329071f1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 8 Sep 2025 18:00:03 -0700 Subject: mm: Allow GFP_ACCOUNT to be used in alloc_pages_nolock(). Change alloc_pages_nolock() to default to __GFP_COMP when allocating pages, since upcoming reentrant alloc_slab_page() needs __GFP_COMP. Also allow __GFP_ACCOUNT flag to be specified, since most of BPF infra needs __GFP_ACCOUNT except BPF streams. Reviewed-by: Vlastimil Babka Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 5ebf26fcdcfa..0ceb4e09306c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -354,7 +354,7 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp, } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) -struct page *alloc_pages_nolock_noprof(int nid, unsigned int order); +struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order); #define alloc_pages_nolock(...) alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__)) extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); -- cgit v1.2.3 From 7612833192d56af86061de8ab51989b75daf5b0d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 8 Sep 2025 18:00:06 -0700 Subject: slab: Reuse first bit for OBJEXTS_ALLOC_FAIL Since the combination of valid upper bits in slab->obj_exts with OBJEXTS_ALLOC_FAIL bit can never happen, use OBJEXTS_ALLOC_FAIL == (1ull << 0) as a magic sentinel instead of (1ull << 2) to free up bit 2. 
Signed-off-by: Alexei Starovoitov Acked-by: Shakeel Butt Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- include/linux/memcontrol.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 785173aa0739..d254c0b96d0d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -341,17 +341,23 @@ enum page_memcg_data_flags { __NR_MEMCG_DATA_FLAGS = (1UL << 2), }; +#define __OBJEXTS_ALLOC_FAIL MEMCG_DATA_OBJEXTS #define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS #else /* CONFIG_MEMCG */ +#define __OBJEXTS_ALLOC_FAIL (1UL << 0) #define __FIRST_OBJEXT_FLAG (1UL << 0) #endif /* CONFIG_MEMCG */ enum objext_flags { - /* slabobj_ext vector failed to allocate */ - OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG, + /* + * Use bit 0 with zero other bits to signal that slabobj_ext vector + * failed to allocate. The same bit 0 with valid upper bits means + * MEMCG_DATA_OBJEXTS. + */ + OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL, /* the next bit after the last actual flag */ __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1), }; -- cgit v1.2.3 From af92793e52c3a99b828ed4bdd277fd3e11c18d08 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 8 Sep 2025 18:00:07 -0700 Subject: slab: Introduce kmalloc_nolock() and kfree_nolock(). kmalloc_nolock() relies on ability of local_trylock_t to detect the situation when per-cpu kmem_cache is locked. In !PREEMPT_RT local_(try)lock_irqsave(&s->cpu_slab->lock, flags) disables IRQs and marks s->cpu_slab->lock as acquired. local_lock_is_locked(&s->cpu_slab->lock) returns true when slab is in the middle of manipulating per-cpu cache of that specific kmem_cache. 
kmalloc_nolock() can be called from any context and can re-enter into ___slab_alloc(): kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> NMI -> bpf -> kmalloc_nolock() -> ___slab_alloc(cache_B) or kmalloc() -> ___slab_alloc(cache_A) -> irqsave -> tracepoint/kprobe -> bpf -> kmalloc_nolock() -> ___slab_alloc(cache_B) Hence the caller of ___slab_alloc() checks if &s->cpu_slab->lock can be acquired without a deadlock before invoking the function. If that specific per-cpu kmem_cache is busy the kmalloc_nolock() retries in a different kmalloc bucket. The second attempt will likely succeed, since this cpu locked a different kmem_cache. Similarly, in PREEMPT_RT local_lock_is_locked() returns true when per-cpu rt_spin_lock is locked by current _task_. In this case re-entrance into the same kmalloc bucket is unsafe, and kmalloc_nolock() tries a different bucket that is most likely not locked by the current task. Though it may be locked by a different task it's safe to rt_spin_lock() and sleep on it. Similar to alloc_pages_nolock() the kmalloc_nolock() returns NULL immediately if called from hard irq or NMI in PREEMPT_RT. kfree_nolock() defers freeing to irq_work when local_lock_is_locked() and (in_nmi() or in PREEMPT_RT). SLUB_TINY config doesn't use local_lock_is_locked() and relies on spin_trylock_irqsave(&n->list_lock) to allocate, while kfree_nolock() always defers to irq_work. Note, kfree_nolock() must be called _only_ for objects allocated with kmalloc_nolock(). Debug checks (like kmemleak and kfence) were skipped on allocation, hence obj = kmalloc(); kfree_nolock(obj); will miss kmemleak/kfence bookkeeping and will cause false positives. large_kmalloc is not supported by either kmalloc_nolock() or kfree_nolock().
Signed-off-by: Alexei Starovoitov Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- include/linux/kasan.h | 13 ++++++++----- include/linux/memcontrol.h | 2 ++ include/linux/slab.h | 4 ++++ 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 890011071f2b..acdc8cb0152e 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -200,7 +200,7 @@ static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s, } bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, - bool still_accessible); + bool still_accessible, bool no_quarantine); /** * kasan_slab_free - Poison, initialize, and quarantine a slab object. * @object: Object to be freed. @@ -226,11 +226,13 @@ bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, * @Return true if KASAN took ownership of the object; false otherwise. */ static __always_inline bool kasan_slab_free(struct kmem_cache *s, - void *object, bool init, - bool still_accessible) + void *object, bool init, + bool still_accessible, + bool no_quarantine) { if (kasan_enabled()) - return __kasan_slab_free(s, object, init, still_accessible); + return __kasan_slab_free(s, object, init, still_accessible, + no_quarantine); return false; } @@ -427,7 +429,8 @@ static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object) } static inline bool kasan_slab_free(struct kmem_cache *s, void *object, - bool init, bool still_accessible) + bool init, bool still_accessible, + bool no_quarantine) { return false; } diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d254c0b96d0d..82563236f35c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -358,6 +358,8 @@ enum objext_flags { * MEMCG_DATA_OBJEXTS. 
*/ OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL, + /* slabobj_ext vector allocated with kmalloc_nolock() */ + OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG, /* the next bit after the last actual flag */ __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1), }; diff --git a/include/linux/slab.h b/include/linux/slab.h index 680193356ac7..561597dd2164 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -501,6 +501,7 @@ void * __must_check krealloc_noprof(const void *objp, size_t new_size, #define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) void kfree(const void *objp); +void kfree_nolock(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); @@ -957,6 +958,9 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f } #define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) +void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node); +#define kmalloc_nolock(...) alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__)) + #define kmem_buckets_alloc(_b, _size, _flags) \ alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) -- cgit v1.2.3 From 9a0abc39450a3123fd52533a662fbd37e0d1508c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 17:47:14 +0200 Subject: PM: runtime: Add auto-cleanup macros for "resume and get" operations It is generally useful to be able to automatically drop a device's runtime PM usage counter incremented by runtime PM operations that resume a device and bump up its usage counter [1]. To that end, add guard definition macros allowing pm_runtime_put() and pm_runtime_put_autosuspend() to be used for the auto-cleanup in those cases. Simply put, a piece of code like below: pm_runtime_get_sync(dev); ..... pm_runtime_put(dev); return 0; can be transformed with guard() like: guard(pm_runtime_active)(dev); ..... return 0; (see the pm_runtime_put() call is gone). 
However, it is better to do proper error handling in the majority of cases, so doing something like this instead of the above is recommended: ACQUIRE(pm_runtime_active_try, pm)(dev); if (ACQUIRE_ERR(pm_runtime_active_try, &pm)) return -ENXIO; ..... return 0; In all of the cases in which runtime PM is known to be enabled for the given device or the device can be regarded as operational (and so it can be accessed) with runtime PM disabled, a piece of code like: ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; ..... pm_runtime_put(dev); return 0; can be changed as follows: ACQUIRE(pm_runtime_active_try, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_try, &pm); if (ret < 0) return ret; ..... return 0; (again, see the pm_runtime_put() call is gone). Still, if the device cannot be accessed unless runtime PM has been enabled for it, the pm_runtime_active_try_enabled guard variant needs to be used, that is (in the context of the example above): ACQUIRE(pm_runtime_active_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; When the original code calls pm_runtime_put_autosuspend(), use one of the "auto" guard variants, pm_runtime_active_auto/_try/_enabled, so for example, a piece of code like: ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; ..... pm_runtime_put_autosuspend(dev); return 0; will become: ACQUIRE(pm_runtime_active_auto_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_auto_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; Note that the cases in which the return value of pm_runtime_get_sync() is checked can also be handled with the help of the new guard macros. For example, a piece of code like: ret = pm_runtime_get_sync(dev); if (ret < 0) { pm_runtime_put(dev); return ret; } ..... 
pm_runtime_put(dev); return 0; can be rewritten as: ACQUIRE(pm_runtime_active_try_enabled, pm)(dev); ret = ACQUIRE_ERR(pm_runtime_active_try_enabled, &pm); if (ret < 0) return ret; ..... return 0; or pm_runtime_active_try can be used if transparent handling of disabled runtime PM is desirable. Link: https://lore.kernel.org/linux-pm/878qimv24u.wl-tiwai@suse.de/ [1] Link: https://lore.kernel.org/linux-pm/20250926150613.000073a4@huawei.com/ Signed-off-by: Rafael J. Wysocki Acked-by: Dan Williams Reviewed-by: Takashi Iwai Link: https://patch.msgid.link/2238241.irdbgypaU6@rafael.j.wysocki [ rjw: Fixed leftovers from the previous version in the changelog ] Reviewed-by: Jonathan Cameron Reviewed-by: Dhruva Gole Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d1ff76e0e2d0..e5426bdd0c9f 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -21,6 +21,7 @@ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ +#define RPM_TRANSPARENT 0x10 /* Succeed if runtime PM is disabled */ /* * Use this for defining a set of PM operations to be used in all situations @@ -512,6 +513,19 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +static inline int pm_runtime_get_active(struct device *dev, int rpmflags) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT | rpmflags); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device.
@@ -522,15 +536,7 @@ static inline int pm_runtime_get_sync(struct device *dev) */ static inline int pm_runtime_resume_and_get(struct device *dev) { - int ret; - - ret = __pm_runtime_resume(dev, RPM_GET_PUT); - if (ret < 0) { - pm_runtime_put_noidle(dev); - return ret; - } - - return 0; + return pm_runtime_get_active(dev, 0); } /** @@ -610,6 +616,26 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) return __pm_runtime_put_autosuspend(dev); } +DEFINE_GUARD(pm_runtime_active, struct device *, + pm_runtime_get_sync(_T), pm_runtime_put(_T)); +DEFINE_GUARD(pm_runtime_active_auto, struct device *, + pm_runtime_get_sync(_T), pm_runtime_put_autosuspend(_T)); +/* + * Use the following guards with ACQUIRE()/ACQUIRE_ERR(). + * + * The difference between the "_try" and "_try_enabled" variants is that the + * former do not produce an error when runtime PM is disabled for the given + * device. + */ +DEFINE_GUARD_COND(pm_runtime_active, _try, + pm_runtime_get_active(_T, RPM_TRANSPARENT)) +DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, + pm_runtime_resume_and_get(_T)) +DEFINE_GUARD_COND(pm_runtime_active_auto, _try, + pm_runtime_get_active(_T, RPM_TRANSPARENT)) +DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, + pm_runtime_resume_and_get(_T)) + /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. -- cgit v1.2.3 From d5e58ce1fb0f13a9a0845851f267ede3551cd9fe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 18:26:40 +0200 Subject: PM: runtime: Drop DEFINE_FREE() for pm_runtime_put() The DEFINE_FREE() for pm_runtime_put has been superseded by recently introduced runtime PM auto-cleanup macros and its only user has been converted to using one of the new macros, so drop it. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Dhruva Gole Reviewed-by: Takashi Iwai Reviewed-by: Jonathan Cameron --- include/linux/pm_runtime.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index e5426bdd0c9f..edb8aed5ef62 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -563,8 +563,6 @@ static inline int pm_runtime_put(struct device *dev) return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } -DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) - /** * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. -- cgit v1.2.3 From c39d6d4d933381714b6e5d735545256558ec6c05 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 27 Sep 2025 08:29:35 -0400 Subject: ptr_ring: __ptr_ring_zero_tail micro optimization __ptr_ring_zero_tail currently does the - 1 operation twice: - during initialization of head - at each loop iteration Let's just do it in one place, all we need to do is adjust the loop condition. this is better: - a slightly clearer logic with less duplication - uses prefix -- we don't need to save the old value - one less - 1 operation - for example, when ring is empty we now don't do - 1 at all, existing code does it once Text size shrinks from 15081 to 15050 bytes. Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Simon Horman Link: https://patch.msgid.link/bcd630c7edc628e20d4f8e037341f26c90ab4365.1758976026.git.mst@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/ptr_ring.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index a736b16859a6..534531807d95 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -248,15 +248,15 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r) */ static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head) { - int head = consumer_head - 1; + int head = consumer_head; /* Zero out entries in the reverse order: this way we touch the * cache line that producer might currently be reading the last; * producer won't make progress and touch other cache lines * besides the first one until we write out all entries. */ - while (likely(head >= r->consumer_tail)) - r->queue[head--] = NULL; + while (likely(head > r->consumer_tail)) + r->queue[--head] = NULL; r->consumer_tail = consumer_head; } -- cgit v1.2.3 From a7556779745c047efb7b0ce8732889b0cdc80936 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 27 Sep 2025 11:40:38 +0200 Subject: tcp: make tcp_rcvbuf_grow() accessible to mptcp code To leverage the auto-tuning improvements brought by commit 2da35e4b4df9 ("Merge branch 'tcp-receive-side-improvements'"), the MPTCP stack needs to access the mentioned helper.
Acked-by: Geliang Tang Acked-by: Matthieu Baerts (NGI0) Signed-off-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250927-net-next-mptcp-rcv-path-imp-v1-2-5da266aa9c1a@kernel.org Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7c51a0a5ace8..5ca230ed526a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,6 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); +void tcp_rcvbuf_grow(struct sock *sk); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); -- cgit v1.2.3 From 7d452516b67add4a53e63bfa496d8df930a66b9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Sep 2025 18:21:12 +0000 Subject: Revert "net: group sk_backlog and sk_receive_queue" This reverts commit 4effb335b5dab08cb6e2c38d038910f8b527cfc9. This was a benefit for the UDP flood case, which was later greatly improved with commits 6471658dc66c ("udp: use skb_attempt_defer_free()") and b650bf0977d3 ("udp: remove busylock and add per NUMA queues"). Apparently the blamed commit added a regression for RAW sockets, possibly because they do not use the dual RX queue strategy that UDP has. sock_queue_rcv_skb_reason() and RAW recvmsg() compete for sk_receive_buf and sk_rmem_alloc changes, and having them in the same cache line reduces performance.
Fixes: 4effb335b5da ("net: group sk_backlog and sk_receive_queue") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202509281326.f605b4eb-lkp@intel.com Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: David Ahern Cc: Kuniyuki Iwashima Link: https://patch.msgid.link/20250929182112.824154-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 8c5b64f41ab7..60bcb13f045c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -395,6 +395,7 @@ struct sock { atomic_t sk_drops; __s32 sk_peek_off; + struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -412,7 +413,6 @@ struct sock { } sk_backlog; #define sk_rmem_alloc sk_backlog.rmem_alloc - struct sk_buff_head sk_error_queue; __cacheline_group_end(sock_write_rx); __cacheline_group_begin(sock_read_rx); -- cgit v1.2.3 From a680581f6a131fd8c62d284ed4a24d4bc1cc553e Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Sep 2025 10:49:10 +0200 Subject: dpll: add phase-offset-avg-factor device attribute to netlink spec Add dpll device level attribute DPLL_A_PHASE_OFFSET_AVG_FACTOR to allow control over a calculation of reported phase offset value. Attribute is present, if the driver provides such capability, otherwise attribute shall not be present. 
Signed-off-by: Ivan Vecera Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250927084912.2343597-2-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 37b438ce8efc..ab1725a954d7 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -216,6 +216,7 @@ enum dpll_a { DPLL_A_LOCK_STATUS_ERROR, DPLL_A_CLOCK_QUALITY_LEVEL, DPLL_A_PHASE_OFFSET_MONITOR, + DPLL_A_PHASE_OFFSET_AVG_FACTOR, __DPLL_A_MAX, DPLL_A_MAX = (__DPLL_A_MAX - 1) -- cgit v1.2.3 From e28d5a68b6519ec6b2118a3f604295b5534eeb51 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Sat, 27 Sep 2025 10:49:11 +0200 Subject: dpll: add phase_offset_avg_factor_get/set callback ops Add new callback operations for a dpll device: - phase_offset_avg_factor_get(...) - to obtain current phase offset averaging factor from dpll device, - phase_offset_avg_factor_set(...) - to set phase offset averaging factor Obtain the factor value using the get callback and provide it to the user if the device driver implements this callback. Execute the set callback upon user requests, if the driver implements it.
Signed-off-by: Ivan Vecera v2: * do not require 'set' callback to retrieve current value * always call 'set' callback regardless of current value Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250927084912.2343597-3-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/dpll.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/dpll.h b/include/linux/dpll.h index fa1e76920d0e..25be745bf41f 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -38,6 +38,12 @@ struct dpll_device_ops { void *dpll_priv, enum dpll_feature_state *state, struct netlink_ext_ack *extack); + int (*phase_offset_avg_factor_set)(const struct dpll_device *dpll, + void *dpll_priv, u32 factor, + struct netlink_ext_ack *extack); + int (*phase_offset_avg_factor_get)(const struct dpll_device *dpll, + void *dpll_priv, u32 *factor, + struct netlink_ext_ack *extack); }; struct dpll_pin_ops { -- cgit v1.2.3 From 7bd80ed89d72285515db673803b021469ba71ee8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 24 Sep 2025 14:02:41 +0200 Subject: Documentation: net: add flow control guide and document ethtool API Introduce a new document, flow_control.rst, to provide a comprehensive guide on Ethernet Flow Control in Linux. The guide explains how flow control works, how autonegotiation resolves pause capabilities, and how to configure it using ethtool and Netlink. In parallel, document the pause and pause-stat attributes in the ethtool.yaml netlink spec. This enables the ynl tool to generate kernel-doc comments for the corresponding enums in the UAPI header, making the C interface self-documenting. Finally, replace the legacy flow control section in phy.rst with a reference to the new document and add pointers in the relevant C source files. 
Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20250924120241.724850-1-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 45 ++++++++++++++++++++++++-- include/uapi/linux/ethtool_netlink_generated.h | 4 +-- 2 files changed, 44 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c2d8b4ec62eb..eeed1ea50369 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -953,9 +953,48 @@ struct kernel_ethtool_ts_info { * @get_pause_stats: Report pause frame statistics. Drivers must not zero * statistics which they don't report. The stats structure is initialized * to ETHTOOL_STAT_NOT_SET indicating driver does not report statistics. - * @get_pauseparam: Report pause parameters - * @set_pauseparam: Set pause parameters. Returns a negative error code - * or zero. + * + * @get_pauseparam: Report the configured policy for link-wide PAUSE + * (IEEE 802.3 Annex 31B). Drivers must fill struct ethtool_pauseparam + * such that: + * @autoneg: + * This refers to **Pause Autoneg** (IEEE 802.3 Annex 31B) only + * and is independent of generic link autonegotiation configured + * via ethtool -s. + * true -> the device follows the negotiated result of pause + * autonegotiation (Pause/Asym); + * false -> the device uses a forced MAC state independent of + * negotiation. + * @rx_pause/@tx_pause: + * represent the desired policy (preferred configuration). + * In autoneg mode they describe what is to be advertised; + * in forced mode they describe the MAC state to apply. + * + * Drivers (and/or frameworks) should persist this policy across link + * changes and reapply appropriate MAC programming when link parameters + * change. + * + * @set_pauseparam: Apply a policy for link-wide PAUSE (IEEE 802.3 Annex 31B). 
+ * If @autoneg is true: + * Arrange for pause advertisement (Pause/Asym) based on + * @rx_pause/@tx_pause and program the MAC to follow the + * negotiated result (which may be symmetric, asymmetric, or off + * depending on the link partner). + * If @autoneg is false: + * Do not rely on autonegotiation; force the MAC RX/TX pause + * state directly per @rx_pause/@tx_pause. + * + * Implementations that integrate with PHYLIB/PHYLINK should cooperate + * with those frameworks for advertisement and resolution; MAC drivers are + * still responsible for applying the required MAC state. + * + * Return: 0 on success or a negative errno. Return -EOPNOTSUPP if + * link-wide PAUSE is unsupported. If only symmetric pause is supported, + * reject unsupported asymmetric requests with -EINVAL (or document any + * coercion policy). + * + * See also: Documentation/networking/flow_control.rst + * * @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects * @set_phys_id: Identify the physical devices, e.g. 
by flashing an LED diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 0e8ac0d974e2..3dd9d7cde86e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -375,7 +375,7 @@ enum { ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; -enum { +enum ethtool_a_pause_stat { ETHTOOL_A_PAUSE_STAT_UNSPEC, ETHTOOL_A_PAUSE_STAT_PAD, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, @@ -385,7 +385,7 @@ enum { ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; -enum { +enum ethtool_a_pause { ETHTOOL_A_PAUSE_UNSPEC, ETHTOOL_A_PAUSE_HEADER, ETHTOOL_A_PAUSE_AUTONEG, -- cgit v1.2.3 From 5b66169f6be4847008c0aea50885ff0632151479 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 25 Sep 2025 02:33:03 +0000 Subject: bonding: fix xfrm offload feature setup on active-backup mode The active-backup bonding mode supports XFRM ESP offload. However, when a bond is added using command like `ip link add bond0 type bond mode 1 miimon 100`, the `ethtool -k` command shows that the XFRM ESP offload is disabled. This occurs because, in bond_newlink(), we change bond link first and register bond device later. So the XFRM feature update in bond_option_mode_set() is not called as the bond device is not yet registered, leading to the offload feature not being set successfully. To resolve this issue, we can modify the code order in bond_newlink() to ensure that the bond device is registered first before changing the bond link parameters. This change will allow the XFRM ESP offload feature to be correctly enabled. 
Fixes: 007ab5345545 ("bonding: fix feature flag setting at init time") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250925023304.472186-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/net/bonding.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index e06f0d63b2c1..bd56ad976cfb 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -711,6 +711,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); void bond_work_init_all(struct bonding *bond); +void bond_work_cancel_all(struct bonding *bond); #ifdef CONFIG_PROC_FS void bond_create_proc_entry(struct bonding *bond); -- cgit v1.2.3 From e211c463b748c4e2e8364c10bc216ca775fcc943 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 27 Sep 2025 21:52:30 +0200 Subject: net: phy: stop exporting phy_driver_unregister After 42e2a9e11a1d ("net: phy: dp83640: improve phydev and driver removal handling") we can stop exporting also phy_driver_unregister(). 
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/2bab950e-4b70-4030-b997-03f48379586f@gmail.com Signed-off-by: Paolo Abeni --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index b377dfaa6801..7a54a8b4d277 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2030,7 +2030,6 @@ static inline int phy_read_status(struct phy_device *phydev) return genphy_read_status(phydev); } -void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); -- cgit v1.2.3 From 9c94ae6bb0b2895024b6e29fcc1cbec968b4776a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:32 +0000 Subject: net: make softnet_data.defer_count an atomic This is preparation work to remove the softnet_data.defer_lock, as it is contended on hosts with large number of cores. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-2-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b85454116f6..27e3fa69253f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3538,7 +3538,7 @@ struct softnet_data { /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; - int defer_count; + atomic_t defer_count; int defer_ipi_scheduled; struct sk_buff *defer_list; call_single_data_t defer_csd; -- cgit v1.2.3 From 844c9db7f7f5fe1b0b53ed9f1c2bc7313b3021c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:33 +0000 Subject: net: use llist for sd->defer_list Get rid of sd->defer_lock and adopt llist operations. 
We optimize skb_attempt_defer_free() for the common case, where the packet is queued. Otherwise sd->defer_count is increasing, until skb_defer_free_flush() clears it. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-3-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 27e3fa69253f..5c9aa16933d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3537,10 +3537,10 @@ struct softnet_data { struct numa_drop_counters drop_counters; /* Another possibly contended cache line */ - spinlock_t defer_lock ____cacheline_aligned_in_smp; - atomic_t defer_count; - int defer_ipi_scheduled; - struct sk_buff *defer_list; + struct llist_head defer_list ____cacheline_aligned_in_smp; + atomic_long_t defer_count; + + int defer_ipi_scheduled ____cacheline_aligned_in_smp; call_single_data_t defer_csd; }; -- cgit v1.2.3 From 5628f3fe3b16114e8424bbfcf0594caef8958a06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 28 Sep 2025 08:49:34 +0000 Subject: net: add NUMA awareness to skb_attempt_defer_free() Instead of sharing sd->defer_list & sd->defer_count with many cpus, add one pair for each NUMA node. 
Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250928084934.3266948-4-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 4 ---- include/net/hotdata.h | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5c9aa16933d1..d1a687444b27 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3536,10 +3536,6 @@ struct softnet_data { struct numa_drop_counters drop_counters; - /* Another possibly contended cache line */ - struct llist_head defer_list ____cacheline_aligned_in_smp; - atomic_long_t defer_count; - int defer_ipi_scheduled ____cacheline_aligned_in_smp; call_single_data_t defer_csd; }; diff --git a/include/net/hotdata.h b/include/net/hotdata.h index fda94b2647ff..4acec191c54a 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -2,10 +2,16 @@ #ifndef _NET_HOTDATA_H #define _NET_HOTDATA_H +#include #include #include #include +struct skb_defer_node { + struct llist_head defer_list; + atomic_long_t defer_count; +} ____cacheline_aligned_in_smp; + /* Read mostly data used in network fast paths. */ struct net_hotdata { #if IS_ENABLED(CONFIG_INET) @@ -30,6 +36,7 @@ struct net_hotdata { struct rps_sock_flow_table __rcu *rps_sock_flow_table; u32 rps_cpu_mask; #endif + struct skb_defer_node __percpu *skb_defer_nodes; int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; -- cgit v1.2.3 From 20c48920583675e67b3824f147726e0fbda735ce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 18 Sep 2025 17:33:00 -0700 Subject: KVM: Export KVM-internal symbols for sub-modules only Rework the vast majority of KVM's exports to expose symbols only to KVM submodules, i.e. to x86's kvm-{amd,intel}.ko and PPC's kvm-{pr,hv}.ko. With few exceptions, KVM's exported APIs are intended (and safe) for KVM- internal usage only. 
Keep kvm_get_kvm(), kvm_get_kvm_safe(), and kvm_put_kvm() as normal exports, as they are needed by VFIO, and are generally safe for external usage (though ideally even the get/put APIs would be KVM-internal, and VFIO would pin a VM by grabbing a reference to its associated file). Implement a framework in kvm_types.h in anticipation of providing a macro to restrict KVM-specific kernel exports, i.e. to provide symbol exports for KVM if and only if KVM is built as one or more modules. Link: https://lore.kernel.org/r/20250919003303.1355064-3-seanjc@google.com Cc: Nathan Chancellor Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_types.h | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 827ecc0b7e10..490464c205b4 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -3,6 +3,23 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +#include +#include +#include +#include + +#ifdef KVM_SUB_MODULES +#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) \ + EXPORT_SYMBOL_FOR_MODULES(symbol, __stringify(KVM_SUB_MODULES)) +#else +#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) +#endif + +#ifndef __ASSEMBLER__ + +#include +#include + struct kvm; struct kvm_async_pf; struct kvm_device_ops; @@ -19,13 +36,6 @@ struct kvm_memslots; enum kvm_mr_change; -#include -#include -#include -#include - -#include - /* * Address types: * @@ -116,5 +126,6 @@ struct kvm_vcpu_stat_generic { }; #define KVM_STATS_NAME_SIZE 48 +#endif /* !__ASSEMBLER__ */ #endif /* __KVM_TYPES_H__ */ -- cgit v1.2.3 From d11f6cd1bb4a416b4515702d020a7480ac667f0f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 29 Sep 2025 11:56:41 -0400 Subject: NFSD: filecache: add STATX_DIOALIGN and STATX_DIO_READ_ALIGN support Use STATX_DIOALIGN and STATX_DIO_READ_ALIGN to get DIO alignment attributes from the underlying filesystem and store them in the 
associated nfsd_file. This is done when the nfsd_file is first opened for each regular file. Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Acked-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/misc/fs.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h index 0406ebe2a80a..7ead1c61f0cb 100644 --- a/include/trace/misc/fs.h +++ b/include/trace/misc/fs.h @@ -141,3 +141,25 @@ { ATTR_TIMES_SET, "TIMES_SET" }, \ { ATTR_TOUCH, "TOUCH"}, \ { ATTR_DELEG, "DELEG"}) + +#define show_statx_mask(flags) \ + __print_flags(flags, "|", \ + { STATX_TYPE, "TYPE" }, \ + { STATX_MODE, "MODE" }, \ + { STATX_NLINK, "NLINK" }, \ + { STATX_UID, "UID" }, \ + { STATX_GID, "GID" }, \ + { STATX_ATIME, "ATIME" }, \ + { STATX_MTIME, "MTIME" }, \ + { STATX_CTIME, "CTIME" }, \ + { STATX_INO, "INO" }, \ + { STATX_SIZE, "SIZE" }, \ + { STATX_BLOCKS, "BLOCKS" }, \ + { STATX_BASIC_STATS, "BASIC_STATS" }, \ + { STATX_BTIME, "BTIME" }, \ + { STATX_MNT_ID, "MNT_ID" }, \ + { STATX_DIOALIGN, "DIOALIGN" }, \ + { STATX_MNT_ID_UNIQUE, "MNT_ID_UNIQUE" }, \ + { STATX_SUBVOL, "SUBVOL" }, \ + { STATX_WRITE_ATOMIC, "WRITE_ATOMIC" }, \ + { STATX_DIO_READ_ALIGN, "DIO_READ_ALIGN" }) -- cgit v1.2.3 From 25ba2b84c38f624151a3ba36e56d41c39b9223ad Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 19 Sep 2025 10:36:26 -0400 Subject: nfs/localio: avoid issuing misaligned IO using O_DIRECT Add nfsd_file_dio_alignment and use it to avoid issuing misaligned IO using O_DIRECT. Any misaligned DIO falls back to using buffered IO. Because misaligned DIO is now handled safely, remove the nfs modparam 'localio_O_DIRECT_semantics' that was added to require users opt-in to the requirement that all O_DIRECT be properly DIO-aligned. 
Also, introduce nfs_iov_iter_aligned_bvec() which is a variant of iov_iter_aligned_bvec() that also verifies the offset associated with an iov_iter is DIO-aligned. NOTE: in a parallel effort, iov_iter_aligned_bvec() is being removed along with iov_iter_is_aligned(). Lastly, add pr_info_ratelimited if the underlying filesystem returns -EINVAL because it was made to try O_DIRECT for IO that is not DIO-aligned (shouldn't happen, so it's best to be louder if it does). Fixes: 3feec68563d ("nfs/localio: add direct IO enablement with sync and async IO support") Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfslocalio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 5c7c92659e73..7ca2715edccc 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -65,6 +65,8 @@ struct nfsd_localio_operations { struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); + void (*nfsd_file_dio_alignment)(struct nfsd_file *, + u32 *, u32 *, u32 *); } ____cacheline_aligned; extern void nfsd_localio_ops_init(void); -- cgit v1.2.3 From 558ae4579810fa0fef011944230c65a6f3087f85 Mon Sep 17 00:00:00 2001 From: Hoyoung Seo Date: Tue, 30 Sep 2025 15:14:28 +0900 Subject: scsi: ufs: core: Include UTP error in INT_FATAL_ERRORS When a UTP error occurs in isolation, UFS is not currently recoverable. This is because the UTP error is not considered fatal in the error handling code, leading to either an I/O timeout or an OCS error. Add the UTP error flag to INT_FATAL_ERRORS so the controller will be reset in this situation.
sd 0:0:0:0: [sda] tag#38 UNKNOWN(0x2003) Result: hostbyte=0x07 driverbyte=DRIVER_OK cmd_age=0s sd 0:0:0:0: [sda] tag#38 CDB: opcode=0x28 28 00 00 51 24 e2 00 00 08 00 I/O error, dev sda, sector 42542864 op 0x0:(READ) flags 0x80700 phys_seg 8 prio class 2 OCS error from controller = 9 for tag 39 pa_err[1] = 0x80000010 at 2667224756 us pa_err: total cnt=2 dl_err[0] = 0x80000002 at 2667148060 us dl_err[1] = 0x80002000 at 2667282844 us No record of nl_err No record of tl_err No record of dme_err No record of auto_hibern8_err fatal_err[0] = 0x804 at 2667282836 us --------------------------------------------------- REGISTER --------------------------------------------------- NAME OFFSET VALUE STD HCI SFR 0xfffffff0 0x0 AHIT 0x18 0x814 INTERRUPT STATUS 0x20 0x1000 INTERRUPT ENABLE 0x24 0x70ef5 [mkp: commit desc] Signed-off-by: Hoyoung Seo Reviewed-by: Bart Van Assche Message-Id: <20250930061428.617955-1-hy50.seo@samsung.com> Signed-off-by: Martin K. Petersen --- include/ufs/ufshci.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 612500a7088f..e64b70132101 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -180,6 +180,7 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define UTP_TASK_REQ_COMPL 0x200 #define UIC_COMMAND_COMPL 0x400 #define DEVICE_FATAL_ERROR 0x800 +#define UTP_ERROR 0x1000 #define CONTROLLER_FATAL_ERROR 0x10000 #define SYSTEM_BUS_FATAL_ERROR 0x20000 #define CRYPTO_ENGINE_FATAL_ERROR 0x40000 @@ -199,7 +200,8 @@ static inline u32 ufshci_version(u32 major, u32 minor) CONTROLLER_FATAL_ERROR |\ SYSTEM_BUS_FATAL_ERROR |\ CRYPTO_ENGINE_FATAL_ERROR |\ - UIC_LINK_LOST) + UIC_LINK_LOST |\ + UTP_ERROR) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT 0x1 -- cgit v1.2.3 From 6d0386ea99875313fdfd074eb74013b6e3b48a76 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:53 -0700 Subject: entry/kvm: KVM: Move KVM 
details related to signal/-EINTR into KVM proper Move KVM's morphing of pending signals into userspace exits into KVM proper, and drop the @vcpu param from xfer_to_guest_mode_handle_work(). How KVM responds to -EINTR is a detail that really belongs in KVM itself, and invoking kvm_handle_signal_exit() from kernel code creates an inverted module dependency. E.g. attempting to move kvm_handle_signal_exit() into kvm_main.c would generate a linker error when building kvm.ko as a module. Dropping KVM details will also allow converting the KVM "entry" code into a more generic virtualization framework so that it can be used when running as a Hyper-V root partition. Lastly, eliminating usage of "struct kvm_vcpu" outside of KVM is also nice to have for KVM x86 developers, as keeping the details of kvm_vcpu purely within KVM allows changing the layout of the structure without having to boot into a new kernel, e.g. allows rebuilding and reloading kvm.ko with a modified kvm_vcpu structure as part of debug/development. Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Signed-off-by: Wei Liu --- include/linux/entry-kvm.h | 11 +++-------- include/linux/kvm_host.h | 13 ++++++++++++- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 16149f6625e4..3644de7e6019 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -21,8 +21,6 @@ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ ARCH_XFER_TO_GUEST_MODE_WORK) -struct kvm_vcpu; - /** * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest * mode work handling function. @@ -32,12 +30,10 @@ struct kvm_vcpu; * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be * replaced by architecture specific code.
*/ -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work); +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); #ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, - unsigned long ti_work) +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; } @@ -46,11 +42,10 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, /** * xfer_to_guest_mode_handle_work - Check and handle pending work which needs * to be handled before going to guest mode - * @vcpu: Pointer to current's VCPU data * * Returns: 0 or an error code */ -int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); +int xfer_to_guest_mode_handle_work(void); /** * xfer_to_guest_mode_prepare - Perform last minute preparation work that diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..598b9473e46d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H - +#include #include #include #include @@ -2450,6 +2450,17 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } + +static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) +{ + int r = xfer_to_guest_mode_handle_work(); + + if (r) { + WARN_ON_ONCE(r != -EINTR); + kvm_handle_signal_exit(vcpu); + } + return r; +} #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ /* -- cgit v1.2.3 From 9be7e1e320ff2e7db4b23c8ec5f599bbfac94ede Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Aug 2025 17:01:54 -0700 Subject: entry: Rename "kvm" entry code assets to "virt" to genericize APIs Rename the "kvm" entry code files and Kconfigs to use generic "virt" nomenclature so that the code can be reused by other hypervisors (or rather, their root/dom0 partition drivers), without 
incorrectly suggesting the code somehow relies on and/or involves KVM. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Reviewed-by: Joel Fernandes Signed-off-by: Wei Liu --- include/linux/entry-kvm.h | 95 ---------------------------------------------- include/linux/entry-virt.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 6 +-- include/linux/rcupdate.h | 2 +- 4 files changed, 99 insertions(+), 99 deletions(-) delete mode 100644 include/linux/entry-kvm.h create mode 100644 include/linux/entry-virt.h (limited to 'include') diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h deleted file mode 100644 index 3644de7e6019..000000000000 --- a/include/linux/entry-kvm.h +++ /dev/null @@ -1,95 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_ENTRYKVM_H -#define __LINUX_ENTRYKVM_H - -#include -#include -#include -#include -#include -#include - -/* Transfer to guest mode work */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK - -#ifndef ARCH_XFER_TO_GUEST_MODE_WORK -# define ARCH_XFER_TO_GUEST_MODE_WORK (0) -#endif - -#define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ - _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ - ARCH_XFER_TO_GUEST_MODE_WORK) - -/** - * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest - * mode work handling function. - * @vcpu: Pointer to current's VCPU data - * @ti_work: Cached TIF flags gathered in xfer_to_guest_mode_handle_work() - * - * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be - * replaced by architecture specific code. 
- */ -static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); - -#ifndef arch_xfer_to_guest_mode_work -static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) -{ - return 0; -} -#endif - -/** - * xfer_to_guest_mode_handle_work - Check and handle pending work which needs - * to be handled before going to guest mode - * - * Returns: 0 or an error code - */ -int xfer_to_guest_mode_handle_work(void); - -/** - * xfer_to_guest_mode_prepare - Perform last minute preparation work that - * need to be handled while IRQs are disabled - * upon entering to guest. - * - * Has to be invoked with interrupts disabled before the last call - * to xfer_to_guest_mode_work_pending(). - */ -static inline void xfer_to_guest_mode_prepare(void) -{ - lockdep_assert_irqs_disabled(); - tick_nohz_user_enter_prepare(); -} - -/** - * __xfer_to_guest_mode_work_pending - Check if work is pending - * - * Returns: True if work pending, False otherwise. - * - * Bare variant of xfer_to_guest_mode_work_pending(). Can be called from - * interrupt enabled code for racy quick checks with care. - */ -static inline bool __xfer_to_guest_mode_work_pending(void) -{ - unsigned long ti_work = read_thread_flags(); - - return !!(ti_work & XFER_TO_GUEST_MODE_WORK); -} - -/** - * xfer_to_guest_mode_work_pending - Check if work is pending which needs to be - * handled before returning to guest mode - * - * Returns: True if work pending, False otherwise. - * - * Has to be invoked with interrupts disabled before the transition to - * guest mode. 
- */ -static inline bool xfer_to_guest_mode_work_pending(void) -{ - lockdep_assert_irqs_disabled(); - return __xfer_to_guest_mode_work_pending(); -} -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ - -#endif diff --git a/include/linux/entry-virt.h b/include/linux/entry-virt.h new file mode 100644 index 000000000000..42c89e3e5ca7 --- /dev/null +++ b/include/linux/entry-virt.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_ENTRYVIRT_H +#define __LINUX_ENTRYVIRT_H + +#include +#include +#include +#include +#include +#include + +/* Transfer to guest mode work */ +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK + +#ifndef ARCH_XFER_TO_GUEST_MODE_WORK +# define ARCH_XFER_TO_GUEST_MODE_WORK (0) +#endif + +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ + _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ + ARCH_XFER_TO_GUEST_MODE_WORK) + +/** + * arch_xfer_to_guest_mode_handle_work - Architecture specific xfer to guest + * mode work handling function. + * @vcpu: Pointer to current's VCPU data + * @ti_work: Cached TIF flags gathered in xfer_to_guest_mode_handle_work() + * + * Invoked from xfer_to_guest_mode_handle_work(). Defaults to NOOP. Can be + * replaced by architecture specific code. + */ +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); + +#ifndef arch_xfer_to_guest_mode_work +static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) +{ + return 0; +} +#endif + +/** + * xfer_to_guest_mode_handle_work - Check and handle pending work which needs + * to be handled before going to guest mode + * + * Returns: 0 or an error code + */ +int xfer_to_guest_mode_handle_work(void); + +/** + * xfer_to_guest_mode_prepare - Perform last minute preparation work that + * need to be handled while IRQs are disabled + * upon entering to guest. + * + * Has to be invoked with interrupts disabled before the last call + * to xfer_to_guest_mode_work_pending(). 
+ */ +static inline void xfer_to_guest_mode_prepare(void) +{ + lockdep_assert_irqs_disabled(); + tick_nohz_user_enter_prepare(); +} + +/** + * __xfer_to_guest_mode_work_pending - Check if work is pending + * + * Returns: True if work pending, False otherwise. + * + * Bare variant of xfer_to_guest_mode_work_pending(). Can be called from + * interrupt enabled code for racy quick checks with care. + */ +static inline bool __xfer_to_guest_mode_work_pending(void) +{ + unsigned long ti_work = read_thread_flags(); + + return !!(ti_work & XFER_TO_GUEST_MODE_WORK); +} + +/** + * xfer_to_guest_mode_work_pending - Check if work is pending which needs to be + * handled before returning to guest mode + * + * Returns: True if work pending, False otherwise. + * + * Has to be invoked with interrupts disabled before the transition to + * guest mode. + */ +static inline bool xfer_to_guest_mode_work_pending(void) +{ + lockdep_assert_irqs_disabled(); + return __xfer_to_guest_mode_work_pending(); +} +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ + +#endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 598b9473e46d..70ac2267d5d0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2,7 +2,7 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H -#include +#include #include #include #include @@ -2444,7 +2444,7 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK +#ifdef CONFIG_VIRT_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; @@ -2461,7 +2461,7 @@ static inline int kvm_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu) } return r; } -#endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ +#endif /* CONFIG_VIRT_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 
120536f4c6eb..1e1f3aa375d9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -129,7 +129,7 @@ static inline void rcu_sysrq_start(void) { } static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) +#if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_VIRT_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else static __always_inline void rcu_irq_work_resched(void) { } -- cgit v1.2.3 From 94b04355e6397a0a70b69c2571fa5c7d9990b835 Mon Sep 17 00:00:00 2001 From: Mukesh Rathor Date: Mon, 15 Sep 2025 16:46:03 -0700 Subject: Drivers: hv: Add CONFIG_HYPERV_VMBUS option At present the VMBus driver is hinged off of CONFIG_HYPERV which entails a lot of builtin code and encompasses too much. It's not always clear what depends on builtin hv code and what depends on VMBus. Setting CONFIG_HYPERV as a module and fudging the Makefile to switch to builtin adds even more confusion. VMBus is an independent module and should have its own config option. Also, there are scenarios like baremetal dom0/root where support is built in with CONFIG_HYPERV but without VMBus. Lastly, there are more features coming down that use CONFIG_HYPERV and add more dependencies on it. So, create a fine-grained HYPERV_VMBUS option and update Kconfigs for dependency on VMBus.
Signed-off-by: Mukesh Rathor Acked-by: Bjorn Helgaas # drivers/pci Signed-off-by: Wei Liu --- include/asm-generic/mshyperv.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index dbd4c2f3aee3..64ba6bc807d9 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -163,6 +163,7 @@ static inline u64 hv_generate_guest_id(u64 kernel_version) return guest_id; } +#if IS_ENABLED(CONFIG_HYPERV_VMBUS) /* Free the message slot and signal end-of-message if required */ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) { @@ -198,6 +199,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) } } +extern int vmbus_interrupt; +extern int vmbus_irq; +#endif /* CONFIG_HYPERV_VMBUS */ + int hv_get_hypervisor_version(union hv_hypervisor_version_info *info); void hv_setup_vmbus_handler(void (*handler)(void)); @@ -211,9 +216,6 @@ void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); void hv_setup_mshv_handler(void (*handler)(void)); -extern int vmbus_interrupt; -extern int vmbus_irq; - #if IS_ENABLED(CONFIG_HYPERV) /* * Hypervisor's notion of virtual processor ID is different from -- cgit v1.2.3 From 1a98f5699bd57c9b3f66ec54cc38571d5e42ffb1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Sep 2025 15:45:06 +0200 Subject: Revert "Documentation: net: add flow control guide and document ethtool API" This reverts commit 7bd80ed89d72285515db673803b021469ba71ee8. I should not have merged it to begin with due to pending review and changes to be addressed. 
Link: https://patch.msgid.link/c6f3af12df9b7998920a02027fc8893ce82afc4c.1759239721.git.pabeni@redhat.com Signed-off-by: Paolo Abeni --- include/linux/ethtool.h | 45 ++------------------------ include/uapi/linux/ethtool_netlink_generated.h | 4 +-- 2 files changed, 5 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index eeed1ea50369..c2d8b4ec62eb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -953,48 +953,9 @@ struct kernel_ethtool_ts_info { * @get_pause_stats: Report pause frame statistics. Drivers must not zero * statistics which they don't report. The stats structure is initialized * to ETHTOOL_STAT_NOT_SET indicating driver does not report statistics. - * - * @get_pauseparam: Report the configured policy for link-wide PAUSE - * (IEEE 802.3 Annex 31B). Drivers must fill struct ethtool_pauseparam - * such that: - * @autoneg: - * This refers to **Pause Autoneg** (IEEE 802.3 Annex 31B) only - * and is independent of generic link autonegotiation configured - * via ethtool -s. - * true -> the device follows the negotiated result of pause - * autonegotiation (Pause/Asym); - * false -> the device uses a forced MAC state independent of - * negotiation. - * @rx_pause/@tx_pause: - * represent the desired policy (preferred configuration). - * In autoneg mode they describe what is to be advertised; - * in forced mode they describe the MAC state to apply. - * - * Drivers (and/or frameworks) should persist this policy across link - * changes and reapply appropriate MAC programming when link parameters - * change. - * - * @set_pauseparam: Apply a policy for link-wide PAUSE (IEEE 802.3 Annex 31B). - * If @autoneg is true: - * Arrange for pause advertisement (Pause/Asym) based on - * @rx_pause/@tx_pause and program the MAC to follow the - * negotiated result (which may be symmetric, asymmetric, or off - * depending on the link partner). 
- * If @autoneg is false: - * Do not rely on autonegotiation; force the MAC RX/TX pause - * state directly per @rx_pause/@tx_pause. - * - * Implementations that integrate with PHYLIB/PHYLINK should cooperate - * with those frameworks for advertisement and resolution; MAC drivers are - * still responsible for applying the required MAC state. - * - * Return: 0 on success or a negative errno. Return -EOPNOTSUPP if - * link-wide PAUSE is unsupported. If only symmetric pause is supported, - * reject unsupported asymmetric requests with -EINVAL (or document any - * coercion policy). - * - * See also: Documentation/networking/flow_control.rst - * + * @get_pauseparam: Report pause parameters + * @set_pauseparam: Set pause parameters. Returns a negative error code + * or zero. * @self_test: Run specified self-tests * @get_strings: Return a set of strings that describe the requested objects * @set_phys_id: Identify the physical devices, e.g. by flashing an LED diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 3dd9d7cde86e..0e8ac0d974e2 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -375,7 +375,7 @@ enum { ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; -enum ethtool_a_pause_stat { +enum { ETHTOOL_A_PAUSE_STAT_UNSPEC, ETHTOOL_A_PAUSE_STAT_PAD, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, @@ -385,7 +385,7 @@ enum ethtool_a_pause_stat { ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; -enum ethtool_a_pause { +enum { ETHTOOL_A_PAUSE_UNSPEC, ETHTOOL_A_PAUSE_HEADER, ETHTOOL_A_PAUSE_AUTONEG, -- cgit v1.2.3 From 5e1c88679174e4bfe5d152060b06d370bd85de80 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Mon, 4 Aug 2025 15:07:23 +0200 Subject: mfd: qnap-mcu: Include linux/types.h in qnap-mcu.h shared header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relying on other components to include those 
basic types is unreliable and may cause compile errors like: ../include/linux/mfd/qnap-mcu.h:13:9: error: unknown type name ‘u32’ 13 | u32 baud_rate; | ^~~ ../include/linux/mfd/qnap-mcu.h:17:9: error: unknown type name ‘bool’ 17 | bool usb_led; | ^~~~ So make sure the types used in the header are available. Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250804130726.3180806-2-heiko@sntech.de Signed-off-by: Lee Jones --- include/linux/mfd/qnap-mcu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/qnap-mcu.h b/include/linux/mfd/qnap-mcu.h index 8d48c212fd44..42bf523f9a5b 100644 --- a/include/linux/mfd/qnap-mcu.h +++ b/include/linux/mfd/qnap-mcu.h @@ -7,6 +7,8 @@ #ifndef _LINUX_QNAP_MCU_H_ #define _LINUX_QNAP_MCU_H_ +#include <linux/types.h> + struct qnap_mcu; struct qnap_mcu_variant { -- cgit v1.2.3 From 9c5ad8374b1fe0c8c9eaabc1146d96a543858c4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Aug 2025 17:17:56 +0200 Subject: mfd: arizona: Make legacy gpiolib interface optional The only machine that still uses the old gpio number based interface is the wlf_cragg_6410 board file. In order to remove the dependency on the interfaces, add #ifdef blocks here.
Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250808151822.536879-13-arnd@kernel.org Signed-off-by: Lee Jones --- include/linux/mfd/arizona/pdata.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 2d13bbea4f3a..f72e6d4b14a7 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -117,8 +117,10 @@ struct arizona_pdata { /** Check for line output with HPDET method */ bool hpdet_acc_id_line; +#ifdef CONFIG_GPIOLIB_LEGACY /** GPIO used for mic isolation with HPDET */ int hpdet_id_gpio; +#endif /** Channel to use for headphone detection */ unsigned int hpdet_channel; @@ -129,8 +131,10 @@ struct arizona_pdata { /** Extra debounce timeout used during initial mic detection (ms) */ unsigned int micd_detect_debounce; +#ifdef CONFIG_GPIOLIB_LEGACY /** GPIO for mic detection polarity */ int micd_pol_gpio; +#endif /** Mic detect ramp rate */ unsigned int micd_bias_start_time; @@ -184,8 +188,10 @@ struct arizona_pdata { /** Haptic actuator type */ unsigned int hap_act; +#ifdef CONFIG_GPIOLIB_LEGACY /** GPIO for primary IRQ (used for edge triggered emulation) */ int irq_gpio; +#endif /** General purpose switch control */ unsigned int gpsw; -- cgit v1.2.3 From 719d02a25a24601c6fb8a7b1627e1abf015b6c2a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 21 Aug 2025 20:23:34 +0200 Subject: mfd: bd71828, bd71815: Prepare for power-supply support Add core support for ROHM BD718(15/28/78) PMIC's charger blocks. 
Signed-off-by: Matti Vaittinen Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20250821-bd71828-charger-v3-1-cc74ac4e0fb9@kemnade.info Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd71828.h | 63 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/rohm-bd71828.h b/include/linux/mfd/rohm-bd71828.h index ce786c96404a..73a71ef69152 100644 --- a/include/linux/mfd/rohm-bd71828.h +++ b/include/linux/mfd/rohm-bd71828.h @@ -189,6 +189,69 @@ enum { /* Charger/Battey */ #define BD71828_REG_CHG_STATE 0x65 #define BD71828_REG_CHG_FULL 0xd2 +#define BD71828_REG_CHG_EN 0x6F +#define BD71828_REG_DCIN_STAT 0x68 +#define BD71828_MASK_DCIN_DET 0x01 +#define BD71828_REG_VDCIN_U 0x9c +#define BD71828_MASK_CHG_EN 0x01 +#define BD71828_CHG_MASK_DCIN_U 0x0f +#define BD71828_REG_BAT_STAT 0x67 +#define BD71828_REG_BAT_TEMP 0x6c +#define BD71828_MASK_BAT_TEMP 0x07 +#define BD71828_BAT_TEMP_OPEN 0x07 +#define BD71828_MASK_BAT_DET 0x20 +#define BD71828_MASK_BAT_DET_DONE 0x10 +#define BD71828_REG_CHG_STATE 0x65 +#define BD71828_REG_VBAT_U 0x8c +#define BD71828_MASK_VBAT_U 0x0f +#define BD71828_REG_VBAT_REX_AVG_U 0x92 + +#define BD71828_REG_OCV_PWRON_U 0x8A + +#define BD71828_REG_VBAT_MIN_AVG_U 0x8e +#define BD71828_REG_VBAT_MIN_AVG_L 0x8f + +#define BD71828_REG_CC_CNT3 0xb5 +#define BD71828_REG_CC_CNT2 0xb6 +#define BD71828_REG_CC_CNT1 0xb7 +#define BD71828_REG_CC_CNT0 0xb8 +#define BD71828_REG_CC_CURCD_AVG_U 0xb2 +#define BD71828_MASK_CC_CURCD_AVG_U 0x3f +#define BD71828_MASK_CC_CUR_DIR 0x80 +#define BD71828_REG_VM_BTMP_U 0xa1 +#define BD71828_REG_VM_BTMP_L 0xa2 +#define BD71828_MASK_VM_BTMP_U 0x0f +#define BD71828_REG_COULOMB_CTRL 0xc4 +#define BD71828_REG_COULOMB_CTRL2 0xd2 +#define BD71828_MASK_REX_CC_CLR 0x01 +#define BD71828_MASK_FULL_CC_CLR 0x10 +#define BD71828_REG_CC_CNT_FULL3 0xbd +#define BD71828_REG_CC_CNT_CHG3 0xc1 + +#define BD71828_REG_VBAT_INITIAL1_U 0x86 +#define 
BD71828_REG_VBAT_INITIAL1_L 0x87 + +#define BD71828_REG_VBAT_INITIAL2_U 0x88 +#define BD71828_REG_VBAT_INITIAL2_L 0x89 + +#define BD71828_REG_IBAT_U 0xb0 +#define BD71828_REG_IBAT_L 0xb1 + +#define BD71828_REG_IBAT_AVG_U 0xb2 +#define BD71828_REG_IBAT_AVG_L 0xb3 + +#define BD71828_REG_VSYS_AVG_U 0x96 +#define BD71828_REG_VSYS_AVG_L 0x97 +#define BD71828_REG_VSYS_MIN_AVG_U 0x98 +#define BD71828_REG_VSYS_MIN_AVG_L 0x99 +#define BD71828_REG_CHG_SET1 0x75 +#define BD71828_REG_ALM_VBAT_LIMIT_U 0xaa +#define BD71828_REG_BATCAP_MON_LIMIT_U 0xcc +#define BD71828_REG_CONF 0x64 + +#define BD71828_REG_DCIN_CLPS 0x71 + +#define BD71828_REG_MEAS_CLEAR 0xaf /* LEDs */ #define BD71828_REG_LED_CTRL 0x4A -- cgit v1.2.3 From 7d096cb3e16f09d9762ae8cef897cdbc13a40029 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:33 +0800 Subject: virtio_ring: constify virtqueue pointer for DMA helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch constifies the virtqueue pointer for DMA helpers. Reviewed-by: Christoph Hellwig Reviewed-by: Xuan Zhuo Reviewed-by: Eugenio Pérez Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-2-jasowang@redhat.com> Signed-off-by: Michael S. 
Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index db31fc6f4f1f..eab71a440fba 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -262,18 +262,18 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) -dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, +dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_dma_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr); +int virtqueue_dma_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); -bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr); -void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, +bool virtqueue_dma_need_sync(const struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_dma_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From b41cb3bcf67fcb7b8297e5acc5bb3309c96c2ff2 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:35 +0800 Subject: virtio: rename dma helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Following patch will introduce virtio mapping function to avoid abusing DMA API for device that doesn't do DMA. To ease the introduction, this patch rename "dma" to "map" for the current dma mapping helpers. Reviewed-by: Christoph Hellwig Reviewed-by: Xuan Zhuo Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-4-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index eab71a440fba..576e08bd7697 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -262,18 +262,18 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) -dma_addr_t virtqueue_dma_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, +dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void virtqueue_dma_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, +void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -int virtqueue_dma_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); +int virtqueue_map_mapping_error(const struct virtqueue *_vq, dma_addr_t addr); -bool virtqueue_dma_need_sync(const struct virtqueue *_vq, dma_addr_t addr); -void virtqueue_dma_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, +bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr); +void virtqueue_map_sync_single_range_for_cpu(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -void virtqueue_dma_sync_single_range_for_device(const struct virtqueue 
*_vq, dma_addr_t addr, +void virtqueue_map_sync_single_range_for_device(const struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); -- cgit v1.2.3 From b16060c5c7d56455da3c3c50b4a20a83c2a30810 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:36 +0800 Subject: virtio: introduce virtio_map container union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following patch will introduce the mapping operations for virtio device. In order to achieve this, besides the dma device, virtio core needs to support a transport or device specific mapping metadata as well. So this patch introduces a union container of a dma device. The idea is to allow the transport layer to pass device specific mapping metadata which will be used as a parameter for the virtio mapping operations. For the transport or device that is using DMA, dma device is still being used. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-5-jasowang@redhat.com> Signed-off-by: Michael S.
Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 5 +++++ include/linux/virtio_ring.h | 7 ++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 576e08bd7697..b4ba1a99e5ab 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,6 +41,11 @@ struct virtqueue { void *priv; }; +union virtio_map { + /* Device that performs DMA */ + struct device *dma_dev; +}; + int virtqueue_add_outbuf(struct virtqueue *vq, struct scatterlist sg[], unsigned int num, void *data, diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 9b33df741b63..c97a12c1cda3 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -3,6 +3,7 @@ #define _LINUX_VIRTIO_RING_H #include +#include #include #include @@ -79,9 +80,9 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, /* * Creates a virtqueue and allocates the descriptor ring with per - * virtqueue DMA device. + * virtqueue mapping operations. */ -struct virtqueue *vring_create_virtqueue_dma(unsigned int index, +struct virtqueue *vring_create_virtqueue_map(unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, @@ -91,7 +92,7 @@ struct virtqueue *vring_create_virtqueue_dma(unsigned int index, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name, - struct device *dma_dev); + union virtio_map map); /* * Creates a virtqueue with a standard layout but a caller-allocated -- cgit v1.2.3 From bee8c7c24b737338216dc0f87d6c47a4abaf609a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:38 +0800 Subject: virtio: introduce map ops in virtio core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces map operations for virtio device. 
Virtio used to use DMA API which is not necessarily the case since some devices don't do DMA. Instead of using tricks and abusing DMA API, let's simply abstract the current mapping logic into virtio-specific mapping operations. For the device or transport that doesn't do DMA, they can implement their own mapping logic without the need to trick DMA core. In this case the mapping metadata is opaque to the virtio core that will be passed back to the transport or device specific map operations. For other devices, DMA API will still be used, so map token will still be the dma device to minimize the changeset and performance impact. The mapping operations are abstracted as an independent structure instead of reusing virtio_config_ops. This allows the transport to simply reuse the structure for lower layers like vDPA. A set of new mapping helpers were introduced for devices that want to do mapping by themselves. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-7-jasowang@redhat.com> Signed-off-by: Michael S.
Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 25 +++++++++++++++ include/linux/virtio_config.h | 72 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b4ba1a99e5ab..3386a4a8d06b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -166,9 +166,11 @@ struct virtio_device { struct virtio_device_id id; const struct virtio_config_ops *config; const struct vringh_config_ops *vringh_config; + const struct virtio_map_ops *map; struct list_head vqs; VIRTIO_DECLARE_FEATURES(features); void *priv; + union virtio_map vmap; #ifdef CONFIG_VIRTIO_DEBUG struct dentry *debugfs_dir; u64 debugfs_filter_features[VIRTIO_FEATURES_DWORDS]; @@ -267,6 +269,29 @@ void unregister_virtio_driver(struct virtio_driver *drv); module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, + union virtio_map mapping_token, + size_t size, dma_addr_t *dma_handle, + gfp_t gfp); + +void virtqueue_map_free_coherent(struct virtio_device *vdev, + union virtio_map mapping_token, + size_t size, void *vaddr, + dma_addr_t dma_handle); + +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction dir, + unsigned long attrs); + +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, + dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs); void virtqueue_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t addr, diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7427b79d6f3d..16001e9f9b39 100644 --- a/include/linux/virtio_config.h +++ 
b/include/linux/virtio_config.h @@ -139,6 +139,78 @@ struct virtio_config_ops { int (*enable_vq_after_reset)(struct virtqueue *vq); }; +/** + * struct virtio_map_ops - operations for mapping buffer for a virtio device + * Note: For transport that has its own mapping logic it must + * implements all of the operations + * @map_page: map a buffer to the device + * map: metadata for performing mapping + * page: the page that will be mapped by the device + * offset: the offset in the page for a buffer + * size: the buffer size + * dir: mapping direction + * attrs: mapping attributes + * Returns: the mapped address + * @unmap_page: unmap a buffer from the device + * map: device specific mapping map + * map_handle: the mapped address + * size: the buffer size + * dir: mapping direction + * attrs: unmapping attributes + * @sync_single_for_cpu: sync a single buffer from device to cpu + * map: metadata for performing mapping + * map_handle: the mapping address to sync + * size: the size of the buffer + * dir: synchronization direction + * @sync_single_for_device: sync a single buffer from cpu to device + * map: metadata for performing mapping + * map_handle: the mapping address to sync + * size: the size of the buffer + * dir: synchronization direction + * @alloc: alloc a coherent buffer mapping + * map: metadata for performing mapping + * size: the size of the buffer + * map_handle: the mapping address to sync + * gfp: allocation flag (GFP_XXX) + * Returns: virtual address of the allocated buffer + * @free: free a coherent buffer mapping + * map: metadata for performing mapping + * size: the size of the buffer + * vaddr: virtual address of the buffer + * map_handle: the mapping address to sync + * attrs: unmapping attributes + * @need_sync: if the buffer needs synchronization + * map: metadata for performing mapping + * map_handle: the mapped address + * Returns: whether the buffer needs synchronization + * @mapping_error: if the mapping address is error + * map: metadata 
for performing mapping + * map_handle: the mapped address + * @max_mapping_size: get the maximum buffer size that can be mapped + * map: metadata for performing mapping + * Returns: the maximum buffer size that can be mapped + */ +struct virtio_map_ops { + dma_addr_t (*map_page)(union virtio_map map, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_page)(union virtio_map map, dma_addr_t map_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*sync_single_for_cpu)(union virtio_map map, dma_addr_t map_handle, + size_t size, enum dma_data_direction dir); + void (*sync_single_for_device)(union virtio_map map, + dma_addr_t map_handle, size_t size, + enum dma_data_direction dir); + void *(*alloc)(union virtio_map map, size_t size, + dma_addr_t *map_handle, gfp_t gfp); + void (*free)(union virtio_map map, size_t size, void *vaddr, + dma_addr_t map_handle, unsigned long attrs); + bool (*need_sync)(union virtio_map map, dma_addr_t map_handle); + int (*mapping_error)(union virtio_map map, dma_addr_t map_handle); + size_t (*max_mapping_size)(union virtio_map map); +}; + /* If driver didn't advertise the feature, it will never appear. */ void virtio_check_driver_offered_feature(const struct virtio_device *vdev, unsigned int fbit); -- cgit v1.2.3 From 58aca3dbc7d8891a016cb17d488af3002812793b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Aug 2025 14:46:39 +0800 Subject: vdpa: support virtio_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtio core switches from DMA device to virtio_map, let's do that as well for vDPA. Signed-off-by: Jason Wang Message-Id: <20250821064641.5025-8-jasowang@redhat.com> Signed-off-by: Michael S. 
Tsirkin Tested-by: Lei Yang Reviewed-by: Eugenio Pérez --- include/linux/vdpa.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2e7a30fe6b92..ae0451945851 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -70,7 +71,7 @@ struct vdpa_mgmt_dev; /** * struct vdpa_device - representation of a vDPA device * @dev: underlying device - * @dma_dev: the actual device that is performing DMA + * @vmap: the metadata passed to upper layer to be used for mapping * @driver_override: driver name to force a match; do not set directly, * because core frees it; use driver_set_override() to * set or clear it. @@ -87,7 +88,7 @@ struct vdpa_mgmt_dev; */ struct vdpa_device { struct device dev; - struct device *dma_dev; + union virtio_map vmap; const char *driver_override; const struct vdpa_config_ops *config; struct rw_semaphore cf_lock; /* Protects get/set config */ @@ -352,11 +353,11 @@ struct vdpa_map_file { * @vdev: vdpa device * @asid: address space identifier * Returns integer: success (0) or error (< 0) - * @get_vq_dma_dev: Get the dma device for a specific + * @get_vq_map: Get the map metadata for a specific * virtqueue (optional) * @vdev: vdpa device * @idx: virtqueue index - * Returns pointer to structure device or error (NULL) + * Returns map token union error (NULL) * @bind_mm: Bind the device to a specific address space * so the vDPA framework can use VA when this * callback is implemented. 
(optional) @@ -436,7 +437,7 @@ struct vdpa_config_ops { int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); - struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); + union virtio_map (*get_vq_map)(struct vdpa_device *vdev, u16 idx); int (*bind_mm)(struct vdpa_device *vdev, struct mm_struct *mm); void (*unbind_mm)(struct vdpa_device *vdev); @@ -520,9 +521,9 @@ static inline void vdpa_set_drvdata(struct vdpa_device *vdev, void *data) dev_set_drvdata(&vdev->dev, data); } -static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) +static inline union virtio_map vdpa_get_map(struct vdpa_device *vdev) { - return vdev->dma_dev; + return vdev->vmap; } static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags) -- cgit v1.2.3 From 0d16cc439f36355d04b17ac45c3001d90969aa44 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 24 Sep 2025 15:00:44 +0800 Subject: vdpa: introduce map ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Virtio core allows the transport to provide device or transport specific mapping functions. This patch adds this support to vDPA. We can simply do this by allowing the vDPA parent to register a virtio_map_ops. Reviewed-by: Christoph Hellwig Signed-off-by: Jason Wang Message-Id: <20250924070045.10361-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Eugenio Pérez --- include/linux/vdpa.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index ae0451945851..4cf21d6e9cfd 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -76,6 +76,7 @@ struct vdpa_mgmt_dev; * because core frees it; use driver_set_override() to * set or clear it. * @config: the configuration ops for this device. 
+ * @map: the map ops for this device * @cf_lock: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? for legacy guests @@ -91,6 +92,7 @@ struct vdpa_device { union virtio_map vmap; const char *driver_override; const struct vdpa_config_ops *config; + const struct virtio_map_ops *map; struct rw_semaphore cf_lock; /* Protects get/set config */ unsigned int index; bool features_valid; @@ -447,6 +449,7 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + const struct virtio_map_ops *map, unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va); @@ -458,6 +461,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * @member: the name of struct vdpa_device within the @dev_struct * @parent: the parent device * @config: the bus operations that is supported by this device + * @map: the map operations that is supported by this device * @ngroups: the number of virtqueue groups supported by this device * @nas: the number of address spaces * @name: name of the vdpa device @@ -465,10 +469,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * * Return allocated data structure or ERR_PTR upon error */ -#define vdpa_alloc_device(dev_struct, member, parent, config, ngroups, nas, \ - name, use_va) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, map, \ + ngroups, nas, name, use_va) \ container_of((__vdpa_alloc_device( \ - parent, config, ngroups, nas, \ + parent, config, map, ngroups, nas, \ (sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ dev_struct, member))), name, use_va)), \ -- cgit v1.2.3 From 1c14b0e4ba988381e362ad8a9651eff0b21bd47f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 24 Sep 2025 15:00:45 +0800 Subject: vduse: switch to use virtio map API instead of DMA API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Lacking support for device specific mapping in virtio, VDUSE must trick the DMA API in order to make virtio-vdpa transport work. This is done by advertising vDPA device as dma device with a VDUSE specific dma_ops even if it doesn't do DMA at all. This will be fixed by this patch. Thanks to the new mapping operations supported by virtio and vDPA, VDUSE can simply switch to advertising its specific mapping operations to virtio via virtio-vdpa; then DMA API is not needed for VDUSE any more and iova domain could be used as the mapping token instead. Signed-off-by: Jason Wang Message-Id: <20250924070045.10361-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Eugenio Pérez --- include/linux/virtio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 3386a4a8d06b..96c66126c074 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -41,9 +41,13 @@ struct virtqueue { void *priv; }; +struct vduse_iova_domain; + union virtio_map { /* Device that performs DMA */ struct device *dma_dev; + /* VDUSE specific mapping data */ + struct vduse_iova_domain *iova_domain; }; int virtqueue_add_outbuf(struct virtqueue *vq, -- cgit v1.2.3 From f97aef092e199c10a3da96ae79b571edd5362faa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 12:12:37 +0200 Subject: cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency Commit a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") caused platforms where cpuinfo.transition_latency is CPUFREQ_ETERNAL to get a very large transition latency whereas previously it had been capped at 10 ms (and later at 2 ms).
This led to a user-observable regression between 6.6 and 6.12 as described by Shawn: "The dbs sampling_rate was 10000 us on 6.6 and suddently becomes 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms because the default transition delay was dropped [...]. It slows down dbs governor's reacting to CPU loading change dramatically. Also, as transition_delay_us is used by schedutil governor as rate_limit_us, it shows a negative impact on device idle power consumption, because the device gets slightly less time in the lowest OPP." Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as cpuinfo.transition_latency was that it would be capped by the core, but they may as well return a default transition latency value instead of CPUFREQ_ETERNAL and the core need not do anything with it. Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make all of the drivers in question use it instead of CPUFREQ_ETERNAL. Also update the related Rust binding. Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/ Reported-by: Shawn Guo Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Jie Zhan Acked-by: Viresh Kumar Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2264949.irdbgypaU6@rafael.j.wysocki [ rjw: Fix typo in new symbol name, drop redundant type cast from Rust binding ] Tested-by: Shawn Guo # with cpufreq-dt driver Reviewed-by: Qais Yousef Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 40966512ea18..bc8c083bc16a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -32,6 +32,9 @@ */ #define CPUFREQ_ETERNAL (-1) + +#define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS NSEC_PER_MSEC + #define CPUFREQ_NAME_LEN 16 /* Print length for names. 
Extra 1 space for accommodating '\n' in prints */ #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1) -- cgit v1.2.3 From c28a280bd465690981099cd6e43dfcfa5c28b133 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Sep 2025 12:29:50 +0200 Subject: ACPI: CPPC: Do not use CPUFREQ_ETERNAL as an error value Instead of using CPUFREQ_ETERNAL for signaling an error condition in cppc_get_transition_latency(), change the return value type of that function to int and make it return a proper negative error code on failures. No intentional functional impact. Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Jie Zhan Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Reviewed-by: Qais Yousef --- include/acpi/cppc_acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 20f3d62e7a16..13fa81504844 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -160,7 +160,7 @@ extern unsigned int cppc_khz_to_perf(struct cppc_perf_caps *caps, unsigned int f extern bool acpi_cpc_valid(void); extern bool cppc_allow_fast_switch(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); -extern unsigned int cppc_get_transition_latency(int cpu); +extern int cppc_get_transition_latency(int cpu); extern bool cpc_ffh_supported(void); extern bool cpc_supported_by_cpu(void); extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); @@ -216,9 +216,9 @@ static inline bool cppc_allow_fast_switch(void) { return false; } -static inline unsigned int cppc_get_transition_latency(int cpu) +static inline int cppc_get_transition_latency(int cpu) { - return CPUFREQ_ETERNAL; + return -ENODATA; } static inline bool cpc_ffh_supported(void) { -- cgit v1.2.3 From 950c6451a5c38d375993c3b9da427e2e69b01c30 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 26 Sep 2025 12:31:47 +0200 Subject: cpufreq: Drop unused symbol CPUFREQ_ETERNAL Drop CPUFREQ_ETERNAL that has no users any more along with all references to it in the documentation. No functional impact. Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Jie Zhan Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Reviewed-by: Qais Yousef --- include/linux/cpufreq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bc8c083bc16a..0465d1e6f72a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -26,13 +26,8 @@ *********************************************************************/ /* * Frequency values here are CPU kHz - * - * Maximum transition latency is in nanoseconds - if it's unknown, - * CPUFREQ_ETERNAL shall be used. */ -#define CPUFREQ_ETERNAL (-1) - #define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS NSEC_PER_MSEC #define CPUFREQ_NAME_LEN 16 -- cgit v1.2.3 From 4e66293bb141df33d5eb1f922e16fe05913bf296 Mon Sep 17 00:00:00 2001 From: Bhanu Seshu Kumar Valluri Date: Wed, 1 Oct 2025 14:27:16 +0530 Subject: of: doc: Fix typo in doc comments. synthetized => synthesized definied => defined sucess => success Signed-off-by: Bhanu Seshu Kumar Valluri Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 5e2c6ed9370a..121a288ca92d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1134,7 +1134,7 @@ static inline bool of_phandle_args_equal(const struct of_phandle_args *a1, * Search for a property in a device node and count the number of u8 elements * in it. 
* - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u8 and -ENODATA if the * property does not have a value. */ @@ -1153,7 +1153,7 @@ static inline int of_property_count_u8_elems(const struct device_node *np, * Search for a property in a device node and count the number of u16 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u16 and -ENODATA if the * property does not have a value. */ @@ -1172,7 +1172,7 @@ static inline int of_property_count_u16_elems(const struct device_node *np, * Search for a property in a device node and count the number of u32 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u32 and -ENODATA if the * property does not have a value. */ @@ -1191,7 +1191,7 @@ static inline int of_property_count_u32_elems(const struct device_node *np, * Search for a property in a device node and count the number of u64 elements * in it. * - * Return: The number of elements on sucess, -EINVAL if the property does + * Return: The number of elements on success, -EINVAL if the property does * not exist or its length does not match a multiple of u64 and -ENODATA if the * property does not have a value. */ -- cgit v1.2.3 From b595edcb24727e7f93e7962c3f6f971cc16dd29e Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Wed, 1 Oct 2025 16:08:46 -0700 Subject: hyperv: Remove the spurious null directive line The file contains a line that consists of the lone # symbol followed by a newline. 
While that is valid syntax as defined by the C99+ grammar (6.10.7 "Null directive"), it serves no apparent purpose in this case. Remove the null preprocessor directive. No functional changes. Fixes: e68bda71a238 ("hyperv: Add new Hyper-V headers in include/hyperv") Signed-off-by: Roman Kisel Reviewed-by: Easwar Hariharan Signed-off-by: Wei Liu --- include/hyperv/hvgdk_mini.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 1be7f6a02304..77abddfc750e 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -597,8 +597,6 @@ struct ms_hyperv_tsc_page { /* HV_REFERENCE_TSC_PAGE */ #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) -# - /* Hyper-V defined statically assigned SINTs */ #define HV_SYNIC_INTERCEPTION_SINT_INDEX 0x00000000 #define HV_SYNIC_IOMMU_FAULT_SINT_INDEX 0x00000001 -- cgit v1.2.3 From 510d76646a6a7beaa49fc0da7282e285a3dfce97 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Tue, 23 Sep 2025 19:21:36 +0800 Subject: block: Update a comment of disk statistics From commit 074a7aca7afa ("block: move stats from disk to part0"), we know that: * {disk|all}_stat_*() are gone. * disk_stat_lock/unlock() are renamed to part_stat_lock/unlock(). Therefore, outdated comments should be updated accordingly. Fixes: 074a7aca7afa ("block: move stats from disk to part0") Signed-off-by: Tang Yizhou Signed-off-by: Jens Axboe --- include/linux/part_stat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index eeeff2a04529..729415e91215 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -17,8 +17,8 @@ struct disk_stats { /* * Macros to operate on percpu disk statistics: * - * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters and should - * be called between disk_stat_lock() and disk_stat_unlock().
+ * part_stat_{add|sub|inc|dec}() modify the stat counters and should + * be called between part_stat_lock() and part_stat_unlock(). * * part_stat_read() can be called at any time. */ -- cgit v1.2.3 From 10aa5c80603088d10c2cd5e7e27d561a8fb59c7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 30 Sep 2025 14:27:52 +0200 Subject: drm/gpusvm, drm/xe: Fix userptr to not allow device private pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When userptr is used on SVM-enabled VMs, a non-NULL hmm_range::dev_private_owner value might mean that hmm_range_fault() attempts to return device private pages. Either that will fail, or the userptr code will not know how to handle those. Use NULL for hmm_range::dev_private_owner to migrate such pages to system. In order to do that, move the struct drm_gpusvm::device_private_page_owner field to struct drm_gpusvm_ctx::device_private_page_owner so that it doesn't remain immutable over the drm_gpusvm lifetime. v2: - Don't conditionally compile xe_svm_devm_owner(). - Kerneldoc xe_svm_devm_owner(). 
Fixes: 9e9787414882 ("drm/xe/userptr: replace xe_hmm with gpusvm") Cc: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Reviewed-by: Matthew Brost Acked-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250930122752.96034-1-thomas.hellstrom@linux.intel.com (cherry picked from commit ad298d9ec957414dbf3d51f3c8bca4b6d2416c0c) Signed-off-by: Lucas De Marchi --- include/drm/drm_gpusvm.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 5434048a2ca4..b92faa9a26b2 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -179,7 +179,6 @@ struct drm_gpusvm_range { * @name: Name of the GPU SVM * @drm: Pointer to the DRM device structure * @mm: Pointer to the mm_struct for the address space - * @device_private_page_owner: Device private pages owner * @mm_start: Start address of GPU SVM * @mm_range: Range of the GPU SVM * @notifier_size: Size of individual notifiers @@ -204,7 +203,6 @@ struct drm_gpusvm { const char *name; struct drm_device *drm; struct mm_struct *mm; - void *device_private_page_owner; unsigned long mm_start; unsigned long mm_range; unsigned long notifier_size; @@ -226,6 +224,8 @@ struct drm_gpusvm { /** * struct drm_gpusvm_ctx - DRM GPU SVM context * + * @device_private_page_owner: The device-private page owner to use for + * this operation * @check_pages_threshold: Check CPU pages for present if chunk is less than or * equal to threshold. If not present, reduce chunk * size. @@ -239,6 +239,7 @@ struct drm_gpusvm { * Context that is DRM GPUSVM is operating in (i.e. user arguments). 
*/ struct drm_gpusvm_ctx { + void *device_private_page_owner; unsigned long check_pages_threshold; unsigned long timeslice_ms; unsigned int in_notifier :1; @@ -249,7 +250,7 @@ struct drm_gpusvm_ctx { int drm_gpusvm_init(struct drm_gpusvm *gpusvm, const char *name, struct drm_device *drm, - struct mm_struct *mm, void *device_private_page_owner, + struct mm_struct *mm, unsigned long mm_start, unsigned long mm_range, unsigned long notifier_size, const struct drm_gpusvm_ops *ops, -- cgit v1.2.3 From 16abbabc004bedeeaa702e11913da9d4fa70e63a Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Wed, 1 Oct 2025 08:10:28 +0200 Subject: dma-mapping: fix direction in dma_alloc direction traces Set __entry->dir to the actual "dir" parameter of all trace events in dma_alloc_class. This struct member was left uninitialized by mistake. Signed-off-by: Petr Tesarik Fixes: 3afff779a725 ("dma-mapping: trace dma_alloc/free direction") Cc: stable@vger.kernel.org Reviewed-by: Sean Anderson Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20251001061028.412258-1-ptesarik@suse.com --- include/trace/events/dma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h index 5da59fd8121d..b3fef140ae15 100644 --- a/include/trace/events/dma.h +++ b/include/trace/events/dma.h @@ -133,6 +133,7 @@ DECLARE_EVENT_CLASS(dma_alloc_class, __entry->dma_addr = dma_addr; __entry->size = size; __entry->flags = flags; + __entry->dir = dir; __entry->attrs = attrs; ), -- cgit v1.2.3 From 93a4b36ef3cf4ce5e6a7e7a7686181de76e246a1 Mon Sep 17 00:00:00 2001 From: Nirbhay Sharma Date: Fri, 3 Oct 2025 17:15:55 +0530 Subject: cgroup: Fix seqcount lockdep assertion in cgroup freezer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit afa3701c0e45 ("cgroup: cgroup.stat.local time accounting") introduced a seqcount to track freeze timing but initialized it as a plain seqcount_t using 
seqcount_init(). However, the write-side critical section in cgroup_do_freeze() holds the css_set_lock spinlock while calling write_seqcount_begin(). On PREEMPT_RT kernels, spinlocks do not disable preemption, causing the lockdep assertion for a plain seqcount_t, which checks for preemption being disabled, to fail. This triggers the following warning: WARNING: CPU: 0 PID: 9692 at include/linux/seqlock.h:221 Fix this by changing the type to seqcount_spinlock_t and initializing it with seqcount_spinlock_init() to associate css_set_lock with the seqcount. This allows lockdep to correctly validate that the spinlock is held during write operations, resolving the assertion failure on all kernel configurations. Reported-by: syzbot+27a2519eb4dad86d0156@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=27a2519eb4dad86d0156 Fixes: afa3701c0e45 ("cgroup: cgroup.stat.local time accounting") Signed-off-by: Nirbhay Sharma Link: https://lore.kernel.org/r/20251002165510.KtY3IT--@linutronix.de/ Acked-by: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93318fce31f3..b760a3c470a5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -452,7 +452,7 @@ struct cgroup_freezer_state { int nr_frozen_tasks; /* Freeze time data consistency protection */ - seqcount_t freeze_seq; + seqcount_spinlock_t freeze_seq; /* * Most recent time the cgroup was requested to freeze. 
-- cgit v1.2.3 From 7a0f94361ffd6e1d31c79023e8674b492bef05e3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 30 Sep 2025 19:24:26 -0700 Subject: net: psp: don't assume reply skbs will have a socket Rx path may be passing around unreferenced sockets, which means that skb_set_owner_edemux() may not set skb->sk and PSP will crash: KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] RIP: 0010:psp_reply_set_decrypted (./include/net/psp/functions.h:132 net/psp/psp_sock.c:287) tcp_v6_send_response.constprop.0 (net/ipv6/tcp_ipv6.c:979) tcp_v6_send_reset (net/ipv6/tcp_ipv6.c:1140 (discriminator 1)) tcp_v6_do_rcv (net/ipv6/tcp_ipv6.c:1683) tcp_v6_rcv (net/ipv6/tcp_ipv6.c:1912) Fixes: 659a2899a57d ("tcp: add datapath logic for PSP with inline key exchange") Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251001022426.2592750-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/psp/functions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/psp/functions.h b/include/net/psp/functions.h index ef7743664da3..c5c23a54774e 100644 --- a/include/net/psp/functions.h +++ b/include/net/psp/functions.h @@ -34,7 +34,7 @@ unsigned int psp_key_size(u32 version); void psp_sk_assoc_free(struct sock *sk); void psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk); void psp_twsk_assoc_free(struct inet_timewait_sock *tw); -void psp_reply_set_decrypted(struct sk_buff *skb); +void psp_reply_set_decrypted(const struct sock *sk, struct sk_buff *skb); static inline struct psp_assoc *psp_sk_assoc(const struct sock *sk) { @@ -160,7 +160,7 @@ static inline void psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk) { } static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { } static inline void -psp_reply_set_decrypted(struct sk_buff *skb) { } +psp_reply_set_decrypted(const struct sock *sk, struct sk_buff *skb) { } static 
inline struct psp_assoc *psp_sk_assoc(const struct sock *sk) { -- cgit v1.2.3 From 1b54b0756f051c11f5a5d0fbc1581e0b9a18e2bc Mon Sep 17 00:00:00 2001 From: Bhanu Seshu Kumar Valluri Date: Wed, 1 Oct 2025 16:27:15 +0530 Subject: net: doc: Fix typos in docs Fix typos in doc comments. Signed-off-by: Bhanu Seshu Kumar Valluri Link: https://patch.msgid.link/20251001105715.50462-1-bhanuseshukumar@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7a54a8b4d277..3c7634482356 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -297,7 +297,7 @@ static inline const char *phy_modes(phy_interface_t interface) * * Description: maps RGMII supported link speeds into the clock rates. * This can also be used for MII, GMII, and RMII interface modes as the - * clock rates are indentical, but the caller must be aware that errors + * clock rates are identical, but the caller must be aware that errors * for unsupported clock rates will not be signalled. * * Returns: clock rate or negative errno @@ -519,7 +519,7 @@ enum phy_state { * struct phy_c45_device_ids - 802.3-c45 Device Identifiers * @devices_in_package: IEEE 802.3 devices in package register value. * @mmds_present: bit vector of MMDs present. - * @device_ids: The device identifer for each present device. + * @device_ids: The device identifier for each present device. */ struct phy_c45_device_ids { u32 devices_in_package; -- cgit v1.2.3 From 384b52ce32110db974d3b61d463af48347eb73fb Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 1 Oct 2025 19:34:29 +0200 Subject: PM: runtime: Introduce one more usage counter guard Follow previous commit 9a0abc39450a ("PM: runtime: Add auto-cleanup macros for "resume and get" operations") and define a runtime PM usage counter guard in which pm_runtime_get_noresume() and pm_runtime_put_noidle() will be used for incrementing and decrementing it, respectively. Signed-off-by: Rafael J. Wysocki Reviewed-by: Jonathan Cameron --- include/linux/pm_runtime.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index edb8aed5ef62..a3f44f6c2da1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -614,6 +614,9 @@ static inline int pm_runtime_put_autosuspend(struct device *dev) return __pm_runtime_put_autosuspend(dev); } +DEFINE_GUARD(pm_runtime_noresume, struct device *, + pm_runtime_get_noresume(_T), pm_runtime_put_noidle(_T)); + DEFINE_GUARD(pm_runtime_active, struct device *, pm_runtime_get_sync(_T), pm_runtime_put(_T)); DEFINE_GUARD(pm_runtime_active_auto, struct device *, -- cgit v1.2.3 From b8179af120943e2fc099ea87caa234039a709a66 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 29 Jul 2025 08:46:35 +0200 Subject: mm/memory_hotplug: activate node before adding new memory blocks The sysfs attributes for memory blocks require the node ID to be set and initialized, so move the node activation before adding new memory blocks. This also has the nice side effect that the BUG_ON() can be converted into a WARN_ON() as we now can handle registration errors. 
Link: https://lkml.kernel.org/r/20250729064637.51662-3-hare@kernel.org Fixes: b9ff036082cd ("mm/memory_hotplug.c: make add_memory_resource use __try_online_node") Signed-off-by: Hannes Reinecke Acked-by: David Hildenbrand Acked-by: Oscar Salvador Reviewed-by: Donet Tom Signed-off-by: Andrew Morton --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 40eb70ccb09d..4a29153e372e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -159,7 +159,7 @@ static inline unsigned long memory_block_advised_max_size(void) extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, - struct vmem_altmap *altmap, + int nid, struct vmem_altmap *altmap, struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); -- cgit v1.2.3 From 0a947c14e48cbf9de222836170282e0167a9e096 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 29 Jul 2025 08:46:36 +0200 Subject: drivers/base: move memory_block_add_nid() into the caller Now the node id only needs to be set for early memory, so move memory_block_add_nid() into the caller and rename it into memory_block_add_nid_early(). This allows us to further simplify the code by dropping the 'context' argument to do_register_memory_block_under_node(). 
Link: https://lkml.kernel.org/r/20250729064637.51662-4-hare@kernel.org Suggested-by: David Hildenbrand Signed-off-by: Hannes Reinecke Acked-by: David Hildenbrand Acked-by: Oscar Salvador Reviewed-by: Donet Tom Signed-off-by: Andrew Morton --- include/linux/memory.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 4a29153e372e..43d378038ce2 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -202,8 +202,7 @@ static inline unsigned long phys_to_block_id(unsigned long phys) } #ifdef CONFIG_NUMA -void memory_block_add_nid(struct memory_block *mem, int nid, - enum meminit_context context); +void memory_block_add_nid_early(struct memory_block *mem, int nid); #endif /* CONFIG_NUMA */ int memory_block_advise_max_size(unsigned long size); unsigned long memory_block_advised_max_size(void); -- cgit v1.2.3 From de7342228b7343774d6a9981c2ddbfb5e201044b Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Sat, 4 Oct 2025 22:23:29 +0800 Subject: bpf: Finish constification of 1st parameter of bpf_d_path() The commit 1b8abbb12128 ("bpf...d_path(): constify path argument") constified the first parameter of the bpf_d_path(), but failed to update it in all places. Finish constification. 
Otherwise the selftests fail to build: .../selftests/bpf/bpf_experimental.h:222:12: error: conflicting types for 'bpf_path_d_path' 222 | extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym; | ^ .../selftests/bpf/tools/include/vmlinux.h:153922:12: note: previous declaration is here 153922 | extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __weak __ksym; Fixes: 1b8abbb12128 ("bpf...d_path(): constify path argument") Signed-off-by: Rong Tao Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ae83d8649ef1..6829936d33f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4891,7 +4891,7 @@ union bpf_attr { * * **-ENOENT** if the bpf_local_storage cannot be found. * - * long bpf_d_path(struct path *path, char *buf, u32 sz) + * long bpf_d_path(const struct path *path, char *buf, u32 sz) * Description * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. The path is -- cgit v1.2.3 From b71a6e2a1b710ea31c363a72c75f510ef35d8e69 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 2 Oct 2025 17:12:35 +0900 Subject: i2c: rename wait_for_completion callback to wait_for_completion_cb Functionally no change. Remove the ambiguity of 'wait_for_completion'. It helps development of the DEPT dependency tracker, but seems favorable in any case.
Signed-off-by: Byungchul Park [wsa: reworded commit message] Signed-off-by: Wolfram Sang --- include/linux/i2c-algo-pca.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c-algo-pca.h b/include/linux/i2c-algo-pca.h index 7c522fdd9ea7..e305bf32e40a 100644 --- a/include/linux/i2c-algo-pca.h +++ b/include/linux/i2c-algo-pca.h @@ -71,7 +71,7 @@ struct i2c_algo_pca_data { void *data; /* private low level data */ void (*write_byte) (void *data, int reg, int val); int (*read_byte) (void *data, int reg); - int (*wait_for_completion) (void *data); + int (*wait_for_completion_cb) (void *data); void (*reset_chip) (void *data); /* For PCA9564, use one of the predefined frequencies: * 330000, 288000, 217000, 146000, 88000, 59000, 44000, 36000 -- cgit v1.2.3 From 48b77733d0dbaf8cd0a122712072f92b2d95d894 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Oct 2025 15:19:07 +0200 Subject: expfs: Fix exportfs_can_encode_fh() for EXPORT_FH_FID After commit 5402c4d4d200 ("exportfs: require ->fh_to_parent() to encode connectable file handles") we will fail to create non-decodable file handles for filesystems without export operations. Fix it. 
Fixes: 5402c4d4d200 ("exportfs: require ->fh_to_parent() to encode connectable file handles") Reviewed-by: Christian Brauner Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/exportfs.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d0cf10d5e0f7..f0cf2714ec52 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -320,9 +320,6 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { - if (!nop) - return false; - /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -330,6 +327,10 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, if (fh_flags & EXPORT_FH_FID) return exportfs_can_encode_fid(nop); + /* Normal file handles cannot be created without export ops */ + if (!nop) + return false; + /* * If a connectable file handle was requested, we need to make sure that * filesystem can also decode connected file handles. -- cgit v1.2.3 From 929bf010e0599ddef6b640cd314f1de65dd1ca3e Mon Sep 17 00:00:00 2001 From: Li Zhe Date: Thu, 14 Aug 2025 14:47:10 +0800 Subject: mm: introduce num_pages_contiguous() Let's add a simple helper for determining the number of contiguous pages that represent contiguous PFNs. In an ideal world, this helper would be simpler or not even required. Unfortunately, on some configs we still have to maintain (SPARSEMEM without VMEMMAP), the memmap is allocated per memory section, and we might run into weird corner cases of false positives when blindly testing for contiguous pages only. One example of such false positives would be a memory section-sized hole that does not have a memmap. 
The surrounding memory sections might get "struct pages" that are contiguous, but the PFNs are actually not. This helper will, for example, be useful for determining contiguous PFNs in a GUP result, to batch further operations across returned "struct page"s. VFIO will utilize this interface to accelerate the VFIO DMA map process. Implementation based on Linus' suggestions to avoid new usage of nth_page() where avoidable. Suggested-by: Linus Torvalds Suggested-by: Jason Gunthorpe Signed-off-by: Li Zhe Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20250814064714.56485-2-lizhe.67@bytedance.com Signed-off-by: Alex Williamson --- include/linux/mm.h | 7 ++++++- include/linux/mm_inline.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 06978b4dbeb8..f092ce3530bb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1833,7 +1833,12 @@ static inline unsigned long memdesc_section(memdesc_flags_t mdf) { return (mdf.f >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } -#endif +#else /* !SECTION_IN_PAGE_FLAGS */ +static inline unsigned long memdesc_section(memdesc_flags_t mdf) +{ + return 0; +} +#endif /* SECTION_IN_PAGE_FLAGS */ /** * folio_pfn - Return the Page Frame Number of a folio. diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index d6c1011b38f2..f6a2b2d20016 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -617,4 +617,40 @@ static inline bool vma_has_recency(const struct vm_area_struct *vma) return true; } +/** + * num_pages_contiguous() - determine the number of contiguous pages + * that represent contiguous PFNs + * @pages: an array of page pointers + * @nr_pages: length of the array, at least 1 + * + * Determine the number of contiguous pages that represent contiguous PFNs + * in @pages, starting from the first page. 
+ * + * In some kernel configs contiguous PFNs will not have contiguous struct + * pages. In these configurations num_pages_contiguous() will return a num + * smaller than ideal number. The caller should continue to check for pfn + * contiguity after each call to num_pages_contiguous(). + * + * Returns the number of contiguous pages. + */ +static inline size_t num_pages_contiguous(struct page **pages, size_t nr_pages) +{ + struct page *cur_page = pages[0]; + unsigned long section = memdesc_section(cur_page->flags); + size_t i; + + for (i = 1; i < nr_pages; i++) { + if (++cur_page != pages[i]) + break; + /* + * In unproblematic kernel configs, page_to_section() == 0 and + * the whole check will get optimized out. + */ + if (memdesc_section(cur_page->flags) != section) + break; + } + + return i; +} + #endif -- cgit v1.2.3 From 95920c2ed02bde551ab654e9749c2ca7bc3100e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 30 Sep 2025 13:43:29 +0200 Subject: page_pool: Fix PP_MAGIC_MASK to avoid crashing on some 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Helge reported that the introduction of PP_MAGIC_MASK led to crashes on boot on his 32-bit parisc machine. The cause of this is the mask is set too wide, so the page_pool_page_is_pp() incurs false positives which crash the machine. Just disabling the check in page_pool_is_pp() will lead to the page_pool code itself malfunctioning; so instead of doing this, this patch changes the define for PP_DMA_INDEX_BITS to avoid mistaking arbitrary kernel pointers for page_pool-tagged pages. The fix relies on the kernel pointers that alias with the pp_magic field always being above PAGE_OFFSET. With this assumption, we can use the lowest bit of the value of PAGE_OFFSET as the upper bound of the PP_DMA_INDEX_MASK, which should avoid the false positives.
Because we cannot rely on PAGE_OFFSET always being a compile-time constant, nor on it always being >0, we fall back to disabling the dma_index storage when there are not enough bits available. This leaves us in the situation we were in before the patch in the Fixes tag, but only on a subset of architecture configurations. This seems to be the best we can do until the transition to page types is complete for page_pool pages. v2: - Make sure there's at least 8 bits available and that the PAGE_OFFSET bit calculation doesn't wrap Link: https://lore.kernel.org/all/aMNJMFa5fDalFmtn@p100/ Fixes: ee62ce7a1d90 ("page_pool: Track DMA-mapped pages and unmap them when destroying the pool") Cc: stable@vger.kernel.org # 6.15+ Tested-by: Helge Deller Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Mina Almasry Tested-by: Helge Deller Link: https://patch.msgid.link/20250930114331.675412-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/mm.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ae97a0b8ec7..0905eb6b55ec 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4159,14 +4159,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); * since this value becomes part of PP_SIGNATURE; meaning we can just use the * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is - * 0, we make sure that we leave the two topmost bits empty, as that guarantees - * we won't mistake a valid kernel pointer for a value we set, regardless of the - * VMSPLIT setting. + * 0, we use the lowest bit of PAGE_OFFSET as the boundary if that value is + * known at compile-time.
* - * Altogether, this means that the number of bits available is constrained by - * the size of an unsigned long (at the upper end, subtracting two bits per the - * above), and the definition of PP_SIGNATURE (with or without - * POISON_POINTER_DELTA). + * If the value of PAGE_OFFSET is not known at compile time, or if it is too + * small to leave at least 8 bits available above PP_SIGNATURE, we define the + * number of bits to be 0, which turns off the DMA index tracking altogether + * (see page_pool_register_dma_index()). */ #define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) #if POISON_POINTER_DELTA > 0 @@ -4175,8 +4174,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); */ #define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) #else -/* Always leave out the topmost two; see above. */ -#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +/* Use the lowest bit of PAGE_OFFSET if there's at least 8 bits available; see above */ +#define PP_DMA_INDEX_MIN_OFFSET (1 << (PP_DMA_INDEX_SHIFT + 8)) +#define PP_DMA_INDEX_BITS ((__builtin_constant_p(PAGE_OFFSET) && \ + PAGE_OFFSET >= PP_DMA_INDEX_MIN_OFFSET && \ + !(PAGE_OFFSET & (PP_DMA_INDEX_MIN_OFFSET - 1))) ? \ + MIN(32, __ffs(PAGE_OFFSET) - PP_DMA_INDEX_SHIFT) : 0) + #endif #define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ -- cgit v1.2.3 From beb97995b97532e1f215e3295e6843e59862f94b Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Tue, 7 Oct 2025 14:18:18 +0800 Subject: io_uring: use tab indentation for IORING_SEND_VECTORIZED comment Be consistent with tab style of "liburing/src/include/liburing/io_uring.h". 
Signed-off-by: Haiyue Wang Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a0cc1cc0dd01..263bed13473e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -404,7 +404,7 @@ enum io_uring_op { * will be contiguous from the starting buffer ID. * * IORING_SEND_VECTORIZED If set, SEND[_ZC] will take a pointer to a io_vec - * to allow vectorized send operations. + * to allow vectorized send operations. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) -- cgit v1.2.3 From 1ed06c83506ecaaf1836ddeb7c65772ff86d8d53 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:25 +0200 Subject: block: remove bio_iov_iter_get_pages Switch the only caller to bio_iov_iter_get_pages, and explain why it does not have any alignment requirements. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index a64a30131031..b01dae9506de 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -449,11 +449,6 @@ int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask); -static inline int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) -{ - return bio_iov_iter_get_pages_aligned(bio, iter, 0); -} - void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); -- cgit v1.2.3 From 82dd5d763c9b718e2d655b9565e0a06a91bb83dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:26 +0200 Subject: block: rename bio_iov_iter_get_pages_aligned to bio_iov_iter_get_pages Now that the bio_iov_iter_get_pages is free again, use it instead of the more complicated now. Also drop the unused export. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/blkdev.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index b01dae9506de..16c1c85613b7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -446,7 +446,7 @@ int submit_bio_wait(struct bio *bio); int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); -int bio_iov_iter_get_pages_aligned(struct bio *bio, struct iov_iter *iter, +int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 066e5309bd45..c97e8b0e67b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1876,7 +1876,7 @@ static inline int bio_split_rw_at(struct bio *bio, static inline int bio_iov_iter_get_bdev_pages(struct bio *bio, struct iov_iter *iter, struct block_device *bdev) { - return bio_iov_iter_get_pages_aligned(bio, iter, + return bio_iov_iter_get_pages(bio, iter, bdev_logical_block_size(bdev) - 1); } -- cgit v1.2.3 From 506aa235f6e0baa00bf792df82a5e9f618b7a5d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Oct 2025 11:06:28 +0200 Subject: block: move bio_iov_iter_get_bdev_pages to block/fops.c Keep bio_iov_iter_get_bdev_pages local with the callers, as blindly looking at the bdev logical block size is often not the best idea unless on a block device. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Reviewed-by: Keith Busch Reviewed-by: Qu Wenruo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c97e8b0e67b6..d4db5039836d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1873,13 +1873,6 @@ static inline int bio_split_rw_at(struct bio *bio, return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); } -static inline int bio_iov_iter_get_bdev_pages(struct bio *bio, - struct iov_iter *iter, struct block_device *bdev) -{ - return bio_iov_iter_get_pages(bio, iter, - bdev_logical_block_size(bdev) - 1); -} - #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 1f086d2508ebe494d13fd587d1f5e2b908379efc Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 3 Oct 2025 16:05:46 -0400 Subject: drm/amdkfd: Fix two comments in kfd_ioctl.h Queue read and write pointers are "to KFD", not "from KFD". 
Suggested-by: Robert Liu Signed-off-by: Felix Kuehling Reviewed-by: Alex Deucher Reviewed-by: Robert Liu Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 04c7d283dc7d..5d1727a6d040 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -67,8 +67,8 @@ struct kfd_ioctl_get_version_args { struct kfd_ioctl_create_queue_args { __u64 ring_base_address; /* to KFD */ - __u64 write_pointer_address; /* from KFD */ - __u64 read_pointer_address; /* from KFD */ + __u64 write_pointer_address; /* to KFD */ + __u64 read_pointer_address; /* to KFD */ __u64 doorbell_offset; /* from KFD */ __u32 ring_size; /* to KFD */ -- cgit v1.2.3 From 8375b76517cb52bac0903071feedc218c45d74d2 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 21 Sep 2025 08:44:56 +0300 Subject: kho: replace kho_preserve_phys() with kho_preserve_pages() to make it clear that KHO operates on pages rather than on a random physical address. The kho_preserve_pages() will be also used in upcoming support for vmalloc preservation. 
Link: https://lkml.kernel.org/r/20250921054458.4043761-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Reviewed-by: Jason Gunthorpe Cc: Alexander Graf Cc: Baoquan He Cc: Changyuan Lyu Cc: Chris Li Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index 559d13a3bc44..cec663b39861 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -18,6 +18,7 @@ enum kho_event { struct folio; struct notifier_block; +struct page; #define DECLARE_KHOSER_PTR(name, type) \ union { \ @@ -43,7 +44,7 @@ bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); -int kho_preserve_phys(phys_addr_t phys, size_t size); +int kho_preserve_pages(struct page *page, unsigned int nr_pages); struct folio *kho_restore_folio(phys_addr_t phys); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); @@ -71,7 +72,7 @@ static inline int kho_preserve_folio(struct folio *folio) return -EOPNOTSUPP; } -static inline int kho_preserve_phys(phys_addr_t phys, size_t size) +static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages) { return -EOPNOTSUPP; } -- cgit v1.2.3 From a667300bd53f272a3055238bcefe108f88836270 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 21 Sep 2025 08:44:57 +0300 Subject: kho: add support for preserving vmalloc allocations A vmalloc allocation is preserved using binary structure similar to global KHO memory tracker. It's a linked list of pages where each page is an array of physical address of pages in vmalloc area. kho_preserve_vmalloc() hands out the physical address of the head page to the caller. 
This address is used as the argument to kho_restore_vmalloc() to restore the mapping in the vmalloc address space and populate it with the preserved pages. [pasha.tatashin@soleen.com: free chunks using free_page() not kfree()] Link: https://lkml.kernel.org/r/mafs0a52idbeg.fsf@kernel.org [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20250921054458.4043761-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Baoquan He Cc: Changyuan Lyu Cc: Chris Li Cc: Jason Gunthorpe Signed-off-by: Andrew Morton --- include/linux/kexec_handover.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h index cec663b39861..25042c1d8d54 100644 --- a/include/linux/kexec_handover.h +++ b/include/linux/kexec_handover.h @@ -39,13 +39,24 @@ struct page; struct kho_serialization; +struct kho_vmalloc_chunk; +struct kho_vmalloc { + DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *); + unsigned int total_pages; + unsigned short flags; + unsigned short order; +}; + #ifdef CONFIG_KEXEC_HANDOVER bool kho_is_enabled(void); bool is_kho_boot(void); int kho_preserve_folio(struct folio *folio); int kho_preserve_pages(struct page *page, unsigned int nr_pages); +int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation); struct folio *kho_restore_folio(phys_addr_t phys); +struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages); +void *kho_restore_vmalloc(const struct kho_vmalloc *preservation); int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); int kho_retrieve_subtree(const char *name, phys_addr_t *phys); @@ -77,11 +88,28 @@ static inline int kho_preserve_pages(struct page *page, unsigned int nr_pages) return -EOPNOTSUPP; } +static inline int kho_preserve_vmalloc(void *ptr, + struct kho_vmalloc *preservation) +{ + return -EOPNOTSUPP; +} + static
static inline struct folio *kho_restore_folio(phys_addr_t phys) { return NULL; } +static inline struct page *kho_restore_pages(phys_addr_t phys, + unsigned int nr_pages) +{ + return NULL; +} + +static inline void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) +{ + return NULL; +} + static inline int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt) { -- cgit v1.2.3 From fcc0669c5aa681994c507b50f1c706c969d99730 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 22 Sep 2025 15:02:03 -0700 Subject: memcg: skip cgroup_file_notify if spinning is not allowed Generally memcg charging is allowed from all the contexts including NMI where even spinning on spinlock can cause locking issues. However one call chain was missed during the addition of memcg charging from any context support. That is try_charge_memcg() -> memcg_memory_event() -> cgroup_file_notify(). The possible function call tree under cgroup_file_notify() can acquire many different spin locks in spinning mode. Some of them are cgroup_file_kn_lock, kernfs_notify_lock, pool_workqueue's lock. So, let's just skip cgroup_file_notify() from memcg charging if the context does not allow spinning. Alternative approach was also explored where instead of skipping cgroup_file_notify(), we defer the memcg event processing to irq_work [1]. However it adds complexity and it was decided to keep things simple until we need more memcg events with !allow_spinning requirement.
Link: https://lore.kernel.org/all/5qi2llyzf7gklncflo6gxoozljbm4h3tpnuv4u4ej4ztysvi6f@x44v7nz2wdzd/ [1] Link: https://lkml.kernel.org/r/20250922220203.261714-1-shakeel.butt@linux.dev Fixes: 3ac4638a734a ("memcg: make memcg_rstat_updated nmi safe") Signed-off-by: Shakeel Butt Acked-by: Michal Hocko Closes: https://lore.kernel.org/all/20250905061919.439648-1-yepeilin@google.com/ Cc: Alexei Starovoitov Cc: Johannes Weiner Cc: Kumar Kartikeya Dwivedi Cc: Muchun Song Cc: Peilin Ye Cc: Roman Gushchin Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 16fe0306e50e..873e510d6f8d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1001,22 +1001,28 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, count_memcg_events_mm(mm, idx, 1); } -static inline void memcg_memory_event(struct mem_cgroup *memcg, - enum memcg_memory_event event) +static inline void __memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event, + bool allow_spinning) { bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || event == MEMCG_SWAP_FAIL; + /* For now only MEMCG_MAX can happen with !allow_spinning context. 
*/ + VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX); + atomic_long_inc(&memcg->memory_events_local[event]); - if (!swap_event) + if (!swap_event && allow_spinning) cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - if (swap_event) - cgroup_file_notify(&memcg->swap_events_file); - else - cgroup_file_notify(&memcg->events_file); + if (allow_spinning) { + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); + } if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; @@ -1026,6 +1032,12 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, !mem_cgroup_is_root(memcg)); } +static inline void memcg_memory_event(struct mem_cgroup *memcg, + enum memcg_memory_event event) +{ + __memcg_memory_event(memcg, event, true); +} + static inline void memcg_memory_event_mm(struct mm_struct *mm, enum memcg_memory_event event) { -- cgit v1.2.3 From f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 Mon Sep 17 00:00:00 2001 From: Jakub Acs Date: Wed, 1 Oct 2025 09:03:52 +0000 Subject: mm/ksm: fix flag-dropping behavior in ksm_madvise syzkaller discovered the following crash: (kernel BUG) [ 44.607039] ------------[ cut here ]------------ [ 44.607422] kernel BUG at mm/userfaultfd.c:2067! 
[ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI [ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none) [ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460 [ 44.617726] Call Trace: [ 44.617926] [ 44.619284] userfaultfd_release+0xef/0x1b0 [ 44.620976] __fput+0x3f9/0xb60 [ 44.621240] fput_close_sync+0x110/0x210 [ 44.622222] __x64_sys_close+0x8f/0x120 [ 44.622530] do_syscall_64+0x5b/0x2f0 [ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 44.623244] RIP: 0033:0x7f365bb3f227 Kernel panics because it detects UFFD inconsistency during userfaultfd_release_all(). Specifically, a VMA which has a valid pointer to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags. The inconsistency is caused in ksm_madvise(): when user calls madvise() with MADV_UNMERGEABLE on a VMA that is registered for UFFD in MINOR mode, it accidentally clears all flags stored in the upper 32 bits of vma->vm_flags. Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int and int are 32-bit wide. This setup causes the following mishap during the &= ~VM_MERGEABLE assignment. VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000. After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then promoted to unsigned long before the & operation. This promotion fills upper 32 bits with leading 0s, as we're doing unsigned conversion (and even for a signed conversion, this wouldn't help as the leading bit is 0). & operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears the upper 32-bits of its value. Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the BIT() macro.
Note: other VM_* flags are not affected: This only happens to the VM_MERGEABLE flag, as the other VM_* flags are all constants of type int and after ~ operation, they end up with leading 1 and are thus converted to unsigned long with leading 1s. Note 2: After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is no longer a kernel BUG, but a WARNING at the same place: [ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067 but the root-cause (flag-drop) remains the same. [akpm@linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel] Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/ Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") Signed-off-by: Jakub Acs Signed-off-by: Miguel Ojeda Acked-by: David Hildenbrand Acked-by: SeongJae Park Tested-by: Alice Ryhl Tested-by: Miguel Ojeda Cc: Xu Xin Cc: Chengming Zhou Cc: Peter Xu Cc: Axel Rasmussen Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 06978b4dbeb8..70a2a76007d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -323,7 +323,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ -#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ +#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ -- cgit v1.2.3 From 27c0a7b05d13a0dc54ed0b95fc12218210fdea1a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 12:02:27 -0700 Subject: libceph: Use 
HMAC-SHA256 library instead of crypto_shash Use the HMAC-SHA256 library functions instead of crypto_shash. This is simpler and faster. Signed-off-by: Eric Biggers Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 1717cc57cdac..4b49592a738f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -2,6 +2,7 @@ #ifndef __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H +#include #include #include #include @@ -412,7 +413,8 @@ struct ceph_connection_v2_info { struct ceph_msg_data_cursor in_cursor; struct ceph_msg_data_cursor out_cursor; - struct crypto_shash *hmac_tfm; /* post-auth signature */ + struct hmac_sha256_key hmac_key; /* post-auth signature */ + bool hmac_key_set; struct crypto_aead *gcm_tfm; /* on-wire encryption */ struct aead_request *gcm_req; struct crypto_wait gcm_wait; -- cgit v1.2.3 From 59699a5a7114f09f890e86c09a6b32afb5eaa64c Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:53 +0200 Subject: libceph: make ceph_con_get_out_msg() return the message pointer The caller in messenger_v1.c loads it anyway, so let's keep the pointer in the register instead of reloading it from memory. This eliminates a tiny bit of unnecessary overhead. 
Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 4b49592a738f..9ebcac2981fd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -550,7 +550,7 @@ void ceph_addr_set_port(struct ceph_entity_addr *addr, int p); void ceph_con_process_message(struct ceph_connection *con); int ceph_con_in_msg_alloc(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); -void ceph_con_get_out_msg(struct ceph_connection *con); +struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con); /* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); -- cgit v1.2.3 From 7399212dcf64d90a6ab239bdd98bd325d922fc7e Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:54 +0200 Subject: libceph: pass the message pointer instead of loading con->out_msg This pointer is in a register anyway, so let's use that instead of reloading from memory everywhere. 
[ idryomov: formatting ] Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 9ebcac2981fd..6aa4c6478c9f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -555,7 +555,7 @@ struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con); /* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); int ceph_con_v1_try_write(struct ceph_connection *con); -void ceph_con_v1_revoke(struct ceph_connection *con); +void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg); void ceph_con_v1_revoke_incoming(struct ceph_connection *con); bool ceph_con_v1_opened(struct ceph_connection *con); void ceph_con_v1_reset_session(struct ceph_connection *con); @@ -564,7 +564,7 @@ void ceph_con_v1_reset_protocol(struct ceph_connection *con); /* messenger_v2.c */ int ceph_con_v2_try_read(struct ceph_connection *con); int ceph_con_v2_try_write(struct ceph_connection *con); -void ceph_con_v2_revoke(struct ceph_connection *con); +void ceph_con_v2_revoke(struct ceph_connection *con, struct ceph_msg *msg); void ceph_con_v2_revoke_incoming(struct ceph_connection *con); bool ceph_con_v2_opened(struct ceph_connection *con); void ceph_con_v2_reset_session(struct ceph_connection *con); -- cgit v1.2.3 From 207696b17f38e869e59889b44d395ab24bb678d3 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Thu, 18 Sep 2025 22:30:18 +0300 Subject: tpm: use a map for tpm2_calc_ordinal_duration() The current shenanigans for duration calculation introduce too much complexity for a trivial problem, and further the code is hard to patch and maintain. Address these issues with a flat look-up table, which is easy to understand and patch. 
If leaf driver specific patching is required in future, it is easy enough to make a copy of this table during driver initialization and add the chip parameter back. 'chip->duration' is retained for TPM 1.x. As the first entry for this new behavior address TCG spec update mentioned in this issue: https://github.com/raspberrypi/linux/issues/7054 Therefore, for TPM_SelfTest the duration is set to 3000 ms. This does not categorize as a bug, given that this is introduced to the spec after the feature was originally made. Reviewed-by: Serge Hallyn Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index b0e9eb5ef022..dc0338a783f3 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -228,10 +228,11 @@ enum tpm2_timeouts { TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, +}; + +enum tpm2_durations { TPM2_DURATION_SHORT = 20, - TPM2_DURATION_MEDIUM = 750, TPM2_DURATION_LONG = 2000, - TPM2_DURATION_LONG_LONG = 300000, TPM2_DURATION_DEFAULT = 120000, }; -- cgit v1.2.3 From d2042d8f96ddefdeee823737f813efe3ab4b4e8d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:54 -0700 Subject: KVM: Rework KVM_CAP_GUEST_MEMFD_MMAP into KVM_CAP_GUEST_MEMFD_FLAGS Rework the not-yet-released KVM_CAP_GUEST_MEMFD_MMAP into a more generic KVM_CAP_GUEST_MEMFD_FLAGS capability so that adding new flags doesn't require a new capability, and so that developers aren't tempted to bundle multiple flags into a single capability. Note, kvm_vm_ioctl_check_extension_generic() can only return a 32-bit value, but that limitation can be easily circumvented by adding e.g. KVM_CAP_GUEST_MEMFD_FLAGS2 in the unlikely event guest_memfd supports more than 32 flags.
Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-2-seanjc@google.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6efa98a57ec1..b1d52d0c56ec 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -962,7 +962,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 -#define KVM_CAP_GUEST_MEMFD_MMAP 244 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From fe2bf6234e947bf5544db6d386af1df2a8db80f3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:55 -0700 Subject: KVM: guest_memfd: Add INIT_SHARED flag, reject user page faults if not set Add a guest_memfd flag to allow userspace to state that the underlying memory should be configured to be initialized as shared, and reject user page faults if the guest_memfd instance's memory isn't shared. Because KVM doesn't yet support in-place private<=>shared conversions, all guest_memfd memory effectively follows the initial state. Alternatively, KVM could deduce the initial state based on MMAP, which for all intents and purposes is what KVM currently does. However, implicitly deriving the default state based on MMAP will result in a messy ABI when support for in-place conversions is added. For x86 CoCo VMs, which don't yet support MMAP, memory is currently private by default (otherwise the memory would be unusable). If MMAP implies memory is shared by default, then the default state for CoCo VMs will vary based on MMAP, and from userspace's perspective, will change when in-place conversion support is added. I.e. 
to maintain guest<=>host ABI, userspace would need to immediately convert all memory from shared=>private, which is both ugly and inefficient. The inefficiency could be avoided by adding a flag to state that memory is _private_ by default, irrespective of MMAP, but that would lead to an equally messy and hard to document ABI. Bite the bullet and immediately add a flag to control the default state so that the effective behavior is explicit and straightforward. Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Cc: David Hildenbrand Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-3-seanjc@google.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b1d52d0c56ec..52f6000ab020 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1599,7 +1599,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) -#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; -- cgit v1.2.3 From 44c6cb9fe9888b371e31165b2854bd0f4e2787d4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:58 -0700 Subject: KVM: guest_memfd: Allow mmap() on guest_memfd for x86 VMs with private memory Allow mmap() on guest_memfd instances for x86 VMs with private memory as the need to track private vs. shared state in the guest_memfd instance is only pertinent to INIT_SHARED. 
Doing mmap() on private memory isn't terribly useful (yet!), but it's now possible, and will be desirable when guest_memfd gains support for other VMA-based syscalls, e.g. mbind() to set NUMA policy. Lift the restriction now, before MMAP support is officially released, so that KVM doesn't need to add another capability to enumerate support for mmap() on private memory. Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-6-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 19b8c4bebb9c..680ca838f018 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -729,7 +729,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) #endif #ifdef CONFIG_KVM_GUEST_MEMFD -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm); + +static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm) +{ + u64 flags = GUEST_MEMFD_FLAG_MMAP; + + if (!kvm || kvm_arch_supports_gmem_init_shared(kvm)) + flags |= GUEST_MEMFD_FLAG_INIT_SHARED; + + return flags; +} #endif #ifndef kvm_arch_has_readonly_mem -- cgit v1.2.3 From a8482d2c9071d75c920eba0db36428898250ea57 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 11 Oct 2025 12:31:53 +0200 Subject: Revert "i2c: boardinfo: Annotate code used in init phase only" This reverts commit 1a2b423be6a89dd07d5fc27ea042be68697a6a49 because we got a regression report and need time to find out the details.
Reported-by: Konrad Dybcio Closes: https://lore.kernel.org/r/29ec0082-4dd4-4120-acd2-44b35b4b9487@oss.qualcomm.com Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 11a19241e360..20fd41b51d5c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -499,7 +499,7 @@ static inline struct i2c_client *i2c_verify_client(struct device *dev) * Modules for add-on boards must use other calls. */ #ifdef CONFIG_I2C_BOARDINFO -int __init +int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned n); #else -- cgit v1.2.3 From 12d724f2852d094d68dccaf5101e0ef89a971cde Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 9 Oct 2025 19:46:00 +0900 Subject: ata: libata-core: relax checks in ata_read_log_directory() Commit 6d4405b16d37 ("ata: libata-core: Cache the general purpose log directory") introduced caching of a device general purpose log directory to avoid repeated access to this log page during device scan. This change also added a check on this log page to verify that the log page version is 0x0001 as mandated by the ACS specifications. And it turns out that some devices do not bother reporting this version, instead reporting a version 0, resulting in error messages such as: ata6.00: Invalid log directory version 0x0000 and to the device being marked as not supporting the general purpose log directory log page. Since before commit 6d4405b16d37 the log page version check did not exist and things were still working correctly for these devices, relax ata_read_log_directory() version check and only warn about the invalid log page version number without disabling access to the log directory page. 
Fixes: 6d4405b16d37 ("ata: libata-core: Cache the general purpose log directory") Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220635 Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- include/linux/libata.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 21de0935775d..7a98de1cc995 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1594,6 +1594,12 @@ do { \ #define ata_dev_dbg(dev, fmt, ...) \ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) +#define ata_dev_warn_once(dev, fmt, ...) \ + pr_warn_once("ata%u.%02u: " fmt, \ + (dev)->link->ap->print_id, \ + (dev)->link->pmp + (dev)->devno, \ + ##__VA_ARGS__) + static inline void ata_print_version_once(const struct device *dev, const char *version) { -- cgit v1.2.3 From 11fb1a82aefa6f7fea6ac82334edb5639b9927df Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 23 Sep 2025 16:09:27 +0100 Subject: firmware: arm_ffa: Add support for IMPDEF value in the memory access descriptor FF-A v1.2 introduced a 16-byte IMPLEMENTATION DEFINED value in the endpoint memory access descriptor to allow any sender to specify its own custom value for each receiver. Also this value must be specified by the receiver when retrieving the memory region. The sender must ensure it informs the receiver of this value via an IMPLEMENTATION DEFINED mechanism such as a partition message. So the FF-A driver can use the message interfaces to communicate the value and set the same in the ffa_mem_region_attributes structures when using the memory interfaces. The driver ensures that the size of the endpoint memory access descriptors is set correctly based on the FF-A version.
Fixes: 9fac08d9d985 ("firmware: arm_ffa: Upgrade FF-A version to v1.2 in the driver") Reported-by: Lixiang Mao Tested-by: Lixiang Mao Message-Id: <20250923150927.1218364-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla --- include/linux/arm_ffa.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cd7ee4df9045..81e603839c4a 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -338,6 +338,7 @@ struct ffa_mem_region_attributes { * an `struct ffa_mem_region_addr_range`. */ u32 composite_off; + u8 impdef_val[16]; u64 reserved; }; @@ -417,15 +418,31 @@ struct ffa_mem_region { #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) +#define FFA_EMAD_HAS_IMPDEF_FIELD(version) ((version) >= FFA_VERSION_1_2) +#define FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version) ((version) > FFA_VERSION_1_0) + +static inline u32 ffa_emad_size_get(u32 ffa_version) +{ + u32 sz; + struct ffa_mem_region_attributes *ep_mem_access; + + if (FFA_EMAD_HAS_IMPDEF_FIELD(ffa_version)) + sz = sizeof(*ep_mem_access); + else + sz = sizeof(*ep_mem_access) - sizeof(ep_mem_access->impdef_val); + + return sz; +} + static inline u32 ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) { - u32 offset = count * sizeof(struct ffa_mem_region_attributes); + u32 offset = count * ffa_emad_size_get(ffa_version); /* * Earlier to v1.1, the endpoint memory descriptor array started at * offset 32(i.e. 
offset of ep_mem_offset in the current structure) */ - if (ffa_version <= FFA_VERSION_1_0) + if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(ffa_version)) offset += offsetof(struct ffa_mem_region, ep_mem_offset); else offset += sizeof(struct ffa_mem_region); -- cgit v1.2.3 From 53a3c6e222836a23e8e0693395584aefc456dca6 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Thu, 2 Oct 2025 15:29:24 +0800 Subject: ASoC: tas2781: Support more newly-released amplifiers tas58xx in the driver TAS5802/TAS5815/TAS5828 has on-chip DSP without current/voltage feedback. Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20251002072925.26242-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index ddd997ac3216..0fbcdb15c74b 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -120,8 +120,11 @@ enum audio_device { TAS2570, TAS2572, TAS2781, + TAS5802, + TAS5815, TAS5825, TAS5827, + TAS5828, TAS_OTHERS, }; -- cgit v1.2.3 From 7e8242405b94ceac6db820de7d4fd9318cbc1219 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Wed, 1 Oct 2025 08:08:03 +0200 Subject: rpmb: move rpmb_frame struct and constants to common header Move struct rpmb_frame and RPMB operation constants from MMC block driver to include/linux/rpmb.h for reuse across different RPMB implementations (UFS, NVMe, etc.). 
Signed-off-by: Bean Huo Reviewed-by: Avri Altman Acked-by: Jens Wiklander Reviewed-by: Bart Van Assche Signed-off-by: Ulf Hansson --- include/linux/rpmb.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/rpmb.h b/include/linux/rpmb.h index cccda73eea4d..ed3f8e431eff 100644 --- a/include/linux/rpmb.h +++ b/include/linux/rpmb.h @@ -61,6 +61,50 @@ struct rpmb_dev { #define to_rpmb_dev(x) container_of((x), struct rpmb_dev, dev) +/** + * struct rpmb_frame - RPMB frame structure for authenticated access + * + * @stuff : stuff bytes, a padding/reserved area of 196 bytes at the + * beginning of the RPMB frame. They don’t carry meaningful + * data but are required to make the frame exactly 512 bytes. + * @key_mac : The authentication key or the message authentication + * code (MAC) depending on the request/response type. + * The MAC will be delivered in the last (or the only) + * block of data. + * @data : Data to be written or read by signed access. + * @nonce : Random number generated by the host for the requests + * and copied to the response by the RPMB engine. + * @write_counter: Counter value for the total amount of the successful + * authenticated data write requests made by the host. + * @addr : Address of the data to be programmed to or read + * from the RPMB. Address is the serial number of + * the accessed block (half sector 256B). + * @block_count : Number of blocks (half sectors, 256B) requested to be + * read/programmed. + * @result : Includes information about the status of the write counter + * (valid, expired) and result of the access made to the RPMB. + * @req_resp : Defines the type of request and response to/from the memory. + * + * The stuff bytes and big-endian properties are modeled to fit to the spec. 
+ */ +struct rpmb_frame { + u8 stuff[196]; + u8 key_mac[32]; + u8 data[256]; + u8 nonce[16]; + __be32 write_counter; + __be16 addr; + __be16 block_count; + __be16 result; + __be16 req_resp; +}; + +#define RPMB_PROGRAM_KEY 0x1 /* Program RPMB Authentication Key */ +#define RPMB_GET_WRITE_COUNTER 0x2 /* Read RPMB write counter */ +#define RPMB_WRITE_DATA 0x3 /* Write data to RPMB partition */ +#define RPMB_READ_DATA 0x4 /* Read data from RPMB partition */ +#define RPMB_RESULT_READ 0x5 /* Read result request (Internal) */ + #if IS_ENABLED(CONFIG_RPMB) struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev); void rpmb_dev_put(struct rpmb_dev *rdev); -- cgit v1.2.3 From aa68975c973ed3b0bd4ff513113495588afb855c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:46 +0100 Subject: KVM: arm64: Introduce timer_context_to_vcpu() helper We currently have a vcpu pointer nested into each timer context. As we are about to remove this pointer, introduce a helper (aptly named timer_context_to_vcpu()) that returns this pointer, at least until we repaint the data structure. 
Signed-off-by: Marc Zyngier --- include/kvm/arm_arch_timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 681cf0c8b9df..d188c716d03c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -128,7 +128,7 @@ void kvm_timer_init_vhe(void); #define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) - +#define timer_context_to_vcpu(ctx) ((ctx)->vcpu) #define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) #define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) -- cgit v1.2.3 From 8625a670afb05f1e1d69d50a74dbcc9d1b855efe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:47 +0100 Subject: KVM: arm64: Replace timer context vcpu pointer with timer_id Having to follow a pointer to a vcpu is pretty dumb, when the timers are at a fixed offset in the vcpu structure itself. Trade the vcpu pointer for a timer_id, which can then be used to compute the vcpu address as needed. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- include/kvm/arm_arch_timer.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d188c716d03c..d8e400cb2bff 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,8 +51,6 @@ struct arch_timer_vm_data { }; struct arch_timer_context { - struct kvm_vcpu *vcpu; - /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; u64 ns_frac; @@ -71,6 +69,9 @@ struct arch_timer_context { bool level; } irq; + /* Who am I?
*/ + enum kvm_arch_timers timer_id; + /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; }; @@ -127,9 +128,9 @@ void kvm_timer_init_vhe(void); #define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER]) #define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) -#define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) -#define timer_context_to_vcpu(ctx) ((ctx)->vcpu) -#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) +#define arch_timer_ctx_index(ctx) ((ctx)->timer_id) +#define timer_context_to_vcpu(ctx) container_of((ctx), struct kvm_vcpu, arch.timer_cpu.timers[(ctx)->timer_id]) +#define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data)) #define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From a92d552266890f83126fdef4f777a985cc1302bd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:48 +0100 Subject: KVM: arm64: Make timer_set_offset() generally accessible Move the timer_set_offset() helper to arm_arch_timer.h, so that it is next to timer_get_offset(), and accessible by the rest of KVM. 
Signed-off-by: Marc Zyngier --- include/kvm/arm_arch_timer.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d8e400cb2bff..5f7f2ed8817c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -179,4 +179,14 @@ static inline u64 timer_get_offset(struct arch_timer_context *ctxt) return offset; } +static inline void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) +{ + if (!ctxt->offset.vm_offset) { + WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt)); + return; + } + + WRITE_ONCE(*ctxt->offset.vm_offset, offset); +} + #endif -- cgit v1.2.3 From 386aac77da112651a5cdadc4a6b29181592f5aa0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:54 +0100 Subject: KVM: arm64: Kill leftovers of ad-hoc timer userspace access Now that the whole timer infrastructure is handled as system register accesses, get rid of the now unused ad-hoc infrastructure. 
Signed-off-by: Marc Zyngier --- include/kvm/arm_arch_timer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 5f7f2ed8817c..7310841f4512 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -107,9 +107,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); void kvm_timer_init_vm(struct kvm *kvm); -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); -int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); - int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -- cgit v1.2.3 From ef38b4eab146715bc68d45029257f5e69ea3f2cd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Oct 2025 16:40:57 -0400 Subject: drm/amdgpu: drop unused structures in amdgpu_drm.h These were never used and are duplicated with the interface that is used. Maybe leftovers from a previous revision of the patch that added them. 
Fixes: 90c448fef312 ("drm/amdgpu: add new AMDGPU_INFO subquery for userq objects") Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index cd7402e36b6d..406a42be429b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1555,27 +1555,6 @@ struct drm_amdgpu_info_hw_ip { __u32 userq_num_slots; }; -/* GFX metadata BO sizes and alignment info (in bytes) */ -struct drm_amdgpu_info_uq_fw_areas_gfx { - /* shadow area size */ - __u32 shadow_size; - /* shadow area base virtual mem alignment */ - __u32 shadow_alignment; - /* context save area size */ - __u32 csa_size; - /* context save area base virtual mem alignment */ - __u32 csa_alignment; -}; - -/* IP specific fw related information used in the - * subquery AMDGPU_INFO_UQ_FW_AREAS - */ -struct drm_amdgpu_info_uq_fw_areas { - union { - struct drm_amdgpu_info_uq_fw_areas_gfx gfx; - }; -}; - struct drm_amdgpu_info_num_handles { /** Max handles as supported by firmware for UVD */ __u32 uvd_max_handles; -- cgit v1.2.3 From 7a84394f02ab1985ebbe0a8d6f6d69bd040de4b3 Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Tue, 7 Oct 2025 15:22:58 -0600 Subject: NFS4: Apply delay_retrans to async operations The setting of delay_retrans is applied to synchronous RPC operations because the retransmit count is stored in same struct nfs4_exception that is passed each time an error is checked. However, for asynchronous operations (READ, WRITE, LOCKU, CLOSE, DELEGRETURN), a new struct nfs4_exception is made on the stack each time the task callback is invoked. This means that the retransmit count is always zero and thus delay_retrans never takes effect. Apply delay_retrans to these operations by tracking and updating their retransmit count. 
Change-Id: Ieb33e046c2b277cb979caa3faca7f52faf0568c9 Signed-off-by: Joshua Watt Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d56583572c98..31463286402f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1659,6 +1659,7 @@ struct nfs_pgio_header { void *netfs; #endif + unsigned short retrans; int pnfs_error; int error; /* merge with pnfs_error */ unsigned int good_bytes; /* boundary of good data */ -- cgit v1.2.3 From e4d0c909bf8328d986bf3aadba0c33a72b5ae30d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20-=202N?= Date: Thu, 9 Oct 2025 15:06:56 +0200 Subject: net: phy: bcm54811: Fix GMII/MII/MII-Lite selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Broadcom bcm54811 is hardware-strapped to select among RGMII and GMII/MII/MII-Lite modes. However, the corresponding bit, RGMII Enable in Miscellaneous Control Register must be also set to select desired RGMII or MII(-lite)/GMII mode. 
Fixes: 3117a11fff5af9e7 ("net: phy: bcm54811: PHY initialization") Signed-off-by: Kamil Horák - 2N Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251009130656.1308237-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 15c35655f482..115a964f3006 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -137,6 +137,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RSVD 0x0060 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN 0x0080 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 -- cgit v1.2.3 From 21f4d45eba0b2dcae5dbc9e5e0ad08735c993f16 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 9 Oct 2025 16:02:19 +0100 Subject: net/ip6_tunnel: Prevent perpetual tunnel growth Similarly to ipv4 tunnel, ipv6 version updates dev->needed_headroom, too. While ipv4 tunnel headroom adjustment growth was limited in commit 5ae1e9922bbd ("net: ip_tunnel: prevent perpetual headroom growth"), ipv6 tunnel yet increases the headroom without any ceiling. Reflect ipv4 tunnel headroom adjustment limit on ipv6 version. Credits to Francesco Ruggeri, who was originally debugging this issue and wrote local Arista-specific patch and a reproducer. 
Fixes: 8eb30be0352d ("ipv6: Create ip6_tnl_xmit") Cc: Florian Westphal Cc: Francesco Ruggeri Signed-off-by: Dmitry Safonov Link: https://patch.msgid.link/20251009-ip6_tunnel-headroom-v2-1-8e4dbd8f7e35@arista.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4314a97702ea..ecae35512b9b 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -611,6 +611,21 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom, bool reply); +static inline void ip_tunnel_adj_headroom(struct net_device *dev, + unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head + * will overflow header offsets in skb_headers_offset_update(). + */ + const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline int iptunnel_pull_offloads(struct sk_buff *skb) -- cgit v1.2.3 From 1d64624243af8329b4b219d8c39e28ea448f9929 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 6 Oct 2025 18:05:31 -0700 Subject: HID: core: Add printk_ratelimited variants to hid_warn() etc hid_warn_ratelimited() is needed. Add the others as part of the block. Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- include/linux/hid.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index e1b673ad7457..a4ddb94e3ee5 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1292,4 +1292,15 @@ void hid_quirks_exit(__u16 bus); #define hid_dbg_once(hid, fmt, ...) 
\ dev_dbg_once(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_err_ratelimited(hid, fmt, ...) \ + dev_err_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_notice_ratelimited(hid, fmt, ...) \ + dev_notice_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_warn_ratelimited(hid, fmt, ...) \ + dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_info_ratelimited(hid, fmt, ...) \ + dev_info_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_dbg_ratelimited(hid, fmt, ...) \ + dev_dbg_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) + #endif -- cgit v1.2.3 From 5fb750e8a9ae123b2034771b864b8a21dbef65cd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Oct 2025 17:07:00 -0700 Subject: bpf: Replace bpf_map_kmalloc_node() with kmalloc_nolock() to allocate bpf_async_cb structures. The following kmemleak splat: [ 8.105530] kmemleak: Trying to color unknown object at 0xff11000100e918c0 as Black [ 8.106521] Call Trace: [ 8.106521] [ 8.106521] dump_stack_lvl+0x4b/0x70 [ 8.106521] kvfree_call_rcu+0xcb/0x3b0 [ 8.106521] ? hrtimer_cancel+0x21/0x40 [ 8.106521] bpf_obj_free_fields+0x193/0x200 [ 8.106521] htab_map_update_elem+0x29c/0x410 [ 8.106521] bpf_prog_cfc8cd0f42c04044_overwrite_cb+0x47/0x4b [ 8.106521] bpf_prog_8c30cd7c4db2e963_overwrite_timer+0x65/0x86 [ 8.106521] bpf_prog_test_run_syscall+0xe1/0x2a0 happens due to the combination of features and fixes, but mainly due to commit 6d78b4473cdb ("bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init()") It's using __GFP_HIGH, which instructs slub/kmemleak internals to skip kmemleak_alloc_recursive() on allocation, so subsequent kfree_rcu()-> kvfree_call_rcu()->kmemleak_ignore() complains with the above splat. 
To fix this imbalance, replace bpf_map_kmalloc_node() with kmalloc_nolock() and kfree_rcu() with call_rcu() + kfree_nolock() to make sure that the objects allocated with kmalloc_nolock() are freed with kfree_nolock() rather than the implicit kfree() that kfree_rcu() uses internally. Note, the kmalloc_nolock() happens under bpf_spin_lock_irqsave(), so it will always fail in PREEMPT_RT. This is not an issue at the moment, since bpf_timers are disabled in PREEMPT_RT. In the future bpf_spin_lock will be replaced with state machine similar to bpf_task_work. Fixes: 6d78b4473cdb ("bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init()") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Shakeel Butt Acked-by: Harry Yoo Acked-by: Vlastimil Babka Cc: linux-mm@kvack.org Link: https://lore.kernel.org/bpf/20251015000700.28988-1-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a98c83346134..d808253f2e94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2499,6 +2499,8 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid, #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags); @@ -2511,6 +2513,8 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, */ #define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ kmalloc_node(_size, _flags, _node) +#define bpf_map_kmalloc_nolock(_map, _size, _flags, _node) \ + kmalloc_nolock(_size, _flags, _node) #define bpf_map_kzalloc(_map, _size, _flags) \ kzalloc(_size, _flags) #define bpf_map_kvcalloc(_map, _n, _size, _flags) \ -- 
cgit v1.2.3 From 2aab1f993c8cb753ccb3d5b848cd758e2e87d965 Mon Sep 17 00:00:00 2001 From: Ankan Biswas Date: Wed, 15 Oct 2025 20:50:57 +0530 Subject: drm/gpuvm: Fix kernel-doc warning for drm_gpuvm_map_req.map The kernel-doc for struct drm_gpuvm_map_req.map was added as '@op_map' instead of '@map', leading to this warning during htmldocs build: WARNING: include/drm/drm_gpuvm.h:1083 struct member 'map' not described in 'drm_gpuvm_map_req' Fixes: 000a45dce7ad ("drm/gpuvm: Pass map arguments through a struct") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20250821133539.03aa298e@canb.auug.org.au/ Signed-off-by: Ankan Biswas Signed-off-by: Danilo Krummrich --- include/drm/drm_gpuvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 8890ded1d907..476990e761f8 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1078,7 +1078,7 @@ struct drm_gpuva_ops { */ struct drm_gpuvm_map_req { /** - * @op_map: struct drm_gpuva_op_map + * @map: struct drm_gpuva_op_map */ struct drm_gpuva_op_map map; }; -- cgit v1.2.3 From c97513cddcfc235f2522617980838e500af21d01 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Tue, 9 Sep 2025 22:52:43 +0800 Subject: hung_task: fix warnings caused by unaligned lock pointers The blocker tracking mechanism assumes that lock pointers are at least 4-byte aligned to use their lower bits for type encoding. However, as reported by Eero Tamminen, some architectures like m68k only guarantee 2-byte alignment of 32-bit values. This breaks the assumption and causes two related WARN_ON_ONCE checks to trigger. To fix this, the runtime checks are adjusted to silently ignore any lock that is not 4-byte aligned, effectively disabling the feature in such cases and avoiding the related warnings. Thanks to Geert Uytterhoeven for bisecting! 
Link: https://lkml.kernel.org/r/20250909145243.17119-1-lance.yang@linux.dev Fixes: e711faaafbe5 ("hung_task: replace blocker_mutex with encoded blocker") Signed-off-by: Lance Yang Reported-by: Eero Tamminen Closes: https://lore.kernel.org/lkml/CAMuHMdW7Ab13DdGs2acMQcix5ObJK0O2dG_Fxzr8_g58Rc1_0g@mail.gmail.com Reviewed-by: Masami Hiramatsu (Google) Cc: John Paul Adrian Glaubitz Cc: Anna Schumaker Cc: Boqun Feng Cc: Finn Thain Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Lance Yang Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Signed-off-by: Andrew Morton --- include/linux/hung_task.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 34e615c76ca5..c4403eeb7144 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -20,6 +20,10 @@ * always zero. So we can use these bits to encode the specific blocking * type. * + * Note that on architectures where this is not guaranteed, or for any + * unaligned lock, this tracking mechanism is silently skipped for that + * lock. + * * Type encoding: * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) @@ -45,7 +49,7 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) * If the lock pointer matches the BLOCKER_TYPE_MASK, return * without writing anything. 
*/ - if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK)) + if (lock_ptr & BLOCKER_TYPE_MASK) return; WRITE_ONCE(current->blocker, lock_ptr | type); @@ -53,8 +57,6 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) static inline void hung_task_clear_blocker(void) { - WARN_ON_ONCE(!READ_ONCE(current->blocker)); - WRITE_ONCE(current->blocker, 0UL); } -- cgit v1.2.3 From 0fbbcab7f9082cdc233da5e5e353f69830f11956 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Oct 2025 00:07:42 -0700 Subject: cgroup/misc: fix misc_res_type kernel-doc warning Format the kernel-doc for SCALE_HW_CALIB_INVALID correctly to avoid a kernel-doc warning: Warning: include/linux/misc_cgroup.h:26 Enum value 'MISC_CG_RES_TDX' not described in enum 'misc_res_type' Fixes: 7c035bea9407 ("KVM: TDX: Register TDX host key IDs to cgroup misc controller") Signed-off-by: Randy Dunlap Signed-off-by: Tejun Heo --- include/linux/misc_cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 71cf5bfc6349..0cb36a3ffc47 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -19,7 +19,7 @@ enum misc_res_type { MISC_CG_RES_SEV_ES, #endif #ifdef CONFIG_INTEL_TDX_HOST - /* Intel TDX HKIDs resource */ + /** @MISC_CG_RES_TDX: Intel TDX HKIDs resource */ MISC_CG_RES_TDX, #endif /** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */ -- cgit v1.2.3 From ce831bffcef3d8f9691b5537d74ffa1b1256c017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 15 Oct 2025 19:07:26 +0200 Subject: drm/xe/uapi: Hide the madvise autoreset behind a VM_BIND flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The madvise implementation currently resets the SVM madvise if the underlying CPU map is unmapped. This is in an attempt to mimic the CPU madvise behaviour. 
However, it's not clear that this is a desired behaviour since if the end app user relies on it for malloc()ed objects or stack objects, it may not work as intended. Instead of having the autoreset functionality being a direct application-facing implicit UAPI, make the UMD explicitly choose this behaviour if it wants to expose it by introducing DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET, and add a semantics description. v2: - Kerneldoc fixes. Fix a commit log message. Fixes: a2eb8aec3ebe ("drm/xe: Reset VMA attributes to default in SVM garbage collector") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: "Falkowski, John" Cc: "Mrozek, Michal" Signed-off-by: Thomas Hellström Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251015170726.178685-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 59a2d3f38ab23cce4cd9f0c4a5e08fdfe9e67ae7) Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 40ff19f52a8d..517489a7ec60 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1013,6 +1013,20 @@ struct drm_xe_vm_destroy { * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. + * - %DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET - Can be used in combination with + * %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR to reset madvises when the underlying + * CPU address space range is unmapped (typically with munmap(2) or brk(2)). + * The madvise values set with &DRM_IOCTL_XE_MADVISE are reset to the values + * that were present immediately after the &DRM_IOCTL_XE_VM_BIND. + * The reset GPU virtual address range is the intersection of the range bound + * using &DRM_IOCTL_XE_VM_BIND and the virtual CPU address space range + * unmapped. 
+ * This functionality is present to mimic the behaviour of CPU address space + * madvises set using madvise(2), which are typically reset on unmap. + * Note: free(3) may or may not call munmap(2) and/or brk(2), and may thus + * not invoke autoreset. Neither will stack variables going out of scope. + * Therefore it's recommended to always explicitly reset the madvises when + * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call. * * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be: * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in @@ -1119,6 +1133,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) #define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) #define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) +#define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6) /** @flags: Bind flags */ __u32 flags; -- cgit v1.2.3 From d54c676d4fe0543d1642ab7a68ffdd31e8639a5d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Oct 2025 15:02:43 -0700 Subject: scsi: core: Fix the unit attention counter implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scsi_decide_disposition() may call scsi_check_sense(). scsi_decide_disposition() calls are not serialized. Hence, counter updates by scsi_check_sense() must be serialized. Hence this patch that makes the counters updated by scsi_check_sense() atomic. Cc: Kai Mäkisara Fixes: a5d518cd4e3e ("scsi: core: Add counters for New Media and Power On/Reset UNIT ATTENTIONs") Signed-off-by: Bart Van Assche Reviewed-by: Ewan D. Milne Link: https://patch.msgid.link/20251014220244.3689508-1-bvanassche@acm.org Signed-off-by: Martin K. 
Petersen --- include/scsi/scsi_device.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6d6500148c4b..993008cdea65 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -252,8 +252,8 @@ struct scsi_device { unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ - unsigned int ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */ - unsigned int ua_por_ctr; /* Counter for Power On / Reset UAs */ + atomic_t ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */ + atomic_t ua_por_ctr; /* Counter for Power On / Reset UAs */ atomic_t disk_events_disable_depth; /* disable depth for disk events */ @@ -693,10 +693,8 @@ static inline int scsi_device_busy(struct scsi_device *sdev) } /* Macros to access the UNIT ATTENTION counters */ -#define scsi_get_ua_new_media_ctr(sdev) \ - ((const unsigned int)(sdev->ua_new_media_ctr)) -#define scsi_get_ua_por_ctr(sdev) \ - ((const unsigned int)(sdev->ua_por_ctr)) +#define scsi_get_ua_new_media_ctr(sdev) atomic_read(&sdev->ua_new_media_ctr) +#define scsi_get_ua_por_ctr(sdev) atomic_read(&sdev->ua_por_ctr) #define MODULE_ALIAS_SCSI_DEVICE(type) \ MODULE_ALIAS("scsi:t-" __stringify(type) "*") -- cgit v1.2.3 From 00aaae60faf554c27c95e93d47f200a93ff266ef Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 14 Oct 2025 18:53:53 +0300 Subject: gpio: regmap: add the .fixed_direction_output configuration parameter There are GPIO controllers such as the one present in the LX2160ARDB QIXIS FPGA which have fixed-direction input and output GPIO lines mixed together in a single register. This cannot be modeled using the gpio-regmap as-is since there is no way to present the true direction of a GPIO line. 
In order to make this use case possible, add a new configuration parameter - fixed_direction_output - into the gpio_regmap_config structure. This will enable user drivers to provide a bitmap that represents the fixed direction of the GPIO lines. Signed-off-by: Ioana Ciornei Acked-by: Bartosz Golaszewski Reviewed-by: Michael Walle Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/regmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index 622a2939ebe0..87983a5f3681 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -38,6 +38,10 @@ struct regmap; * offset to a register/bitmask pair. If not * given the default gpio_regmap_simple_xlate() * is used. + * @fixed_direction_output: + * (Optional) Bitmap representing the fixed direction of + * the GPIO lines. Useful when there are GPIO lines with a + * fixed direction mixed together in the same register. * @drvdata: (Optional) Pointer to driver specific data which is * not used by gpio-remap but is provided "as is" to the * driver callback(s). @@ -85,6 +89,7 @@ struct gpio_regmap_config { int reg_stride; int ngpio_per_reg; struct irq_domain *irq_domain; + unsigned long *fixed_direction_output; #ifdef CONFIG_REGMAP_IRQ struct regmap_irq_chip *regmap_irq_chip; -- cgit v1.2.3 From db82b8dbf5f06d7b1abec4e1326ed8c02fa16897 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 20 Oct 2025 17:03:28 +0200 Subject: PM: runtime: Fix conditional guard definitions Since pm_runtime_get_active() returns 0 on success, all of the DEFINE_GUARD_COND() macros in pm_runtime.h need the "_RET == 0" condition at the end of the argument list or they would not work correctly. Fixes: 9a0abc39450a ("PM: runtime: Add auto-cleanup macros for "resume and get" operations") Reported-by: kernel test robot Link: https://lore.kernel.org/linux-pm/202510191529.BCyjKlLQ-lkp@intel.com/ Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Tested-by: Farhan Ali Link: https://patch.msgid.link/5943878.DvuYhMxLoT@rafael.j.wysocki --- include/linux/pm_runtime.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index a3f44f6c2da1..0b436e15f4cd 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -629,13 +629,13 @@ DEFINE_GUARD(pm_runtime_active_auto, struct device *, * device. */ DEFINE_GUARD_COND(pm_runtime_active, _try, - pm_runtime_get_active(_T, RPM_TRANSPARENT)) + pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, - pm_runtime_resume_and_get(_T)) + pm_runtime_resume_and_get(_T), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active_auto, _try, - pm_runtime_get_active(_T, RPM_TRANSPARENT)) + pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, - pm_runtime_resume_and_get(_T)) + pm_runtime_resume_and_get(_T), _RET == 0) /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. -- cgit v1.2.3 From f6ceec6434b5efff62cecbaa2ff74fc29b96c0c6 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Tue, 21 Oct 2025 12:09:40 +0200 Subject: net: datagram: introduce datagram_poll_queue for custom receive queues Some protocols using TCP encapsulation (e.g., espintcp, openvpn) deliver userspace-bound packets through a custom skb queue rather than the standard sk_receive_queue. Introduce datagram_poll_queue that accepts an explicit receive queue, and convert datagram_poll into a wrapper around datagram_poll_queue. This allows protocols with custom skb queues to reuse the core polling logic without relying on sk_receive_queue. 
Cc: Sabrina Dubroca Cc: Antonio Quartulli Signed-off-by: Ralf Lici Reviewed-by: Sabrina Dubroca Reviewed-by: Antonio Quartulli Link: https://patch.msgid.link/20251021100942.195010-2-ralf@mandelbit.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fb3fec9affaa..a7cc3d1f4fd1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4204,6 +4204,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); +__poll_t datagram_poll_queue(struct file *file, struct socket *sock, + struct poll_table_struct *wait, + struct sk_buff_head *rcv_queue); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, -- cgit v1.2.3 From b2284768c6b32aa224ca7d0ef0741beb434f03aa Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 22 Oct 2025 11:44:21 +0800 Subject: virtio-net: zero unused hash fields When GSO tunnel is negotiated virtio_net_hdr_tnl_from_skb() tries to initialize the tunnel metadata but forgets to zero the unused rxhash fields. This may leak information to the other side. Fix this by zeroing the unused hash fields. Acked-by: Michael S. 
Tsirkin Fixes: a2fb4bc4e2a6a ("net: implement virtio helpers to handle UDP GSO tunneling") Cc: Signed-off-by: Jason Wang Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20251022034421.70244-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 20e0584db1dd..4d1780848d0e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -401,6 +401,10 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (!tnl_hdr_negotiated) return -EINVAL; + vhdr->hash_hdr.hash_value = 0; + vhdr->hash_hdr.hash_report = 0; + vhdr->hash_hdr.padding = 0; + /* Let the basic parsing deal with plain GSO features. */ skb_shinfo(skb)->gso_type &= ~tnl_gso_type; ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen); -- cgit v1.2.3 From bb65e0c141f879cdf54db11ae446ee3605fb54d5 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Wed, 22 Oct 2025 15:29:39 +0300 Subject: net/mlx5: Add PPHCR to PCAM supported registers mask Add the PPHCR bit to the port_access_reg_cap_mask field of PCAM register to indicate that the device supports the PPHCR register and the RS-FEC histogram feature. 
Signed-off-by: Alexei Lazar Reviewed-by: Yael Chemla Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761136182-918470-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 07614cd95bed..1b0b36aa2a76 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10833,7 +10833,9 @@ struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; - u8 port_access_reg_cap_mask_63_to_36[0x1c]; + u8 port_access_reg_cap_mask_63[0x1]; + u8 pphcr[0x1]; + u8 port_access_reg_cap_mask_61_to_36[0x1a]; u8 pplm[0x1]; u8 port_access_reg_cap_mask_34_to_32[0x3]; -- cgit v1.2.3 From 0d92808024b4e9868cef68d16f121d509843e80e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Oct 2025 10:55:58 -0400 Subject: Bluetooth: HCI: Fix tracking of advertisement set/instance 0x00 This fixes the state tracking of advertisement set/instance 0x00 which is considered a legacy instance and is not tracked individually by adv_instances list, previously it was assumed that hci_dev itself would track it via HCI_LE_ADV but that is a global state not specific to instance 0x00, so to fix it a new flag is introduced that only tracks the state of instance 0x00. 
Fixes: 1488af7b8b5f ("Bluetooth: hci_sync: Fix hci_resume_advertising_sync") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 9ecc70baaca9..8d0e703bc929 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -434,6 +434,7 @@ enum { HCI_USER_CHANNEL, HCI_EXT_CONFIGURED, HCI_LE_ADV, + HCI_LE_ADV_0, HCI_LE_PER_ADV, HCI_LE_SCAN, HCI_SSP_ENABLED, -- cgit v1.2.3 From e8785404de06a69d89dcdd1e9a0b6ea42dc6d327 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Fri, 3 Oct 2025 22:07:32 +0300 Subject: Bluetooth: MGMT: fix crash in set_mesh_sync and set_mesh_complete There is a BUG: KASAN: stack-out-of-bounds in set_mesh_sync due to memcpy from badly declared on-stack flexible array. Another crash is in set_mesh_complete() due to double list_del via mgmt_pending_valid + mgmt_pending_remove. Use DEFINE_FLEX to declare the flexible array right, and don't memcpy outside bounds. As mgmt_pending_valid removes the cmd from list, use mgmt_pending_free, and also report status on error. 
Fixes: 302a1f674c00d ("Bluetooth: MGMT: Fix possible UAFs") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 74edea06985b..bca0333f1e99 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -853,7 +853,7 @@ struct mgmt_cp_set_mesh { __le16 window; __le16 period; __u8 num_ad_types; - __u8 ad_types[]; + __u8 ad_types[] __counted_by(num_ad_types); } __packed; #define MGMT_SET_MESH_RECEIVER_SIZE 6 -- cgit v1.2.3 From 76e20da0bd00c556ed0a1e7250bdb6ac3e808ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Mon, 6 Oct 2025 10:35:44 +0200 Subject: Revert "Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c9d84da18d1e0d28a7e16ca6df8e6d47570501d4. It replaces in L2CAP calls to msecs_to_jiffies() with calls to secs_to_jiffies() and updates the constants accordingly. But the constants are also used in L2CAP Configure Request and L2CAP Configure Response which expect values in milliseconds. This may prevent correct usage of L2CAP channel. To fix it, keep those constants in milliseconds and so revert this change.
Fixes: c9d84da18d1e ("Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 4bb0eaedda18..00e182a22720 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -38,8 +38,8 @@ #define L2CAP_DEFAULT_TX_WINDOW 63 #define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF #define L2CAP_DEFAULT_MAX_TX 3 -#define L2CAP_DEFAULT_RETRANS_TO 2 /* seconds */ -#define L2CAP_DEFAULT_MONITOR_TO 12 /* seconds */ +#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */ +#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */ #define L2CAP_DEFAULT_ACK_TO 200 #define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF -- cgit v1.2.3 From 751463ceefc3397566d03c8b64ef4a77f5fd88ac Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Oct 2025 16:03:19 -0400 Subject: Bluetooth: hci_core: Fix tracking of periodic advertisement Periodic advertising enabled flag cannot be tracked by the enabled flag since advertising and periodic advertising each can be enabled/disabled separately from one another causing the states to be inconsistent when for example an advertising set is disabled its enabled flag is set to false which is then used for periodic which has not been disabled.
Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2924c2bf2a98..b8100dbfe5d7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -244,6 +244,7 @@ struct adv_info { bool enabled; bool pending; bool periodic; + bool periodic_enabled; __u8 mesh; __u8 instance; __u8 handle; -- cgit v1.2.3 From d50f21091358b2b29dc06c2061106cdb0f030d03 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Sun, 26 Oct 2025 20:21:00 +0000 Subject: kbuild: align modinfo section for Secureboot Authenticode EDK2 compat Previously linker scripts would always generate vmlinuz that has sections aligned. And thus padded (correct Authenticode calculation) and unpadded calculation would be same. As in https://github.com/rhboot/pesign userspace tool would produce the same authenticode digest for both of the following commands: pesign --padding --hash --in ./arch/x86_64/boot/bzImage pesign --nopadding --hash --in ./arch/x86_64/boot/bzImage The commit 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped") added .modinfo section of variable length. Depending on kernel configuration it may or may not be aligned. All userspace signing tooling correctly pads such section to calculation spec compliant authenticode digest. However, if bzImage is not further processed and is attempted to be loaded directly by EDK2 firmware, it calculates unpadded Authenticode digest and fails to correctly accept/reject such kernel builds even when proper Authenticode values are enrolled in db/dbx. One can say EDK2 requires aligned/padded kernels in Secureboot. Thus add ALIGN(8) to the .modinfo section, to ensure kernels irrespective of modinfo contents can be loaded by all existing EDK2 firmware builds.
Fixes: 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped") Cc: stable@vger.kernel.org Signed-off-by: Dimitri John Ledkov Link: https://patch.msgid.link/20251026202100.679989-1-dimitri.ledkov@surgut.co.uk Signed-off-by: Nathan Chancellor --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a9a2e732a65..e04d56a5332e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -832,7 +832,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) /* Required sections not related to debugging. */ #define ELF_DETAILS \ - .modinfo : { *(.modinfo) } \ + .modinfo : { *(.modinfo) . = ALIGN(8); } \ .comment 0 : { *(.comment) } \ .symtab 0 : { *(.symtab) } \ .strtab 0 : { *(.strtab) } \ -- cgit v1.2.3 From 12a1c9353c47c0fb3464eba2d78cdf649dee1cf7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 27 Oct 2025 09:27:32 +0900 Subject: block: fix op_is_zone_mgmt() to handle REQ_OP_ZONE_RESET_ALL REQ_OP_ZONE_RESET_ALL is a zone management request. Fix op_is_zone_mgmt() to return true for that operation, like it already does for REQ_OP_ZONE_RESET. While no problems were reported without this fix, this change allows strengthening checks in various block device drivers (scsi sd, virtioblk, DM) where op_is_zone_mgmt() is used to verify that a zone management command is not being issued to a regular block device. 
Fixes: 6c1b1da58f8c ("block: add zone open, close and finish operations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8e8d1cc8b06c..d8ba743a89b7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -478,6 +478,7 @@ static inline bool op_is_zone_mgmt(enum req_op op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: -- cgit v1.2.3 From 19de03b312d69a7e9bacb51c806c6e3f4207376c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 27 Oct 2025 09:27:33 +0900 Subject: block: make REQ_OP_ZONE_OPEN a write operation A REQ_OP_OPEN_ZONE request changes the condition of a sequential zone of a zoned block device to the explicitly open condition (BLK_ZONE_COND_EXP_OPEN). As such, it should be considered a write operation. Change this operation code to be an odd number to reflect this. The following operation numbers are changed to keep the numbering compact. No problems were reported without this change as this operation has no data. However, this unifies the zone operation to reflect that they modify the device state and also allows strengthening checks in the block layer, e.g. checking if this operation is not issued against a read-only device. 
Fixes: 6c1b1da58f8c ("block: add zone open, close and finish operations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d8ba743a89b7..44c30183ecc3 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -341,15 +341,15 @@ enum req_op { /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ - REQ_OP_ZONE_OPEN = (__force blk_opf_t)10, + REQ_OP_ZONE_OPEN = (__force blk_opf_t)11, /* Close a zone */ - REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, + REQ_OP_ZONE_CLOSE = (__force blk_opf_t)13, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = (__force blk_opf_t)13, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)15, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = (__force blk_opf_t)15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)17, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)19, /* Driver private requests */ REQ_OP_DRV_IN = (__force blk_opf_t)34, -- cgit v1.2.3 From 7ceba45a6658ce637da334cd0ebf27f4ede6c0fe Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:37 +0200 Subject: wifi: cfg80211: add an hrtimer based delayed work item The normal timer mechanism assume that timeout further in the future need a lower accuracy. As an example, the granularity for a timer scheduled 4096 ms in the future on a 1000 Hz system is already 512 ms. This granularity is perfectly sufficient for e.g. timeouts, but there are other types of events that will happen at a future point in time and require a higher accuracy. Add a new wiphy_hrtimer_work type that uses an hrtimer internally. 
The API is almost identical to the existing wiphy_delayed_work and it can be used as a drop-in replacement after minor adjustments. The work will be scheduled relative to the current time with a slack of 1 millisecond. CC: stable@vger.kernel.org # 6.4+ Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.7f13a2adc5eb.I01b5af0363869864b0580d9c2a1770bafab69566@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 781624f5913a..820e299f06b5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6435,6 +6435,11 @@ static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork, * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can * use just cancel_work() instead of cancel_work_sync(), it requires * being in a section protected by wiphy_lock(). + * + * Note that these are scheduled with a timer where the accuracy + * becomes less the longer in the future the scheduled timer is. Use + * wiphy_hrtimer_work_queue() if the timer must be not be late by more + * than approximately 10 percent. 
*/ void wiphy_delayed_work_queue(struct wiphy *wiphy, struct wiphy_delayed_work *dwork, @@ -6506,6 +6511,79 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy, bool wiphy_delayed_work_pending(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +struct wiphy_hrtimer_work { + struct wiphy_work work; + struct wiphy *wiphy; + struct hrtimer timer; +}; + +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t); + +static inline void wiphy_hrtimer_work_init(struct wiphy_hrtimer_work *hrwork, + wiphy_work_func_t func) +{ + hrtimer_setup(&hrwork->timer, wiphy_hrtimer_work_timer, + CLOCK_BOOTTIME, HRTIMER_MODE_REL); + wiphy_work_init(&hrwork->work, func); +} + +/** + * wiphy_hrtimer_work_queue - queue hrtimer work for the wiphy + * @wiphy: the wiphy to queue for + * @hrwork: the high resolution timer worker + * @delay: the delay given as a ktime_t + * + * Please refer to wiphy_delayed_work_queue(). The difference is that + * the hrtimer work uses a high resolution timer for scheduling. This + * may be needed if timeouts might be scheduled further in the future + * and the accuracy of the normal timer is not sufficient. + * + * Expect a delay of a few milliseconds as the timer is scheduled + * with some slack and some more time may pass between queueing the + * work and its start. + */ +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay); + +/** + * wiphy_hrtimer_work_cancel - cancel previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrtimer: the hrtimer work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrtimer); + +/** + * wiphy_hrtimer_work_flush - flush previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work to flush + * + * Flush the work (i.e. 
run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + +/** + * wiphy_hrtimer_work_pending - Find out whether a wiphy hrtimer + * work item is currently pending. + * + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work in question + * + * Return: true if timer is pending, false otherwise + * + * Please refer to the wiphy_delayed_work_pending() documentation as + * this is the equivalent function for hrtimer based delayed work + * items. + */ +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + /** * enum ieee80211_ap_reg_power - regulatory power for an Access Point * -- cgit v1.2.3 From f0f7a3f542c1698edb69075f25a3f846207facba Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Tue, 28 Oct 2025 14:30:09 +0800 Subject: platform/x86: int3472: Fix double free of GPIO device during unregister MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regulator_unregister() already frees the associated GPIO device. On ThinkPad X9 (Lunar Lake), this causes a double free issue that leads to random failures when other drivers (typically Intel THC) attempt to allocate interrupts. The root cause is that the reference count of the pinctrl_intel_platform module unexpectedly drops to zero when this driver defers its probe. This behavior can also be reproduced by unloading the module directly. Fix the issue by removing the redundant release of the GPIO device during regulator unregistration. 
Cc: stable@vger.kernel.org Fixes: 1e5d088a52c2 ("platform/x86: int3472: Stop using devm_gpiod_get()") Signed-off-by: Qiu Wenbo Reviewed-by: Andy Shevchenko Reviewed-by: Sakari Ailus Reviewed-by: Hans de Goede Reviewed-by: Daniel Scally Link: https://patch.msgid.link/20251028063009.289414-1-qiuwenbo@gnome.org Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/int3472.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 1571e9157fa5..b1b837583d54 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -100,7 +100,6 @@ struct int3472_gpio_regulator { struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2]; char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH]; char regulator_name[GPIO_REGULATOR_NAME_LENGTH]; - struct gpio_desc *ena_gpio; struct regulator_dev *rdev; struct regulator_desc rdesc; }; -- cgit v1.2.3 From 8f3eaad9812f62e7006ad08602444b32c3101824 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Oct 2025 17:23:30 +0200 Subject: Input: Add keycodes for electronic privacy screen on/off hotkeys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add keycodes for hotkeys toggling the electronic privacy screen found on some laptops on/off. There already is an API for eprivacy screens as kernel-mode-setting drm connector object properties: https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties this API also supports reporting when the eprivacy screen is turned on/off by the embedded-controller (EC) in response to hotkey presses. But on some laptops (e.g. the Dell Latitude 7300) the firmware does not allow querying the presence nor the status of the eprivacy screen at boot. 
This makes it impossible to implement the drm connector properties API since drm objects do not allow adding new properties after creation and the presence of the eprivacy cannot be detected at boot. The first notice of the presence of an eprivacy screen on these laptops is an EC generated (WMI) event when the eprivacy screen hotkeys are pressed. In this case the new keycodes this change adds can be generated to notify userspace of the eprivacy screen on/off hotkeys being pressed, so that userspace can show the usual on-screen-display (OSD) notification for eprivacy screen on/off to the user. This is similar to how e.g. touchpad on/off keycodes are used to show the touchpad on/off OSD. Signed-off-by: Hans de Goede Acked-by: Dmitry Torokhov Link: https://patch.msgid.link/20251020152331.52870-2-hansg@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/uapi/linux/input-event-codes.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4a9fbf42aa9f..9cd89bcc1d9c 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -631,6 +631,18 @@ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ +/* + * Keycodes for hotkeys toggling the electronic privacy screen found on some + * laptops on/off. Note when the embedded-controller turns on/off the eprivacy + * screen itself then the state should be reported through drm connecter props: + * https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties + * Except when implementing the drm connecter properties API is not possible + * because e.g. the firmware does not allow querying the presence and/or status + * of the eprivacy screen at boot. 
+ */ +#define KEY_EPRIVACY_SCREEN_ON 0x252 +#define KEY_EPRIVACY_SCREEN_OFF 0x253 + #define KEY_KBDINPUTASSIST_PREV 0x260 #define KEY_KBDINPUTASSIST_NEXT 0x261 #define KEY_KBDINPUTASSIST_PREVGROUP 0x262 -- cgit v1.2.3 From 48cbf50531d8eca15b8a811717afdebb8677de9b Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 24 Oct 2025 16:23:44 +0800 Subject: regmap: irq: Correct documentation of wake_invert flag Per commit 9442490a0286 ("regmap: irq: Support wake IRQ mask inversion") the wake_invert flag is to support enable register, so cleared bits are wake disabled. Fixes: 68622bdfefb9 ("regmap: irq: document mask/wake_invert flags") Cc: stable@vger.kernel.org Signed-off-by: Shawn Guo Link: https://patch.msgid.link/20251024082344.2188895-1-shawnguo2@yeah.net Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4e1ac1fbcec4..55343795644b 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1643,7 +1643,7 @@ struct regmap_irq_chip_data; * @status_invert: Inverted status register: cleared bits are active interrupts. * @status_is_level: Status register is actuall signal level: Xor status * register with previous value to get active interrupts. - * @wake_invert: Inverted wake register: cleared bits are wake enabled. + * @wake_invert: Inverted wake register: cleared bits are wake disabled. * @type_in_mask: Use the mask registers for controlling irq type. 
Use this if * the hardware provides separate bits for rising/falling edge * or low/high level interrupts and they should be combined into -- cgit v1.2.3 From 18cd0a9c7aaf880502e4aff3ea30022f97d6c103 Mon Sep 17 00:00:00 2001 From: PIYUSH CHOUDHARY Date: Mon, 20 Oct 2025 00:05:08 +0530 Subject: video: fb: Fix typo in comment in fb.h Fix typo: "verical" -> "vertical" in macro description Signed-off-by: PIYUSH CHOUDHARY Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- include/uapi/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h index cde8f173f566..22acaaec7b1c 100644 --- a/include/uapi/linux/fb.h +++ b/include/uapi/linux/fb.h @@ -319,7 +319,7 @@ enum { #define FB_VBLANK_HAVE_VCOUNT 0x020 /* the vcount field is valid */ #define FB_VBLANK_HAVE_HCOUNT 0x040 /* the hcount field is valid */ #define FB_VBLANK_VSYNCING 0x080 /* currently in a vsync */ -#define FB_VBLANK_HAVE_VSYNC 0x100 /* verical syncs can be detected */ +#define FB_VBLANK_HAVE_VSYNC 0x100 /* vertical syncs can be detected */ struct fb_vblank { __u32 flags; /* FB_VBLANK flags */ -- cgit v1.2.3 From a1f3058930745d2b938b6b4f5bd9630dc74b26b7 Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Fri, 10 Oct 2025 16:16:59 +0800 Subject: fbcon: Set fb_display[i]->mode to NULL when the mode is released Recently, we discovered the following issue through syzkaller: BUG: KASAN: slab-use-after-free in fb_mode_is_equal+0x285/0x2f0 Read of size 4 at addr ff11000001b3c69c by task syz.xxx ... 
Call Trace: dump_stack_lvl+0xab/0xe0 print_address_description.constprop.0+0x2c/0x390 print_report+0xb9/0x280 kasan_report+0xb8/0xf0 fb_mode_is_equal+0x285/0x2f0 fbcon_mode_deleted+0x129/0x180 fb_set_var+0xe7f/0x11d0 do_fb_ioctl+0x6a0/0x750 fb_ioctl+0xe0/0x140 __x64_sys_ioctl+0x193/0x210 do_syscall_64+0x5f/0x9c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Based on experimentation and analysis, during framebuffer unregistration, only the memory of fb_info->modelist is freed, without setting the corresponding fb_display[i]->mode to NULL for the freed modes. This leads to UAF issues during subsequent accesses. Here's an example of reproduction steps: 1. With /dev/fb0 already registered in the system, load a kernel module to register a new device /dev/fb1; 2. Set fb1's mode to the global fb_display[] array (via FBIOPUT_CON2FBMAP); 3. Switch console from fb to VGA (to allow normal rmmod of the ko); 4. Unload the kernel module, at this point fb1's modelist is freed, leaving a wild pointer in fb_display[]; 5. Trigger the bug via system calls through fb0 attempting to delete a mode from fb0. Add a check in do_unregister_framebuffer(): if the mode to be freed exists in fb_display[], set the corresponding mode pointer to NULL. 
Signed-off-by: Quanmin Yan Reviewed-by: Thomas Zimmermann Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- include/linux/fbcon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 81f0e698acbf..f206370060e1 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -18,6 +18,7 @@ void fbcon_suspended(struct fb_info *info); void fbcon_resumed(struct fb_info *info); int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode); +void fbcon_delete_modelist(struct list_head *head); void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); @@ -38,6 +39,7 @@ static inline void fbcon_suspended(struct fb_info *info) {} static inline void fbcon_resumed(struct fb_info *info) {} static inline int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { return 0; } +static inline void fbcon_delete_modelist(struct list_head *head) {} static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} -- cgit v1.2.3 From 23ee8a2563a0f24cf4964685ced23c32be444ab8 Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Tue, 28 Oct 2025 20:08:59 +0800 Subject: dma-mapping: benchmark: Restore padding to ensure uABI remained consistent The padding field in the structure was previously reserved to maintain a stable interface for potential new fields, ensuring compatibility with user-space shared data structures. However, it was accidentally removed by tiantao in a prior commit, which may lead to incompatibility between user space and the kernel. This patch reinstates the padding to restore the original structure layout and preserve compatibility.
Fixes: 8ddde07a3d28 ("dma-mapping: benchmark: extract a common header file for map_benchmark definition") Cc: stable@vger.kernel.org Acked-by: Barry Song Signed-off-by: Qinxin Xia Reported-by: Barry Song Closes: https://lore.kernel.org/lkml/CAGsJ_4waiZ2+NBJG+SCnbNk+nQ_ZF13_Q5FHJqZyxyJTcEop2A@mail.gmail.com/ Reviewed-by: Jonathan Cameron Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20251028120900.2265511-2-xiaqinxin@huawei.com --- include/linux/map_benchmark.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h index 62674c83bde4..48e2ff95332f 100644 --- a/include/linux/map_benchmark.h +++ b/include/linux/map_benchmark.h @@ -27,5 +27,6 @@ struct map_benchmark { __u32 dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ }; #endif /* _KERNEL_DMA_BENCHMARK_H */ -- cgit v1.2.3 From 24990d89c23de4dbef6b0b3d58383cafefdd6983 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:00 +0100 Subject: trace: tcp: add three metrics to trace_tcp_rcvbuf_grow() While chasing yet another receive autotuning bug, I found useful to add rcv_ssthresh, window_clamp and rcv_wnd. 
tcp_stream 40597 [068] 2172.978198: tcp:tcp_rcvbuf_grow: time=50307 rtt_us=50179 copied=77824 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=107474 window_clamp=112128 rcv_wnd=110592 tcp_stream 40597 [068] 2173.028528: tcp:tcp_rcvbuf_grow: time=50336 rtt_us=50206 copied=110592 inq=0 space=77824 ooo=0 scaling_ratio=219 rcvbuf=509444 rcv_ssthresh=328658 window_clamp=435813 rcv_wnd=331776 tcp_stream 40597 [068] 2173.078830: tcp:tcp_rcvbuf_grow: time=50305 rtt_us=50070 copied=270336 inq=0 space=110592 ooo=0 scaling_ratio=219 rcvbuf=509444 rcv_ssthresh=431159 window_clamp=435813 rcv_wnd=434176 tcp_stream 40597 [068] 2173.129137: tcp:tcp_rcvbuf_grow: time=50313 rtt_us=50118 copied=434176 inq=0 space=270336 ooo=0 scaling_ratio=219 rcvbuf=2457847 rcv_ssthresh=1299511 window_clamp=2102611 rcv_wnd=1302528 tcp_stream 40597 [068] 2173.179451: tcp:tcp_rcvbuf_grow: time=50318 rtt_us=50041 copied=1019904 inq=0 space=434176 ooo=0 scaling_ratio=219 rcvbuf=2457847 rcv_ssthresh=2087445 window_clamp=2102611 rcv_wnd=2088960 Signed-off-by: Eric Dumazet Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-2-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 9d2c36c6a0ed..6757233bd064 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -218,6 +218,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, __field(__u32, space) __field(__u32, ooo_space) __field(__u32, rcvbuf) + __field(__u32, rcv_ssthresh) + __field(__u32, window_clamp) + __field(__u32, rcv_wnd) __field(__u8, scaling_ratio) __field(__u16, sport) __field(__u16, dport) @@ -245,6 +248,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, tp->rcv_nxt; __entry->rcvbuf = sk->sk_rcvbuf; + __entry->rcv_ssthresh = tp->rcv_ssthresh; + __entry->window_clamp = tp->window_clamp; + 
__entry->rcv_wnd = tp->rcv_wnd; __entry->scaling_ratio = tp->scaling_ratio; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); @@ -264,11 +270,14 @@ TRACE_EVENT(tcp_rcvbuf_grow, ), TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u " + "rcv_ssthresh=%u window_clamp=%u rcv_wnd=%u " "family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 " "saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx", __entry->time, __entry->rtt_us, __entry->copied, __entry->inq, __entry->space, __entry->ooo_space, __entry->scaling_ratio, __entry->rcvbuf, + __entry->rcv_ssthresh, __entry->window_clamp, + __entry->rcv_wnd, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, -- cgit v1.2.3 From b1e014a1f3275a6f3d0f2b30b8117447fc3915f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:01 +0100 Subject: tcp: add newval parameter to tcp_rcvbuf_grow() This patch has no functional change, and prepares the following one. tcp_rcvbuf_grow() will need to have access to tp->rcvq_space.space old and new values. Change mptcp_rcvbuf_grow() in a similar way. Signed-off-by: Eric Dumazet [ Moved 'oldval' declaration to the next patch to avoid warnings at build time. 
] Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-3-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5ca230ed526a..ab20f549b8f9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,7 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); -void tcp_rcvbuf_grow(struct sock *sk); +void tcp_rcvbuf_grow(struct sock *sk, u32 newval); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); -- cgit v1.2.3 From 34892cfec0c2d96787c4be7bda0d5f18d7dacf85 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:01 +0200 Subject: net: tls: Change async resync helpers argument Update tls_offload_rx_resync_async_request_start() and tls_offload_rx_resync_async_request_end() to get a struct tls_offload_resync_async parameter directly, rather than extracting it from struct sock. This change aligns the function signatures with the upcoming tls_offload_rx_resync_async_request_cancel() helper, which will be introduced in a subsequent patch. 
Signed-off-by: Shahar Shitrit Reviewed-by: Sabrina Dubroca Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 857340338b69..b90f3b675c3c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -451,25 +451,20 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) /* Log all TLS record header TCP sequences in [seq, seq+len] */ static inline void -tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) +tls_offload_rx_resync_async_request_start(struct tls_offload_resync_async *resync_async, + __be32 seq, u16 len) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); - rx_ctx->resync_async->loglen = 0; - rx_ctx->resync_async->rcd_delta = 0; + resync_async->loglen = 0; + resync_async->rcd_delta = 0; } static inline void -tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq) +tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_async, + __be32 seq) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, - ((u64)ntohl(seq) << 32) | RESYNC_REQ); + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); } static inline void -- cgit v1.2.3 From c15d5c62ab313c19121f10e25d4fec852bd1c40c Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:02 +0200 Subject: net: tls: Cancel RX async resync request on rcd_delta overflow When a 
netdev issues an RX async resync request for a TLS connection, the TLS module handles it by logging record headers and attempting to match them to the tcp_sn provided by the device. If a match is found, the TLS module approves the tcp_sn for resynchronization. While waiting for a device response, the TLS module also increments rcd_delta each time a new TLS record is received, tracking the distance from the original resync request. However, if the device response is delayed or fails (e.g. due to an unstable connection and the device losing tracking, hardware errors, resource exhaustion, etc.), the TLS module keeps logging and incrementing, which can lead to a WARN() when rcd_delta exceeds the threshold. To address this, introduce tls_offload_rx_resync_async_request_cancel() to explicitly cancel resync requests when a device response failure is detected. Also call this helper as a final safeguard when rcd_delta crosses its threshold, as reaching this point implies that an earlier cancellation did not occur. 
Signed-off-by: Shahar Shitrit Reviewed-by: Sabrina Dubroca Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index b90f3b675c3c..c7bcdb3afad7 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -467,6 +467,12 @@ tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_ atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); } +static inline void +tls_offload_rx_resync_async_request_cancel(struct tls_offload_resync_async *resync_async) +{ + atomic64_set(&resync_async->req, 0); +} + static inline void tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) { -- cgit v1.2.3 From 39c89ee6e9c4464eb366f4e594379454a6c4db39 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 25 Oct 2025 21:53:18 +0100 Subject: compiler_types: Introduce __nocfi_generic There are two different ways that LLVM can expand kCFI operand bundles in LLVM IR: generically in the middle end or using an architecture specific sequence when lowering LLVM IR to machine code in the backend. The generic pass allows any architecture to take advantage of kCFI but the expansion of these bundles in the middle end can mess with optimizations that may turn indirect calls into direct calls when the call target is known at compile time, such as after inlining. Add __nocfi_generic, dependent on an architecture selecting CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS, to disable kCFI bundle generation in functions where only the generic kCFI pass may cause problems. 
Link: https://github.com/ClangBuiltLinux/linux/issues/2124 Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20251025-idpf-fix-arm-kcfi-build-error-v1-1-ec57221153ae@kernel.org Signed-off-by: Kees Cook --- include/linux/compiler_types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 59288a2c1ad2..1414be493738 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -461,6 +461,12 @@ struct ftrace_likely_data { # define __nocfi #endif +#if defined(CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS) +# define __nocfi_generic __nocfi +#else +# define __nocfi_generic +#endif + /* * Any place that could be marked with the "alloc_size" attribute is also * a place to be marked with the "malloc" attribute, except those that may -- cgit v1.2.3 From c57f5fee54dfc83ee1d7f70f7beb9410b8466e9e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 25 Oct 2025 21:53:20 +0100 Subject: libeth: xdp: Disable generic kCFI pass for libeth_xdp_tx_xmit_bulk() When building drivers/net/ethernet/intel/idpf/xsk.c for ARCH=arm with CONFIG_CFI=y using a version of LLVM prior to 22.0.0, there is a BUILD_BUG_ON failure: $ cat arch/arm/configs/repro.config CONFIG_BPF_SYSCALL=y CONFIG_CFI=y CONFIG_IDPF=y CONFIG_XDP_SOCKETS=y $ make -skj"$(nproc)" ARCH=arm LLVM=1 clean defconfig repro.config drivers/net/ethernet/intel/idpf/xsk.o In file included from drivers/net/ethernet/intel/idpf/xsk.c:4: include/net/libeth/xsk.h:205:2: error: call to '__compiletime_assert_728' declared with 'error' attribute: BUILD_BUG_ON failed: !__builtin_constant_p(tmo == libeth_xsktmo) 205 | BUILD_BUG_ON(!__builtin_constant_p(tmo == libeth_xsktmo)); | ^ ... 
libeth_xdp_tx_xmit_bulk() indirectly calls libeth_xsk_xmit_fill_buf() but these functions are marked as __always_inline so that the compiler can turn these indirect calls into direct ones and see that the tmo parameter to __libeth_xsk_xmit_fill_buf_md() is ultimately libeth_xsktmo from idpf_xsk_xmit(). Unfortunately, the generic kCFI pass in LLVM expands the kCFI bundles from the indirect calls in libeth_xdp_tx_xmit_bulk() in such a way that later optimizations cannot turn these calls into direct ones, making the BUILD_BUG_ON fail because it cannot be proved at compile time that tmo is libeth_xsktmo. Disable the generic kCFI pass for libeth_xdp_tx_xmit_bulk() to ensure these indirect calls can always be turned into direct calls to avoid this error. Closes: https://github.com/ClangBuiltLinux/linux/issues/2124 Fixes: 9705d6552f58 ("idpf: implement Rx path for AF_XDP") Signed-off-by: Nathan Chancellor Reviewed-by: Aleksandr Loktionov Acked-by: Alexander Lobakin Link: https://patch.msgid.link/20251025-idpf-fix-arm-kcfi-build-error-v1-3-ec57221153ae@kernel.org Signed-off-by: Kees Cook --- include/net/libeth/xdp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index bc3507edd589..898723ab62e8 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -513,7 +513,7 @@ struct libeth_xdp_tx_desc { * can't fail, but can send less frames if there's no enough free descriptors * available. The actual free space is returned by @prep from the driver. 
*/ -static __always_inline u32 +static __always_inline __nocfi_generic u32 libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq, u32 n, bool unroll, u64 priv, u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), -- cgit v1.2.3 From d34caa89a132cd69efc48361d4772251546fdb88 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Oct 2025 11:59:16 +0300 Subject: scsi: ufs: core: Add a quirk to suppress link_startup_again ufshcd_link_startup() has a facility (link_startup_again) to issue DME_LINKSTARTUP a 2nd time even though the 1st time was successful. Some older hardware benefits from that, however the behaviour is non-standard, and has been found to cause link startup to be unreliable for some Intel Alder Lake based host controllers. Add UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE to suppress link_startup_again, in preparation for setting the quirk for affected controllers. Fixes: 7dc9fb47bc9a ("scsi: ufs: ufs-pci: Add support for Intel ADL") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20251024085918.31825-3-adrian.hunter@intel.com Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9425cfd9d00e..0f95576bf1f6 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -688,6 +688,13 @@ enum ufshcd_quirks { * single doorbell mode. */ UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, + + /* + * This quirk indicates that DME_LINKSTARTUP should not be issued a 2nd + * time (refer link_startup_again) after the 1st time was successful, + * because it causes link startup to become unreliable. 
+ */ + UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE = 1 << 26, }; enum ufshcd_caps { -- cgit v1.2.3 From 8d59fba49362c65332395789fd82771f1028d87e Mon Sep 17 00:00:00 2001 From: Ilia Gavrilov Date: Mon, 20 Oct 2025 15:12:55 +0000 Subject: Bluetooth: MGMT: Fix OOB access in parse_adv_monitor_pattern() In the parse_adv_monitor_pattern() function, the value of the 'length' variable is currently limited to HCI_MAX_EXT_AD_LENGTH(251). The size of the 'value' array in the mgmt_adv_pattern structure is 31. If the value of 'pattern[i].length' is set in the user space and exceeds 31, the 'patterns[i].value' array can be accessed out of bound when copied. Increasing the size of the 'value' array in the 'mgmt_adv_pattern' structure will break the userspace. Considering this, and to avoid OOB access revert the limits for 'offset' and 'length' back to the value of HCI_MAX_AD_LENGTH. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: db08722fc7d4 ("Bluetooth: hci_core: Fix missing instances using HCI_MAX_AD_LENGTH") Cc: stable@vger.kernel.org Signed-off-by: Ilia Gavrilov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index bca0333f1e99..f5be96f08b9d 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -780,7 +780,7 @@ struct mgmt_adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[31]; + __u8 value[HCI_MAX_AD_LENGTH]; } __packed; #define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052 -- cgit v1.2.3 From 14a7f2392f42bbb71c1a5ea68930006221fcd80a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 29 Oct 2025 11:36:46 -0700 Subject: bpf: Make migrate_disable always inline to avoid partial inlining The build fails with llvm 21/22: $ make LLVM=1 -j ... LD vmlinux.o GEN .vmlinux.objs ... BTF .tmp_vmlinux1.btf.o ... 
AS .tmp_vmlinux2.kallsyms.o LD vmlinux.unstripped BTFIDS vmlinux.unstripped WARN: resolve_btfids: unresolved symbol migrate_enable WARN: resolve_btfids: unresolved symbol migrate_disable make[2]: *** [vmlinux.unstripped] Error 255 make[2]: *** Deleting file 'vmlinux.unstripped' make[1]: *** [Makefile:1242: vmlinux] Error 2 make: *** [Makefile:248: __sub-make] Error 2 Two functions with identical names but different addresses are considered ambiguous and removed by "pahole" from vmlinux BTF. Later resolve_btfids warns since it cannot find them. Commit 378b7708194f ("sched: Make migrate_{en,dis}able() inline") made them inlineable in most places, but in vmlinux built with llvm 21 and 22 there are four symbols for migrate_{enable,disable}: three static functions and one global function. Fix the issue by marking migrate_{enable,disable} as always inline. The alternative is to mark them as notrace/nokprobe which is more drastic. Only bpf programs are prevented from attaching to these functions. The rest of the tracing shouldn't be affected. [note: Peter ok-ed the patch, Alexei rewrote commit log] Fixes: 378b7708194f ("sched: Make migrate_{en,dis}able() inline") Signed-off-by: Yonghong Song Acked-by: Menglong Dong Link: https://lore.kernel.org/r/20251029183646.3811774-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index cbb7340c5866..b469878de25c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2407,12 +2407,12 @@ static inline void __migrate_enable(void) { } * be defined in kernel/sched/core.c. 
*/ #ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE -static inline void migrate_disable(void) +static __always_inline void migrate_disable(void) { __migrate_disable(); } -static inline void migrate_enable(void) +static __always_inline void migrate_enable(void) { __migrate_enable(); } -- cgit v1.2.3 From 819630bd6f86ac8998c7df9deddb6cee50e9e22d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 3 Nov 2025 13:53:13 +0000 Subject: io_uring/zcrx: remove sync refill uapi There is a better way to handle the problem IORING_REGISTER_ZCRX_REFILL solves. The uapi can also be slightly adjusted to accommodate future extensions. Remove the feature for now, it'll be reworked for the next release. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 263bed13473e..b7c8dad26690 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -689,9 +689,6 @@ enum io_uring_register_op { /* query various aspects of io_uring, see linux/io_uring/query.h */ IORING_REGISTER_QUERY = 35, - /* return zcrx buffers back into circulation */ - IORING_REGISTER_ZCRX_REFILL = 36, - /* this goes last */ IORING_REGISTER_LAST, @@ -1073,15 +1070,6 @@ struct io_uring_zcrx_ifq_reg { __u64 __resv[3]; }; -struct io_uring_zcrx_sync_refill { - __u32 zcrx_id; - /* the number of entries to return */ - __u32 nr_entries; - /* pointer to an array of struct io_uring_zcrx_rqe */ - __u64 rqes; - __u64 __resv[2]; -}; - #ifdef __cplusplus } #endif -- cgit v1.2.3 From c3838262b824c71c145cd3668722e99a69bc9cd9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 31 Oct 2025 14:05:51 +0800 Subject: virtio_net: fix alignment for virtio_net_hdr_v1_hash Changing alignment of header would mean it's no longer safe to cast a 2 byte aligned pointer between formats. 
Use two 16 bit fields to make it 2 byte aligned as previously. This fixes the performance regression since commit ("virtio_net: enable gso over UDP tunnel support.") as it uses virtio_net_hdr_v1_hash_tunnel which embeds virtio_net_hdr_v1_hash. Pktgen in guest + XDP_DROP on TAP + vhost_net shows the TX PPS is recovered from 2.4Mpps to 4.45Mpps. Fixes: 56a06bd40fab ("virtio_net: enable gso over UDP tunnel support.") Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Tested-by: Lei Yang Link: https://patch.msgid.link/20251031060551.126-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 3 ++- include/uapi/linux/virtio_net.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 4d1780848d0e..b673c31569f3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -401,7 +401,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (!tnl_hdr_negotiated) return -EINVAL; - vhdr->hash_hdr.hash_value = 0; + vhdr->hash_hdr.hash_value_lo = 0; + vhdr->hash_hdr.hash_value_hi = 0; vhdr->hash_hdr.hash_report = 0; vhdr->hash_hdr.padding = 0; diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 8bf27ab8bcb4..1db45b01532b 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -193,7 +193,8 @@ struct virtio_net_hdr_v1 { struct virtio_net_hdr_v1_hash { struct virtio_net_hdr_v1 hdr; - __le32 hash_value; + __le16 hash_value_lo; + __le16 hash_value_hi; #define VIRTIO_NET_HASH_REPORT_NONE 0 #define VIRTIO_NET_HASH_REPORT_IPv4 1 #define VIRTIO_NET_HASH_REPORT_TCPv4 2 -- cgit v1.2.3 From a50f7456f853ec3a6f07cbe1d16ad8a8b2501320 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 30 Oct 2025 14:05:27 +0000 Subject: dma-mapping: Allow use of DMA_BIT_MASK(64) in global scope Clang doesn't like that (1ULL<<(64)) overflows when initializing a 
global scope variable, even if that part of the ternary isn't used when n = 64. The same initialization can be done without warnings in function scopes, and GCC doesn't mind either way. The build failure that highlighted this was already fixed in a different way [1], which also has detailed links to the Clang issues. However it's not going to be long before the same thing happens again, so it's better to fix the root cause. Fix it by using GENMASK_ULL() which does exactly the same thing, is much more readable anyway, and doesn't have a shift that overflows. [1]: https://lore.kernel.org/all/20250918-mmp-pdma-simplify-dma-addressing-v1-1-5c2be2b85696@riscstar.com/ Signed-off-by: James Clark Reviewed-by: Nathan Chancellor Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20251030-james-fix-dma_bit_mask-v1-1-ad1ce7cfab6e@linaro.org --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 8248ff9363ee..2ceda49c609f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -90,7 +90,7 @@ */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) -#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +#define DMA_BIT_MASK(n) GENMASK_ULL(n - 1, 0) struct dma_iova_state { dma_addr_t addr; -- cgit v1.2.3 From 1cf52a0d4ba079fb354fa1339f5fb34142228dae Mon Sep 17 00:00:00 2001 From: James Jones Date: Thu, 30 Oct 2025 11:11:52 -0700 Subject: drm: define NVIDIA DRM format modifiers for GB20x The layout of bits within the individual tiles (referred to as sectors in the DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro) changed for 8 and 16-bit surfaces starting in Blackwell 2 GPUs (With the exception of GB10). To denote the difference, extend the sector field in the parametric format modifier definition used to generate modifier values for NVIDIA hardware. 
Without this change, it would be impossible to differentiate the two layouts based on modifiers, and as a result software could attempt to share surfaces directly between pre-GB20x and GB20x cards, resulting in corruption when the surface was accessed on one of the GPUs after being populated with content by the other. Of note: This change causes the DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro to evaluate its "s" parameter twice, with the side effects that entails. I surveyed all usage of the modifier in the kernel and Mesa code, and that does not appear to be problematic in any current usage, but I thought it was worth calling out. Fixes: 6cc6e08d4542 ("drm/nouveau/kms: add support for GB20x") Signed-off-by: James Jones Reviewed-by: Faith Ekstrand Signed-off-by: Dave Airlie Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251030181153.1208-2-jajones@nvidia.com --- include/uapi/drm/drm_fourcc.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index ea91aa8afde9..e527b24bd824 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -979,14 +979,20 @@ extern "C" { * 2 = Gob Height 8, Turing+ Page Kind mapping * 3 = Reserved for future use. * - * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further - * bit remapping step that occurs at an even lower level than the - * page kind and block linear swizzles. This causes the layout of - * surfaces mapped in those SOC's GPUs to be incompatible with the - * equivalent mapping on other GPUs in the same system. - * - * 0 = Tegra K1 - Tegra Parker/TX2 Layout. - * 1 = Desktop GPU and Tegra Xavier+ Layout + * 22:22 s Sector layout. There is a further bit remapping step that occurs + * 26:27 at an even lower level than the page kind and block linear + * swizzles. 
This causes the bit arrangement of surfaces in memory + * to differ subtly, and prevents direct sharing of surfaces between + * GPUs with different layouts. + * + * 0 = Tegra K1 - Tegra Parker/TX2 Layout + * 1 = Pre-GB20x, GB20x 32+ bpp, GB10, Tegra Xavier-Orin Layout + * 2 = GB20x(Blackwell 2)+ 8 bpp surface layout + * 3 = GB20x(Blackwell 2)+ 16 bpp surface layout + * 4 = Reserved for future use. + * 5 = Reserved for future use. + * 6 = Reserved for future use. + * 7 = Reserved for future use. * * 25:23 c Lossless Framebuffer Compression type. * @@ -1001,7 +1007,7 @@ extern "C" { * 6 = Reserved for future use * 7 = Reserved for future use * - * 55:25 - Reserved for future use. Must be zero. + * 55:28 - Reserved for future use. Must be zero. */ #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ fourcc_mod_code(NVIDIA, (0x10 | \ @@ -1009,6 +1015,7 @@ extern "C" { (((k) & 0xff) << 12) | \ (((g) & 0x3) << 20) | \ (((s) & 0x1) << 22) | \ + (((s) & 0x6) << 25) | \ (((c) & 0x7) << 23))) /* To grandfather in prior block linear format modifiers to the above layout, -- cgit v1.2.3 From 20a0bc10272fa17a44fc857c31574a8306f60d20 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2025 22:54:03 +0100 Subject: x86/fgraph,bpf: Fix stack ORC unwind from kprobe_multi return probe Currently we don't get stack trace via ORC unwinder on top of fgraph exit handler. We can see that when generating stacktrace from kretprobe_multi bpf program which is based on fprobe/fgraph. The reason is that the ORC unwind code won't get pass the return_to_handler callback installed by fgraph return probe machinery. Solving this by creating stack frame in return_to_handler expected by ftrace_graph_ret_addr function to recover original return address and continue with the unwind. Also updating the pt_regs data with cs/flags/rsp which are needed for successful stack retrieval from ebpf bpf_get_stackid helper. 
- in get_perf_callchain we check user_mode(regs) so CS has to be set - in perf_callchain_kernel we call perf_hw_regs(regs), so EFLAGS/FIXED has to be unset Acked-by: Masami Hiramatsu (Google) Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20251104215405.168643-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7ded7df6e9b5..07f8c309e432 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -193,6 +193,10 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs #if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \ defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS) +#ifndef arch_ftrace_partial_regs +#define arch_ftrace_partial_regs(regs) do {} while (0) +#endif + static __always_inline struct pt_regs * ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) { @@ -202,7 +206,11 @@ ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) * Since arch_ftrace_get_regs() will check some members and may return * NULL, we can not use it. */ - return &arch_ftrace_regs(fregs)->regs; + regs = &arch_ftrace_regs(fregs)->regs; + + /* Allow arch specific updates to regs. */ + arch_ftrace_partial_regs(regs); + return regs; } #endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ -- cgit v1.2.3 From b1d16f7c0063b7209fd3251ce40c77d37b477b83 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 4 Nov 2025 09:23:31 -0800 Subject: libie: depend on DEBUG_FS when building LIBIE_FWLOG LIBIE_FWLOG is unusable without DEBUG_FS. Mark it in Kconfig. Fix build error on ixgbe when DEBUG_FS is not set. To not add another layer of #if IS_ENABLED(LIBIE_FWLOG) in ixgbe fwlog code define debugfs dentry even when DEBUG_FS isn't enabled. 
In this case the dummy functions of LIBIE_FWLOG will be used, so not initialized dentry isn't a problem. Fixes: 641585bc978e ("ixgbe: fwlog support for e610") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/lkml/f594c621-f9e1-49f2-af31-23fbcb176058@roeck-us.net/ Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20251104172333.752445-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- include/linux/net/intel/libie/fwlog.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h index 36b13fabca9e..7273c78c826b 100644 --- a/include/linux/net/intel/libie/fwlog.h +++ b/include/linux/net/intel/libie/fwlog.h @@ -78,8 +78,20 @@ struct libie_fwlog { ); }; +#if IS_ENABLED(CONFIG_LIBIE_FWLOG) int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api); void libie_fwlog_deinit(struct libie_fwlog *fwlog); void libie_fwlog_reregister(struct libie_fwlog *fwlog); void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len); +#else +static inline int libie_fwlog_init(struct libie_fwlog *fwlog, + struct libie_fwlog_api *api) +{ + return -EOPNOTSUPP; +} +static inline void libie_fwlog_deinit(struct libie_fwlog *fwlog) { } +static inline void libie_fwlog_reregister(struct libie_fwlog *fwlog) { } +static inline void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, + u16 len) { } +#endif /* CONFIG_LIBIE_FWLOG */ #endif /* _LIBIE_FWLOG_H_ */ -- cgit v1.2.3 From 9818af18db4bfefd320d0fef41390a616365e6f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Nov 2025 11:50:00 +0100 Subject: compiler_types: Move unused static inline functions warning to W=2 Per Nathan, clang catches unused "static inline" functions in C files since commit 6863f5643dd7 ("kbuild: allow Clang 
to find unused static inline functions for W=1 build"). Linus said: > So I entirely ignore W=1 issues, because I think so many of the extra > warnings are bogus. > > But if this one in particular is causing more problems than most - > some teams do seem to use W=1 as part of their test builds - it's fine > to send me a patch that just moves bad warnings to W=2. > > And if anybody uses W=2 for their test builds, that's THEIR problem.. Here is the change to bump the warning from W=1 to W=2. Fixes: 6863f5643dd7 ("kbuild: allow Clang to find unused static inline functions for W=1 build") Signed-off-by: Peter Zijlstra Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20251106105000.2103276-1-andriy.shevchenko@linux.intel.com [nathan: Adjust comment as well] Signed-off-by: Nathan Chancellor --- include/linux/compiler_types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 59288a2c1ad2..339603f05b54 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -250,10 +250,9 @@ struct ftrace_likely_data { /* * GCC does not warn about unused static inline functions for -Wunused-function. * Suppress the warning in clang as well by using __maybe_unused, but enable it - * for W=1 build. This will allow clang to find unused functions. Remove the - * __inline_maybe_unused entirely after fixing most of -Wunused-function warnings. + * for W=2 build. This will allow clang to find unused functions. 
*/ -#ifdef KBUILD_EXTRA_WARN1 +#ifdef KBUILD_EXTRA_WARN2 #define __inline_maybe_unused #else #define __inline_maybe_unused __maybe_unused -- cgit v1.2.3 From 002621a4df3c166fab1427e8e502bc15acc26b13 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 7 Nov 2025 19:29:33 +0100 Subject: kbuild: Let kernel-doc.py use PYTHON3 override It is possible to force a specific version of python to be used when building the kernel by passing PYTHON3= on the make command line. However kernel-doc.py is currently called with python3 hard-coded and thus ignores this setting. Use $(PYTHON3) to run $(KERNELDOC) so that the desired version of python is used. Signed-off-by: Jean Delvare Reviewed-by: Nicolas Schier Reviewed-by: Mauro Carvalho Chehab Link: https://patch.msgid.link/20251107192933.2bfe9e57@endymion Signed-off-by: Nathan Chancellor --- include/drm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/Makefile b/include/drm/Makefile index 1df6962556ef..48fae3f167c7 100644 --- a/include/drm/Makefile +++ b/include/drm/Makefile @@ -11,7 +11,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = \ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \ - PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ + PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE -- cgit v1.2.3 From 77008e1b2ef73249bceb078a321a3ff6bc087afb Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Thu, 16 Oct 2025 21:36:30 -0400 Subject: mm/huge_memory: do not change split_huge_page*() target order silently Page cache folios from a file system that support large block size (LBS) can have minimal folio order greater than 0, thus a high order folio might not be able to be split down to order-0. 
Commit e220917fa507 ("mm: split a folio in minimum folio order chunks") bumps the target order of split_huge_page*() to the minimum allowed order when splitting a LBS folio. This causes confusion for some split_huge_page*() callers like memory failure handling code, since they expect after-split folios all have order-0 when split succeeds but in reality get min_order_for_split() order folios and give warnings. Fix it by failing a split if the folio cannot be split to the target order. Rename try_folio_split() to try_folio_split_to_order() to reflect the added new_order parameter. Remove its unused list parameter. [The test poisons LBS folios, which cannot be split to order-0 folios, and also tries to poison all memory. The non split LBS folios take more memory than the test anticipated, leading to OOM. The patch fixed the kernel warning and the test needs some change to avoid OOM.] Link: https://lkml.kernel.org/r/20251017013630.139907-1-ziy@nvidia.com Fixes: e220917fa507 ("mm: split a folio in minimum folio order chunks") Signed-off-by: Zi Yan Reported-by: syzbot+e6367ea2fdab6ed46056@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68d2c943.a70a0220.1b52b.02b3.GAE@google.com/ Reviewed-by: Luis Chamberlain Reviewed-by: Pankaj Raghav Reviewed-by: Wei Yang Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Reviewed-by: Miaohe Lin Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jane Chu Cc: Lance Yang Cc: Liam Howlett Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Naoya Horiguchi Cc: Ryan Roberts Cc: Christian Brauner Cc: Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 55 +++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f327d62fc985..71ac78b9f834 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -376,45 +376,30 @@ bool non_uniform_split_supported(struct 
folio *folio, unsigned int new_order, int folio_split(struct folio *folio, unsigned int new_order, struct page *page, struct list_head *list); /* - * try_folio_split - try to split a @folio at @page using non uniform split. + * try_folio_split_to_order - try to split a @folio at @page to @new_order using + * non uniform split. * @folio: folio to be split - * @page: split to order-0 at the given page - * @list: store the after-split folios + * @page: split to @new_order at the given page + * @new_order: the target split order * - * Try to split a @folio at @page using non uniform split to order-0, if - * non uniform split is not supported, fall back to uniform split. + * Try to split a @folio at @page using non uniform split to @new_order, if + * non uniform split is not supported, fall back to uniform split. After-split + * folios are put back to LRU list. Use min_order_for_split() to get the lower + * bound of @new_order. * * Return: 0: split is successful, otherwise split failed. */ -static inline int try_folio_split(struct folio *folio, struct page *page, - struct list_head *list) +static inline int try_folio_split_to_order(struct folio *folio, + struct page *page, unsigned int new_order) { - int ret = min_order_for_split(folio); - - if (ret < 0) - return ret; - - if (!non_uniform_split_supported(folio, 0, false)) - return split_huge_page_to_list_to_order(&folio->page, list, - ret); - return folio_split(folio, ret, page, list); + if (!non_uniform_split_supported(folio, new_order, /* warns= */ false)) + return split_huge_page_to_list_to_order(&folio->page, NULL, + new_order); + return folio_split(folio, new_order, page, NULL); } static inline int split_huge_page(struct page *page) { - struct folio *folio = page_folio(page); - int ret = min_order_for_split(folio); - - if (ret < 0) - return ret; - - /* - * split_huge_page() locks the page before splitting and - * expects the same page that has been split to be locked when - * returned. 
split_folio(page_folio(page)) cannot be used here - * because it converts the page to folio and passes the head - * page to be split. - */ - return split_huge_page_to_list_to_order(page, NULL, ret); + return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio, bool partially_mapped); @@ -597,14 +582,20 @@ static inline int split_huge_page(struct page *page) return -EINVAL; } +static inline int min_order_for_split(struct folio *folio) +{ + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; +} + static inline int split_folio_to_list(struct folio *folio, struct list_head *list) { VM_WARN_ON_ONCE_FOLIO(1, folio); return -EINVAL; } -static inline int try_folio_split(struct folio *folio, struct page *page, - struct list_head *list) +static inline int try_folio_split_to_order(struct folio *folio, + struct page *page, unsigned int new_order) { VM_WARN_ON_ONCE_FOLIO(1, folio); return -EINVAL; -- cgit v1.2.3 From fa759cd75bce5489eed34596daa53f721849a86f Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Mon, 20 Oct 2025 20:08:52 -0400 Subject: kho: allocate metadata directly from the buddy allocator KHO allocates metadata for its preserved memory map using the slab allocator via kzalloc(). This metadata is temporary and is used by the next kernel during early boot to find preserved memory. A problem arises when KFENCE is enabled. kzalloc() calls can be randomly intercepted by kfence_alloc(), which services the allocation from a dedicated KFENCE memory pool. This pool is allocated early in boot via memblock. When booting via KHO, the memblock allocator is restricted to a "scratch area", forcing the KFENCE pool to be allocated within it. This creates a conflict, as the scratch area is expected to be ephemeral and overwriteable by a subsequent kexec. If KHO metadata is placed in this KFENCE pool, it leads to memory corruption when the next kernel is loaded. 
To fix this, modify KHO to allocate its metadata directly from the buddy allocator instead of slab. Link: https://lkml.kernel.org/r/20251021000852.2924827-4-pasha.tatashin@soleen.com Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation") Signed-off-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: David Matlack Cc: Alexander Graf Cc: Christian Brauner Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Miguel Ojeda Cc: Randy Dunlap Cc: Samiullah Khawaja Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/gfp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0ceb4e09306c..623bee335383 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -7,6 +7,7 @@ #include <linux/mmzone.h> #include <linux/topology.h> #include <linux/alloc_tag.h> +#include <linux/cleanup.h> #include <linux/sched.h> struct vm_area_struct; @@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, /* This should be paired with folio_put() rather than free_contig_range(). */ #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__)) +DEFINE_FREE(free_page, void *, free_page((unsigned long)_T)) + #endif /* __LINUX_GFP_H */ -- cgit v1.2.3 From 6a77267d97b5b6cd0e35099ab4eb054e5f965ee6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 10 Nov 2025 13:03:53 +0000 Subject: io_uring/query: return number of available queries It's useful to know which query opcodes are available. Extend the structure and return that. It's a trivial change, and even though it can be painlessly extended later, it'd still require adding a v2 of the structure.
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/query.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index 5d754322a27c..3539ccbfd064 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -36,6 +36,9 @@ struct io_uring_query_opcode { __u64 enter_flags; /* Bitmask of all supported IOSQE_* flags */ __u64 sqe_flags; + /* The number of available query opcodes */ + __u32 nr_query_opcodes; + __u32 __pad; }; #endif -- cgit v1.2.3 From 485e0626e58768f3c53ba61ab9e09d6b60a455f4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 6 Nov 2025 13:05:35 -0500 Subject: Bluetooth: hci_event: Fix not handling PA Sync Lost event This handles the PA Sync Lost event, which was previously assumed to be handled together with BIG Sync Lost. Their lifetimes are not the same, which is why there are two different events to report when each sync is lost.
Fixes: b2a5f2e1c127 ("Bluetooth: hci_event: Add support for handling LE BIG Sync Lost event") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8d0e703bc929..cb4c02d00759 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2783,6 +2783,11 @@ struct hci_ev_le_per_adv_report { __u8 data[]; } __packed; +#define HCI_EV_LE_PA_SYNC_LOST 0x10 +struct hci_ev_le_pa_sync_lost { + __le16 handle; +} __packed; + #define LE_PA_DATA_COMPLETE 0x00 #define LE_PA_DATA_MORE_TO_COME 0x01 #define LE_PA_DATA_TRUNCATED 0x02 -- cgit v1.2.3 From e5eba42f01340f73888dfe560be2806057c25913 Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Sun, 9 Nov 2025 11:49:03 +0200 Subject: mlx5: Fix default values in create CQ Currently, CQs without a completion function are assigned the mlx5_add_cq_to_tasklet function by default. This is problematic since only user CQs created through the mlx5_ib driver are intended to use this function. Additionally, all CQs that will use doorbells instead of polling for completions must call mlx5_cq_arm. However, the default CQ creation flow leaves a valid value in the CQ's arm_db field, allowing FW to send interrupts to polling-only CQs in certain corner cases. These two factors would allow a polling-only kernel CQ to be triggered by an EQ interrupt and call a completion function intended only for user CQs, causing a null pointer exception. Some areas in the driver have prevented this issue with one-off fixes but did not address the root cause. This patch fixes the described issue by adding defaults to the create CQ flow. It adds a default dummy completion function to protect against null pointer exceptions, and it sets an invalid command sequence number by default in kernel CQs to prevent the FW from sending an interrupt to the CQ until it is armed. 
User CQs are responsible for their own initialization values. Callers of mlx5_core_create_cq are responsible for changing the completion function and arming the CQ per their needs. Fixes: cdd04f4d4d71 ("net/mlx5: Add support to create SQ and CQ for ASO") Signed-off-by: Akiva Goldberger Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Acked-by: Leon Romanovsky Link: https://patch.msgid.link/1762681743-1084694-1-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- include/linux/mlx5/cq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 7ef2c7c7d803..9d47cdc727ad 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -183,6 +183,7 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq) complete(&cq->free); } +void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe); int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, -- cgit v1.2.3 From bb8336a5163a5839476f27ed1ad69df4a19e13ca Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Mon, 10 Nov 2025 18:25:45 +0000 Subject: ethtool: fix incorrect kernel-doc style comment in ethtool.h Building documentation produced the following warning: WARNING: ./include/linux/ethtool.h:495 This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for This comment was not intended to be parsed as kernel-doc, so replace the '/**' with '/*' to silence the warning and align with normal comment style in header files. No functional changes. 
Signed-off-by: Kriish Sharma Link: https://patch.msgid.link/20251110182545.2112596-1-kriish.sharma2006@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c2d8b4ec62eb..5c9162193d26 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -492,7 +492,7 @@ struct ethtool_pause_stats { }; #define ETHTOOL_MAX_LANES 8 -/** +/* * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for * the end-of-list marker, total 17 items */ -- cgit v1.2.3 From 4495bffd86ba0fdabfaef0c41d12f68ec2a1e05b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 10 Nov 2025 16:22:25 -0600 Subject: PCI/ASPM: Cache L0s/L1 Supported so advertised link states can be overridden Defective devices sometimes advertise support for ASPM L0s or L1 states even if they don't work correctly. Cache the L0s Supported and L1 Supported bits early in enumeration so HEADER quirks can override the ASPM states advertised in Link Capabilities before pcie_aspm_cap_init() enables ASPM. 
Signed-off-by: Bjorn Helgaas Tested-by: Shawn Lin Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20251110222929.2140564-2-helgaas@kernel.org --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index d1fdf81fbe1e..bf97d49c23cf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -412,6 +412,8 @@ struct pci_dev { u16 l1ss; /* L1SS Capability pointer */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state */ + unsigned int aspm_l0s_support:1; /* ASPM L0s support */ + unsigned int aspm_l1_support:1; /* ASPM L1 support */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ #endif -- cgit v1.2.3 From ebd4469e7af61019daaf904fdcba07a9ecd18440 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Wed, 5 Nov 2025 14:40:32 +1100 Subject: entry: Fix ifndef around arch_xfer_to_guest_mode_handle_work() stub The stub implementation of arch_xfer_to_guest_mode_handle_work() is guarded by an #ifndef that incorrectly checks for the name arch_xfer_to_guest_mode_work instead. It seems the function was renamed to add "_handle" as a late change to the original patch, and the #ifndef wasn't updated to go with it. Change the #ifndef to match the name of the function. No users right now, so no need to update any architecture code. 
Fixes: 935ace2fb5cc4 ("entry: Provide infrastructure for work before transitioning to guest mode") Signed-off-by: Andrew Donnellan Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251105-entry-fix-ifndef-v1-1-d8d28045b627@linux.ibm.com --- include/linux/entry-virt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/entry-virt.h b/include/linux/entry-virt.h index 42c89e3e5ca7..bfa767702d9a 100644 --- a/include/linux/entry-virt.h +++ b/include/linux/entry-virt.h @@ -32,7 +32,7 @@ */ static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); -#ifndef arch_xfer_to_guest_mode_work +#ifndef arch_xfer_to_guest_mode_handle_work static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; -- cgit v1.2.3 From 4ef92743625818932b9c320152b58274c05e5053 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2025 12:55:16 +0000 Subject: bpf: Add bpf_prog_run_data_pointers() syzbot found that cls_bpf_classify() is able to change tc_skb_cb(skb)->drop_reason triggering a warning in sk_skb_reason_drop(). WARNING: CPU: 0 PID: 5965 at net/core/skbuff.c:1192 __sk_skb_reason_drop net/core/skbuff.c:1189 [inline] WARNING: CPU: 0 PID: 5965 at net/core/skbuff.c:1192 sk_skb_reason_drop+0x76/0x170 net/core/skbuff.c:1214 struct tc_skb_cb has been added in commit ec624fe740b4 ("net/sched: Extend qdisc control block with tc control block"), which added a wrong interaction with db58ba459202 ("bpf: wire in data and data_end for cls_act_bpf"). drop_reason was added later. Add bpf_prog_run_data_pointers() helper to save/restore the net_sched storage colliding with BPF data_meta/data_end. 
Fixes: ec624fe740b4 ("net/sched: Extend qdisc control block with tc control block") Reported-by: syzbot Closes: https://lore.kernel.org/netdev/6913437c.a70a0220.22f260.013b.GAE@google.com/ Signed-off-by: Eric Dumazet Signed-off-by: Martin KaFai Lau Reviewed-by: Victor Nogueira Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251112125516.1563021-1-edumazet@google.com --- include/linux/filter.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index f5c859b8131a..973233b82dc1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -901,6 +901,26 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) cb->data_end = skb->data + skb_headlen(skb); } +static inline int bpf_prog_run_data_pointers( + const struct bpf_prog *prog, + struct sk_buff *skb) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + void *save_data_meta, *save_data_end; + int res; + + save_data_meta = cb->data_meta; + save_data_end = cb->data_end; + + bpf_compute_data_pointers(skb); + res = bpf_prog_run(prog, skb); + + cb->data_meta = save_data_meta; + cb->data_end = save_data_end; + + return res; +} + /* Similar to bpf_compute_data_pointers(), except that save orginal * data in cb->data and cb->meta_data for restore. */ -- cgit v1.2.3 From 39231e8d6ba7f794b566fd91ebd88c0834a23b98 Mon Sep 17 00:00:00 2001 From: "David Hildenbrand (Red Hat)" Date: Fri, 14 Nov 2025 22:49:20 +0100 Subject: mm: fix MAX_FOLIO_ORDER on powerpc configs with hugetlb In the past, CONFIG_ARCH_HAS_GIGANTIC_PAGE indicated that we support runtime allocation of gigantic hugetlb folios. In the meantime it evolved into a generic way for the architecture to state that it supports gigantic hugetlb folios. 
In commit fae7d834c43c ("mm: add __dump_folio()") we started using CONFIG_ARCH_HAS_GIGANTIC_PAGE to decide MAX_FOLIO_ORDER: whether we could have folios larger than what the buddy can handle. In the context of that commit, we started using MAX_FOLIO_ORDER to detect page corruptions when dumping tail pages of folios. Before that commit, we assumed that we cannot have folios larger than the highest buddy order, which was obviously wrong. In commit 7b4f21f5e038 ("mm/hugetlb: check for unreasonable folio sizes when registering hstate"), we used MAX_FOLIO_ORDER to detect inconsistencies, and in fact, we found some now. Powerpc allows for configs that can allocate gigantic folios during boot (not at runtime), that do not set CONFIG_ARCH_HAS_GIGANTIC_PAGE and can exceed PUD_ORDER. To fix it, let's make powerpc select CONFIG_ARCH_HAS_GIGANTIC_PAGE with hugetlb, and increase the maximum folio size with hugetlb to 16 GiB on 64bit (possible on arm64 and powerpc) and 1 GiB on 32bit (powerpc). Note that on some powerpc configurations, whether we actually have gigantic pages depends on the setting of CONFIG_ARCH_FORCE_MAX_ORDER, but there is nothing really problematic about setting it unconditionally: we just try to keep the value small so we can better detect problems in __dump_folio() and inconsistencies around the expected largest folio in the system. Ideally, we'd have a better way to obtain the maximum hugetlb folio size and detect ourselves whether we really end up with gigantic folios. Let's defer bigger changes and fix the warnings first. While at it, handle gigantic DAX folios more clearly: DAX can only end up creating gigantic folios with HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD. Add a new Kconfig option HAVE_GIGANTIC_FOLIOS to make both cases clearer. In particular, worry about ARCH_HAS_GIGANTIC_PAGE only with HUGETLB_PAGE.
Note: by enabling CONFIG_ARCH_HAS_GIGANTIC_PAGE on powerpc, we will now also allow for runtime allocations of folios in some more powerpc configs. I don't think this is a problem, but if it is, we could handle it through __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED. While __dump_page()/__dump_folio() was also problematic (not handling dumping of tail pages of such gigantic folios correctly), it doesn't seem critical enough to mark it as a fix. Link: https://lkml.kernel.org/r/20251114214920.2550676-1-david@kernel.org Fixes: 7b4f21f5e038 ("mm/hugetlb: check for unreasonable folio sizes when registering hstate") Reported-by: Christophe Leroy Closes: https://lore.kernel.org/r/3e043453-3f27-48ad-b987-cc39f523060a@csgroup.eu/ Reported-by: Sourabh Jain Closes: https://lore.kernel.org/r/94377f5c-d4f0-4c0f-b0f6-5bf1cd7305b1@linux.ibm.com/ Signed-off-by: David Hildenbrand (Red Hat) Cc: Ritesh Harjani (IBM) Cc: Madhavan Srinivasan Cc: Donet Tom Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Lorenzo Stoakes Cc: "Liam R. Howlett" Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index d16b33bacc32..7c79b3369b82 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2074,7 +2074,7 @@ static inline unsigned long folio_nr_pages(const struct folio *folio) return folio_large_nr_pages(folio); } -#if !defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) +#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS) /* * We don't expect any folios that exceed buddy sizes (and consequently * memory sections). @@ -2087,10 +2087,17 @@ static inline unsigned long folio_nr_pages(const struct folio *folio) * pages are guaranteed to be contiguous.
*/ #define MAX_FOLIO_ORDER PFN_SECTION_SHIFT -#else +#elif defined(CONFIG_HUGETLB_PAGE) /* * There is no real limit on the folio size. We limit them to the maximum we - * currently expect (e.g., hugetlb, dax). + * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect + * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit. + */ +#define MAX_FOLIO_ORDER get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G) +#else +/* + * Without hugetlb, gigantic folios that are bigger than a single PUD are + * currently impossible. */ #define MAX_FOLIO_ORDER PUD_ORDER #endif -- cgit v1.2.3