From b80961a86b40372b7cfb3065439377f7e7550e59 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 26 Nov 2025 10:59:50 -0800 Subject: drm/xe/uapi: Add DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE which accepts a user pointer to populate the exec queue state so that a GPU hang can be replayed via a Mesa tool. v2: Update the value for HANG_REPLAY_STATE flag Cc: José Roberto de Souza Signed-off-by: Matthew Brost Signed-off-by: Carlos Santa Reviewed-by: Jonathan Cavitt Acked-by: José Roberto de Souza Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20251126185952.546277-8-matthew.brost@intel.com --- include/uapi/drm/xe_drm.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 47853659a705..37881b1eb6ba 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -210,8 +210,12 @@ struct drm_xe_ext_set_property { /** @pad: MBZ */ __u32 pad; - /** @value: property value */ - __u64 value; + union { + /** @value: property value */ + __u64 value; + /** @ptr: pointer to user value */ + __u64 ptr; + }; /** @reserved: Reserved */ __u64 reserved[2]; @@ -1292,6 +1296,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 +#define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 78d91ba6bd7968d4750dad57c62bf5225ddcb388 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 4 Dec 2025 09:34:03 +0530 Subject: drm/xe/uapi: Add NO_COMPRESSION BO flag and query capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION to let userspace opt out of CCS compression on a per-BO basis. When set, the driver maps this to XE_BO_FLAG_NO_COMPRESSION, skips CCS metadata allocation/clearing, and rejects compressed PAT indices at vm_bind. This avoids extra memory ops and manual CCS state handling for buffers. To allow userspace to detect at runtime whether the kernel supports this feature, add DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT and expose it via query_config() on Xe2+ platforms. Mesa PR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38425 IGT PR: https://patchwork.freedesktop.org/patch/685180/ v2 - Changed error code from -EINVAL to -EOPNOTSUPP for unsupported flag usage on pre-Xe2 platforms - Fixed checkpatch warning in xe_vm.c - Fixed kernel-doc formatting in xe_drm.h v3 - Rebase - Updated commit title and description - Added UAPI for DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT and exposed it via query_config() v4 - Rebase v5 - Included Mesa PR and IGT PR in the commit description - Used xe_pat_index_get_comp_en() to extract the compression v6 - Added XE_IOCTL_DBG() checks for argument validation Suggested-by: Matthew Auld Suggested-by: José Roberto de Souza Acked-by: José Roberto de Souza Reviewed-by: Matthew Auld Signed-off-by: Sanjay Yadav Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20251204040402.2692921-2-sanjay.kumar.yadav@intel.com --- include/uapi/drm/xe_drm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 37881b1eb6ba..0d99bb0cd20a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -407,6 +407,9 @@ struct drm_xe_query_mem_regions { * has low latency hint support * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the * device has CPU address mirroring support + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT - Flag is set if the + * device supports the userspace hint %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION. + * This is exposed only on Xe2+. * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment * required by this device, typically SZ_4K or SZ_64K * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address @@ -425,6 +428,7 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -795,6 +799,17 @@ struct drm_xe_device_query { * need to use VRAM for display surfaces, therefore the kernel requires * setting this flag for such objects, otherwise an error is thrown on * small-bar systems. + * - %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION - Allows userspace to + * hint that compression (CCS) should be disabled for the buffer being + * created. This can avoid unnecessary memory operations and CCS state + * management. + * On pre-Xe2 platforms, this flag is currently rejected as compression + * control is not supported via PAT index. On Xe2+ platforms, compression + * is controlled via PAT entries. If this flag is set, the driver will reject + * any VM bind that requests a PAT index enabling compression for this BO. + * Note: On dGPU platforms, there is currently no change in behavior with + * this flag, but future improvements may leverage it. The current benefit is + * primarily applicable to iGPU platforms. * * @cpu_caching supports the following values: * - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back @@ -841,6 +856,7 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (1 << 0) #define DRM_XE_GEM_CREATE_FLAG_SCANOUT (1 << 1) #define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (1 << 2) +#define DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION (1 << 3) /** * @flags: Flags, currently a mask of memory instances of where BO can * be placed -- cgit v1.2.3 From 16e076b036583702bb47554d3931b5e674dd9a8e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 1 Dec 2025 18:51:12 -0800 Subject: drm/xe/oa/uapi: Add gt_id to struct drm_xe_oa_unit gt_id was previously omitted from 'struct drm_xe_oa_unit' because it could be determine from hwe's attached to the OA unit. However, we now have OA units which don't have any hwe's attached to them. Hence add gt_id to 'struct drm_xe_oa_unit' in order to provide this needed information to userspace. Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patch.msgid.link/20251202025115.373546-3-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 0d99bb0cd20a..876a076fa6c0 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1697,12 +1697,19 @@ struct drm_xe_oa_unit { #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) #define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) #define DRM_XE_OA_CAPS_OAM (1 << 4) +#define DRM_XE_OA_CAPS_OA_UNIT_GT_ID (1 << 5) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; + /** @gt_id: gt id for this OA unit */ + __u16 gt_id; + + /** @reserved1: MBZ */ + __u16 reserved1[3]; + /** @reserved: MBZ */ - __u64 reserved[4]; + __u64 reserved[3]; /** @num_engines: number of engines in @eci array */ __u64 num_engines; -- cgit v1.2.3 From b5709f6d26d65f6bb9711f4b5f98469fd507cb5b Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 3 Dec 2025 15:37:44 -0800 Subject: bpf: Support associating BPF program with struct_ops Add a new BPF command BPF_PROG_ASSOC_STRUCT_OPS to allow associating a BPF program with a struct_ops map. This command takes a file descriptor of a struct_ops map and a BPF program and set prog->aux->st_ops_assoc to the kdata of the struct_ops map. The command does not accept a struct_ops program nor a non-struct_ops map. Programs of a struct_ops map is automatically associated with the map during map update. If a program is shared between two struct_ops maps, prog->aux->st_ops_assoc will be poisoned to indicate that the associated struct_ops is ambiguous. The pointer, once poisoned, cannot be reset since we have lost track of associated struct_ops. For other program types, the associated struct_ops map, once set, cannot be changed later. This restriction may be lifted in the future if there is a use case. A kernel helper bpf_prog_get_assoc_struct_ops() can be used to retrieve the associated struct_ops pointer. The returned pointer, if not NULL, is guaranteed to be valid and point to a fully updated struct_ops struct. For struct_ops program reused in multiple struct_ops map, the return will be NULL. prog->aux->st_ops_assoc is protected by bumping the refcount for non-struct_ops programs and RCU for struct_ops programs. Since it would be inefficient to track programs associated with a struct_ops map, every non-struct_ops program will bump the refcount of the map to make sure st_ops_assoc stays valid. For a struct_ops program, it is protected by RCU as map_free will wait for an RCU grace period before disassociating the program with the map. The helper must be called in BPF program context or RCU read-side critical section. struct_ops implementers should note that the struct_ops returned may not be initialized nor attached yet. The struct_ops implementer will be responsible for tracking and checking the state of the associated struct_ops map if the use case expects an initialized or attached struct_ops. Signed-off-by: Amery Hung Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20251203233748.668365-3-ameryhung@gmail.com --- include/uapi/linux/bpf.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f8d8513eda27..84ced3ed2d21 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -918,6 +918,16 @@ union bpf_iter_link_info { * Number of bytes read from the stream on success, or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_ASSOC_STRUCT_OPS + * Description + * Associate a BPF program with a struct_ops map. The struct_ops + * map is identified by *map_fd* and the BPF program is + * identified by *prog_fd*. + * + * Return + * 0 on success or -1 if an error occurred (in which case, + * *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -974,6 +984,7 @@ enum bpf_cmd { BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, BPF_PROG_STREAM_READ_BY_FD, + BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, }; @@ -1894,6 +1905,12 @@ union bpf_attr { __u32 prog_fd; } prog_stream_read; + struct { + __u32 map_fd; + __u32 prog_fd; + __u32 flags; + } prog_assoc_struct_ops; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From 4d65215145de002defa985136093566a20fdb435 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 12 Sep 2025 13:21:09 -0400 Subject: drm/amdgpu: update VRAM types Update VRAM types. Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index f80aa4c9d88f..c705fbcad3e3 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1427,6 +1427,7 @@ struct drm_amdgpu_info_vbios { #define AMDGPU_VRAM_TYPE_LPDDR4 11 #define AMDGPU_VRAM_TYPE_LPDDR5 12 #define AMDGPU_VRAM_TYPE_HBM3E 13 +#define AMDGPU_VRAM_TYPE_HBM4 14 struct drm_amdgpu_info_device { /** PCI Device ID */ -- cgit v1.2.3 From c3cd568d31b6d41fc201b1d0506e4f6cab7e488a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 19 Nov 2025 10:25:43 +0100 Subject: drm/amdgpu/uapi: Clarify comment on AMDGPU_VM_PAGE_PRT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the context of the amdgpu uAPI, the PRT flag is referring only to unmapped pages of a partially resident texture (aka. sparse resource), but not the full resource. Virtual addresses marked with this flag behave as follows: - Reads return zero - Writes are discarded Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c705fbcad3e3..351c2fb2df90 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -883,7 +883,7 @@ struct drm_amdgpu_gem_list_handles_entry { #define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) /* executable mapping, new for VI */ #define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) -/* partially resident texture */ +/* unmapped page of partially resident textures */ #define AMDGPU_VM_PAGE_PRT (1 << 4) /* MTYPE flags use bit 5 to 8 */ #define AMDGPU_VM_MTYPE_MASK (0xf << 5) -- cgit v1.2.3 From cc6b66d661fda4fb94c0099dd92b83f8de5c1bf4 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 21 Aug 2025 17:22:44 +0800 Subject: amdkfd: introduce new ioctl AMDKFD_IOC_CREATE_PROCESS This commit implemetns a new ioctl AMDKFD_IOC_CREATE_PROCESS that creates a new secondary kfd_progress on the FD. To keep backward compatibility, userspace programs need to invoke this ioctl explicitly on a FD to create a secondary kfd_process which replacing its primary kfd_process. This commit bumps ioctl minor version. Signed-off-by: Zhu Lingshan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 5d1727a6d040..84aa24c02715 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -44,9 +44,10 @@ * - 1.16 - Add contiguous VRAM allocation flag * - 1.17 - Add SDMA queue creation with target SDMA engine ID * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag + * - 1.19 - Add a new ioctl to craete secondary kfd processes */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 18 +#define KFD_IOCTL_MINOR_VERSION 19 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -1671,7 +1672,10 @@ struct kfd_ioctl_dbg_trap_args { #define AMDKFD_IOC_DBG_TRAP \ AMDKFD_IOWR(0x26, struct kfd_ioctl_dbg_trap_args) +#define AMDKFD_IOC_CREATE_PROCESS \ + AMDKFD_IO(0x27) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x27 +#define AMDKFD_COMMAND_END 0x28 #endif -- cgit v1.2.3 From 7a5fb05b5b18e531989aa55b10dfa4be0633207e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 5 Dec 2025 08:04:41 -0600 Subject: amdkfd: Bump ABI to indicate presence of Trap handler support for expert scheduling commit 0f0c8a6983db ("drm/amdkfd: Trap handler support for expert scheduling mode") introduced support for a trap handler when expert scheduling mode. However userspace needs to know whether or not a trap handler support is present. Bump the KFD IOCTL API so that userspace can key off this to decide. Suggested-by: Stella Laurenzo Fixes: 423888879412 ("drm/amdkfd: Trap handler support for expert scheduling mode") Reviewed-by: Kent Russell Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 84aa24c02715..4d0c1a53f9d5 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -45,9 +45,10 @@ * - 1.17 - Add SDMA queue creation with target SDMA engine ID * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag * - 1.19 - Add a new ioctl to craete secondary kfd processes + * - 1.20 - Trap handler support for expert scheduling mode available */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 19 +#define KFD_IOCTL_MINOR_VERSION 20 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From ea78ec98265339997959eba3c9d764317614675a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:30 +0100 Subject: drm/panthor: Expose the selected coherency protocol to the UMD If we want to be able to skip CPU cache maintenance operations on CPU-cached mappings, the UMD needs to know the kind of coherency in place. Add a field to drm_panthor_gpu_info to do that. We can re-use a padding field for that since this object is write-only from the KMD perspective, and the UMD should just ignore it. v2: - New commit v3: - Make coherency protocol a real enum, not a bitmask - Add BUILD_BUG_ON()s to make sure the values in panthor_regs.h and those exposed through the uAPI match v4: - Add Steve's R-b v5: - No changes v6: - No changes v7: - Fix kernel doc v8: - No changes Reviewed-by: Steven Price Reviewed-by: Karunika Choo Link: https://patch.msgid.link/20251208100841.730527-4-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panthor_drm.h | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 467d365ed7ba..28cf9e878db6 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -245,6 +245,26 @@ enum drm_panthor_dev_query_type { DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, }; +/** + * enum drm_panthor_gpu_coherency: Type of GPU coherency + */ +enum drm_panthor_gpu_coherency { + /** + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE Lite coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_ACE_LITE = 0, + + /** + * @DRM_PANTHOR_GPU_COHERENCY_ACE: ACE coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_ACE = 1, + + /** + * @DRM_PANTHOR_GPU_COHERENCY_NONE: No coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_NONE = 31, +}; + /** * struct drm_panthor_gpu_info - GPU information * @@ -301,7 +321,16 @@ struct drm_panthor_gpu_info { */ __u32 thread_max_barrier_size; - /** @coherency_features: Coherency features. */ + /** + * @coherency_features: Coherency features. + * + * Combination of drm_panthor_gpu_coherency flags. + * + * Note that this is just what the coherency protocols supported by the + * GPU, but the actual coherency in place depends on the SoC + * integration and is reflected by + * drm_panthor_gpu_info::selected_coherency. + */ __u32 coherency_features; /** @texture_features: Texture features. */ @@ -310,8 +339,12 @@ struct drm_panthor_gpu_info { /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ __u32 as_present; - /** @pad0: MBZ. */ - __u32 pad0; + /** + * @select_coherency: Coherency selected for this device. + * + * One of drm_panthor_gpu_coherency. + */ + __u32 selected_coherency; /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ __u64 shader_present; -- cgit v1.2.3 From e06177ec7a36391c66216b55b7c112d5ba8c4cc1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:31 +0100 Subject: drm/panthor: Add a PANTHOR_BO_SYNC ioctl This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32). v2: - Change the flags so they better match the drm_gem_shmem_sync() semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (the semantics changes call for a new review) v6: - Drop ret initialization in panthor_ioctl_bo_sync() - Bail out early in panthor_ioctl_bo_sync() if ops.count is zero - Drop unused PANTHOR_BO_SYNC_OP_FLAGS definition v7: - Hand-roll the sync logic (was previously provided by gem_shmem) v8: - Collect R-b Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-5-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panthor_drm.h | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 28cf9e878db6..9f810305db6e 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id { * pgoff_t size. */ DRM_PANTHOR_SET_USER_MMIO_OFFSET, + + /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */ + DRM_PANTHOR_BO_SYNC, }; /** @@ -1073,6 +1076,53 @@ struct drm_panthor_set_user_mmio_offset { __u64 offset; }; +/** + * enum drm_panthor_bo_sync_op_type - BO sync type + */ +enum drm_panthor_bo_sync_op_type { + /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: Flush CPU caches. */ + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH = 0, + + /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: Flush and invalidate CPU caches. */ + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE = 1, +}; + +/** + * struct drm_panthor_bo_sync_op - BO map sync op + */ +struct drm_panthor_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u64 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u64 size; +}; + +/** + * struct drm_panthor_bo_sync - BO map sync request + */ +struct drm_panthor_bo_sync { + /** + * @ops: Array of struct drm_panthor_bo_sync_op sync operations. + */ + struct drm_panthor_obj_array ops; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1119,6 +1169,8 @@ enum { DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label), DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET = DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), + DRM_IOCTL_PANTHOR_BO_SYNC = + DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), }; #if defined(__cplusplus) -- cgit v1.2.3 From c146c82f862e9c7e602a908891c3adf992ef2beb Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:32 +0100 Subject: drm/panthor: Add an ioctl to query BO flags This is useful when importing BOs, so we can know about cacheability and flush the caches when needed. We can also know when the buffer comes from a different subsystem and take proper actions (avoid CPU mappings, or do kernel-based syncs instead of userland cache flushes). v2: - New commit v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-6-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panthor_drm.h | 57 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 9f810305db6e..39d5ce815742 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -147,6 +147,13 @@ enum drm_panthor_ioctl_id { /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */ DRM_PANTHOR_BO_SYNC, + + /** + * @DRM_PANTHOR_BO_QUERY_INFO: Query information about a BO. + * + * This is useful for imported BOs. + */ + DRM_PANTHOR_BO_QUERY_INFO, }; /** @@ -1123,6 +1130,54 @@ struct drm_panthor_bo_sync { struct drm_panthor_obj_array ops; }; +/** + * enum drm_panthor_bo_extra_flags - Set of flags returned on a BO_QUERY_INFO request + * + * Those are flags reflecting BO properties that are not directly coming from the flags + * passed are creation time, or information on BOs that were imported from other drivers. + */ +enum drm_panthor_bo_extra_flags { + /** + * @DRM_PANTHOR_BO_IS_IMPORTED: BO has been imported from an external driver. + * + * Note that imported dma-buf handles are not flagged as imported if they + * where exported by panthor. Only buffers that are coming from other drivers + * (dma heaps, other GPUs, display controllers, V4L, ...). + * + * It's also important to note that all imported BOs are mapped cached and can't + * be considered IO-coherent even if the GPU is. This means they require explicit + * syncs that must go through the DRM_PANTHOR_BO_SYNC ioctl (userland cache + * maintenance is not allowed in that case, because extra operations might be + * needed to make changes visible to the CPU/device, like buffer migration when the + * exporter is a GPU with its own VRAM). + */ + DRM_PANTHOR_BO_IS_IMPORTED = (1 << 0), +}; + +/** + * struct drm_panthor_bo_query_info - Query BO info + */ +struct drm_panthor_bo_query_info { + /** @handle: Handle of the buffer object to query flags on. */ + __u32 handle; + + /** + * @extra_flags: Combination of enum drm_panthor_bo_extra_flags flags. + */ + __u32 extra_flags; + + /** + * @create_flags: Flags passed at creation time. + * + * Combination of enum drm_panthor_bo_flags flags. + * Will be zero if the buffer comes from a different driver. + */ + __u32 create_flags; + + /** @pad: Will be zero on return. */ + __u32 pad; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1171,6 +1226,8 @@ enum { DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), DRM_IOCTL_PANTHOR_BO_SYNC = DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), + DRM_IOCTL_PANTHOR_BO_QUERY_INFO = + DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info), }; #if defined(__cplusplus) -- cgit v1.2.3 From cd2c9c3015e642e28e1b528c52c06a79f350d600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Mon, 8 Dec 2025 11:08:33 +0100 Subject: drm/panthor: Add flag to map GEM object Write-Back Cacheable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. v2: - Add more to the commit message - Tweak the doc - Make sure we sync the section of the BO pointing to the CS syncobj before we read its seqno v3: - Fix formatting/spelling issues v4: - Add Steve's R-b v5: - Drop Steve's R-b (changes in the ioctl semantics requiring new review) v6: - Fix the uAPI doc - Fix inverted logic in some comment v7: - No changes v8: - Collect R-b Signed-off-by: Loïc Molinari Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-7-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panthor_drm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 39d5ce815742..e238c6264fa1 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state { enum drm_panthor_bo_flags { /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), + + /** + * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping. + * + * CPU map the buffer object in userspace by forcing the "Write-Back + * Cacheable" cacheability attribute. The mapping otherwise uses the + * "Non-Cacheable" attribute if the GPU is not IO coherent. + */ + DRM_PANTHOR_BO_WB_MMAP = (1 << 1), }; /** -- cgit v1.2.3 From 2396d65d94fc75d39f096b9777f9edc9c8e677c1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:36 +0100 Subject: drm/panfrost: Expose the selected coherency protocol to the UMD Will be needed if we want to skip CPU cache maintenance operations when the GPU can snoop CPU caches. v2: - New commit v3: - Fix the coherency values (enum instead of bitmask) v4: - Fix init/test on coherency_features v5: - No changes v6: - Collect R-b v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-10-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panfrost_drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 1956431bb391..0c59714ae42b 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -228,6 +228,13 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, DRM_PANFROST_PARAM_ALLOWED_JM_CTX_PRIORITIES, + DRM_PANFROST_PARAM_SELECTED_COHERENCY, +}; + +enum drm_panfrost_gpu_coherency { + DRM_PANFROST_GPU_COHERENCY_ACE_LITE = 0, + DRM_PANFROST_GPU_COHERENCY_ACE = 1, + DRM_PANFROST_GPU_COHERENCY_NONE = 31, }; struct drm_panfrost_get_param { -- cgit v1.2.3 From 7be45f5489769520aa9276137d0f1f543fb81286 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:37 +0100 Subject: drm/panfrost: Add a PANFROST_SYNC_BO ioctl This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32). v2: - Add more to the commit message - Change the flags to better match the drm_gem_shmem_sync semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (semantics changes requiring a new review) v6: - Bail out early in panfrost_ioctl_sync_bo() if op_count is zero v7: - Hand-roll our own bo_sync() helper v8: - Collect R-b Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-11-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panfrost_drm.h | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 0c59714ae42b..e194e087a0c8 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -24,6 +24,7 @@ extern "C" { #define DRM_PANFROST_SET_LABEL_BO 0x09 #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b +#define DRM_PANFROST_SYNC_BO 0x0c #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -35,6 +36,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo) #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) +#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -308,6 +310,49 @@ struct drm_panfrost_set_label_bo { __u64 label; }; +/* Valid flags to pass to drm_panfrost_bo_sync_op */ +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH 0 +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE 1 + +/** + * struct drm_panthor_bo_flush_map_op - BO map sync op + */ +struct drm_panfrost_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of sync operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u32 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u32 size; +}; + +/** + * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps + */ +struct drm_panfrost_sync_bo { + /** Array of struct drm_panfrost_bo_sync_op */ + __u64 ops; + + /** Number of BO sync ops */ + __u32 op_count; + + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 -- cgit v1.2.3 From d17592e61fa8e3b2d58df7c4a24abc8ac58b8d3f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:38 +0100 Subject: drm/panfrost: Add an ioctl to query BO flags This is useful when importing BOs, so we can know about cacheability and flush the caches when needed. v2: - New commit v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-12-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panfrost_drm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index e194e087a0c8..36ae48ea50d3 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -25,6 +25,7 @@ extern "C" { #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b #define DRM_PANFROST_SYNC_BO 0x0c +#define DRM_PANFROST_QUERY_BO_INFO 0x0d #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -37,6 +38,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) #define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo) +#define DRM_IOCTL_PANFROST_QUERY_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_QUERY_BO_INFO, struct drm_panfrost_query_bo_info) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -353,6 +355,23 @@ struct drm_panfrost_sync_bo { __u32 pad; }; +/** BO comes from a different subsystem. */ +#define DRM_PANFROST_BO_IS_IMPORTED (1 << 0) + +struct drm_panfrost_query_bo_info { + /** Handle of the object being queried. */ + __u32 handle; + + /** Extra flags that are not coming from the BO_CREATE ioctl(). */ + __u32 extra_flags; + + /** Flags passed at creation time. */ + __u32 create_flags; + + /** Will be zero on return. */ + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 -- cgit v1.2.3 From 62eedf1ccba534b318ca85d3890bf0951b9e0f87 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:39 +0100 Subject: drm/panfrost: Add flag to map GEM object Write-Back Cacheable Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. v2: - Add more to the commit message v3: - No changes v4: - Fix the map_wc test in panfrost_ioctl_query_bo_info() v5: - Drop Steve's R-b (enough has changed to justify a new review) v6: - Collect R-b v7: - No changes v8: - Fix double drm_gem_object_funcs::export assignment Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-13-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panfrost_drm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 36ae48ea50d3..50d5337f35ef 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -124,9 +124,12 @@ struct drm_panfrost_wait_bo { __s64 timeout_ns; }; -/* Valid flags to pass to drm_panfrost_create_bo */ +/* Valid flags to pass to drm_panfrost_create_bo. + * PANFROST_BO_WB_MMAP can't be set if PANFROST_BO_HEAP is. + */ #define PANFROST_BO_NOEXEC 1 #define PANFROST_BO_HEAP 2 +#define PANFROST_BO_WB_MMAP 4 /** * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. -- cgit v1.2.3 From d9ec63474648a258094704ce223c9249fa7bb279 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:50 -0800 Subject: drm/xe/multi_queue: Add user interface for multi queue support Multi Queue is a new mode of execution supported by the compute and blitter copy command streamers (CCS and BCS, respectively). It is an enhancement of the existing hardware architecture and leverages the same submission model. It enables support for efficient, parallel execution of multiple queues within a single context. All the queues of a group must use the same address space (VM). The new DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE execution queue property supports creating a multi queue group and adding queues to a queue group. All queues of a multi queue group share the same context. A exec queue create ioctl call with above property specified with value DRM_XE_SUPER_GROUP_CREATE will create a new multi queue group with the queue being created as the primary queue (aka q0) of the group. To add secondary queues to the group, they need to be created with the above property with id of the primary queue as the value. The properties of the primary queue (like priority, timeslice) applies to the whole group. So, these properties can't be set for secondary queues of a group. Once destroyed, the secondary queues of a multi queue group can't be replaced. However, they can be dynamically added to the group up to a total of 64 queues per group. Once the primary queue is destroyed, secondary queues can't be added to the queue group. v2: Remove group->lock, fix xe_exec_queue_group_add()/delete() function semantics, add additional comments, remove unused group->list_lock, add XE_BO_FLAG_GGTT_INVALIDATE for cgp bo, Assert LRC is valid, update uapi kernel doc. (Matt Brost) v3: Use XE_BO_FLAG_PINNED_LATE_RESTORE/USER_VRAM/GGTT_INVALIDATE flags for cgp bo (Matt) v4: Ensure queue is not a vm_bind queue uapi change due to rebase Signed-off-by: Stuart Summers Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-21-niranjana.vishwanathapura@intel.com --- include/uapi/drm/xe_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 876a076fa6c0..19a8ae856a17 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1272,6 +1272,14 @@ struct drm_xe_vm_bind { * Given that going into a power-saving state kills PXP HWDRM sessions, * runtime PM will be blocked while queues of this type are alive. * All PXP queues will be killed if a PXP invalidation event occurs. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP - Create a multi-queue group + * or add secondary queues to a multi-queue group. + * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_CREATE flag set, + * then a new multi-queue group is created with this queue as the primary queue + * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary + * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. + * All the other non-relevant bits of extension's 'value' field while adding the + * primary or the secondary queues of the group must be set to 0. * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class @@ -1313,6 +1321,8 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 +#define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 898a00f4b43311adfd4da1711ed2b72adc8c98a5 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:52 -0800 Subject: drm/xe/multi_queue: Add multi queue priority property Add support for queues of a multi queue group to set their priority within the queue group by adding property DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY. This is the only other property supported by secondary queues of a multi queue group, other than DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE. v2: Add kernel doc for enum xe_multi_queue_priority, Add assert for priority values, fix includes and declarations (Matt Brost) v3: update uapi kernel-doc (Matt Brost) v4: uapi change due to rebase Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-23-niranjana.vishwanathapura@intel.com --- include/uapi/drm/xe_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19a8ae856a17..fd79d78de2e9 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1280,6 +1280,9 @@ struct drm_xe_vm_bind { * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. * All the other non-relevant bits of extension's 'value' field while adding the * primary or the secondary queues of the group must be set to 0. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue + * priority within the multi-queue group. Current valid priority values are 0–2 + * (default is 1), with higher values indicating higher priority. * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class @@ -1323,6 +1326,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 2a31ea17d5c69e51ea454485edd40e4aeff467c1 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:54 -0800 Subject: drm/xe/multi_queue: Add exec_queue set_property ioctl support This patch adds support for exec_queue set_property ioctl. It is derived from the original work which is part of https://patchwork.freedesktop.org/series/112188/ Currently only DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property can be dynamically set. v2: Check for and update kernel-doc which property this ioctl supports (Matt Brost) Signed-off-by: Matthew Brost Signed-off-by: Pallavi Mishra Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-25-niranjana.vishwanathapura@intel.com --- include/uapi/drm/xe_drm.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index fd79d78de2e9..705081bf0d81 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -106,6 +106,7 @@ extern "C" { #define DRM_XE_OBSERVATION 0x0b #define DRM_XE_MADVISE 0x0c #define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e /* Must be kept compact -- no holes */ @@ -123,6 +124,7 @@ extern "C" { #define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) #define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise) #define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr) +#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) /** * DOC: Xe IOCTL Extensions @@ -2315,6 +2317,30 @@ struct drm_xe_vm_query_mem_range_attr { }; +/** + * struct drm_xe_exec_queue_set_property - exec queue set property + * + * Sets execution queue properties dynamically. + * Currently only %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY + * property can be dynamically set. + */ +struct drm_xe_exec_queue_set_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; + + /** @property: property to set */ + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 3131a43ecb346ae3b5287ee195779fc38c6fcd11 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:03:03 -0800 Subject: drm/xe/multi_queue: Support active group after primary is destroyed Add support to keep the group active after the primary queue is destroyed. Instead of killing the primary queue during exec_queue destroy ioctl, kill it when all the secondary queues of the group are killed. Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-34-niranjana.vishwanathapura@intel.com --- include/uapi/drm/xe_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 705081bf0d81..bd6154e3b728 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1280,6 +1280,9 @@ struct drm_xe_vm_bind { * then a new multi-queue group is created with this queue as the primary queue * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. + * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag + * set, then the multi-queue group is kept active after the primary queue is + * destroyed. * All the other non-relevant bits of extension's 'value' field while adding the * primary or the secondary queues of the group must be set to 0. * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue @@ -1328,6 +1331,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) +#define DRM_XE_MULTI_GROUP_KEEP_ACTIVE (1ull << 62) #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 0e5032237ee5530147fbdf33134297e1490d5ec3 Mon Sep 17 00:00:00 2001 From: Bhavik Sachdev Date: Sat, 29 Nov 2025 14:41:21 +0530 Subject: statmount: accept fd as a parameter Extend `struct mnt_id_req` to take in a fd and introduce STATMOUNT_BY_FD flag. When a valid fd is provided and STATMOUNT_BY_FD is set, statmount will return mountinfo about the mount the fd is on. This even works for "unmounted" mounts (mounts that have been umounted using umount2(mnt, MNT_DETACH)), if you have access to a file descriptor on that mount. These "umounted" mounts will have no mountpoint and no valid mount namespace. Hence, we unset the STATMOUNT_MNT_POINT and STATMOUNT_MNT_NS_ID in statmount.mask for "unmounted" mounts. In case of STATMOUNT_BY_FD, given that we already have access to an fd on the mount, accessing mount information without a capability check seems fine because of the following reasons: - All fs related information is available via fstatfs() without any capability check. - Mount information is also available via /proc/pid/mountinfo (without any capability check). - Given that we have access to a fd on the mount which tells us that we had access to the mount at some point (or someone that had access gave us the fd). So, we should be able to access mount info. Co-developed-by: Pavel Tikhomirov Signed-off-by: Pavel Tikhomirov Signed-off-by: Bhavik Sachdev Link: https://patch.msgid.link/20251129091455.757724-3-b.sachdev1904@gmail.com Acked-by: Andrei Vagin Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 5d3f8c9e3a62..18c624405268 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -197,7 +197,10 @@ struct statmount { */ struct mnt_id_req { __u32 size; - __u32 mnt_ns_fd; + union { + __u32 mnt_ns_fd; + __u32 mnt_fd; + }; __u64 mnt_id; __u64 param; __u64 mnt_ns_id; @@ -232,4 +235,9 @@ struct mnt_id_req { #define LSMT_ROOT 0xffffffffffffffff /* root mount */ #define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ +/* + * @flag bits for statmount(2) + */ +#define STATMOUNT_BY_FD 0x00000001U /* want mountinfo for given fd */ + #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From b07bac9bd708ec468cd1b8a5fe70ae2ac9b0a11c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 5 Dec 2025 23:47:17 +0000 Subject: drm/xe: Limit num_syncs to prevent oversized allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exec and vm_bind ioctl allow userspace to specify an arbitrary num_syncs value. Without bounds checking, a very large num_syncs can force an excessively large allocation, leading to kernel warnings from the page allocator as below. Introduce DRM_XE_MAX_SYNCS (set to 1024) and reject any request exceeding this limit. " ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1217 at mm/page_alloc.c:5124 __alloc_frozen_pages_noprof+0x2f8/0x2180 mm/page_alloc.c:5124 ... Call Trace: alloc_pages_mpol+0xe4/0x330 mm/mempolicy.c:2416 ___kmalloc_large_node+0xd8/0x110 mm/slub.c:4317 __kmalloc_large_node_noprof+0x18/0xe0 mm/slub.c:4348 __do_kmalloc_node mm/slub.c:4364 [inline] __kmalloc_noprof+0x3d4/0x4b0 mm/slub.c:4388 kmalloc_noprof include/linux/slab.h:909 [inline] kmalloc_array_noprof include/linux/slab.h:948 [inline] xe_exec_ioctl+0xa47/0x1e70 drivers/gpu/drm/xe/xe_exec.c:158 drm_ioctl_kernel+0x1f1/0x3e0 drivers/gpu/drm/drm_ioctl.c:797 drm_ioctl+0x5e7/0xc50 drivers/gpu/drm/drm_ioctl.c:894 xe_drm_ioctl+0x10b/0x170 drivers/gpu/drm/xe/xe_device.c:224 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:598 [inline] __se_sys_ioctl fs/ioctl.c:584 [inline] __x64_sys_ioctl+0x18b/0x210 fs/ioctl.c:584 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xbb/0x380 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... " v2: Add "Reported-by" and Cc stable kernels. v3: Change XE_MAX_SYNCS from 64 to 1024. (Matt & Ashutosh) v4: s/XE_MAX_SYNCS/DRM_XE_MAX_SYNCS/ (Matt) v5: Do the check at the top of the exec func. (Matt) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reported-by: Koen Koning Reported-by: Peter Senna Tschudin Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6450 Cc: # v6.12+ Cc: Matthew Brost Cc: Michal Mrozek Cc: Carl Zhang Cc: José Roberto de Souza Cc: Lionel Landwerlin Cc: Ivan Briano Cc: Thomas Hellström Cc: Ashutosh Dixit Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20251205234715.2476561-5-shuicheng.lin@intel.com --- include/uapi/drm/xe_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index bd6154e3b728..c59587529986 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1504,6 +1504,7 @@ struct drm_xe_exec { /** @exec_queue_id: Exec queue ID for the batch buffer */ __u32 exec_queue_id; +#define DRM_XE_MAX_SYNCS 1024 /** @num_syncs: Amount of struct drm_xe_sync in array. */ __u32 num_syncs; -- cgit v1.2.3 From e83f63da2ac776fbc30861e4ce8b798df6ee8a7a Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 23 Jun 2025 14:12:58 -0400 Subject: drm/amdkfd: allow debug subscription to lds violations on gfx 1250 GFX 1250 allows the debugger to subcribe to LDS out-of-range read/write memory violations. Bump IOCTL minor version and flag KFD capabilities for enablement hint. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 4 +++- include/uapi/linux/kfd_sysfs.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 4d0c1a53f9d5..6e91875c10ba 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -46,9 +46,10 @@ * - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag * - 1.19 - Add a new ioctl to craete secondary kfd processes * - 1.20 - Trap handler support for expert scheduling mode available + * - 1.21 - Debugger support to subscribe to LDS out-of-address exceptions */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 20 +#define KFD_IOCTL_MINOR_VERSION 21 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -947,6 +948,7 @@ enum kfd_dbg_trap_address_watch_mode { enum kfd_dbg_trap_flags { KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1, KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP = 2, + KFD_DBG_TRAP_FLAG_LDS_OUT_OF_ADDR_RANGE = 4 }; /* Trap exceptions */ diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index 1125fe47959f..0b6ce2f3c887 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -64,7 +64,8 @@ #define HSA_CAP_RESERVED 0x000f8000 #define HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED 0x00000001 -#define HSA_CAP2_RESERVED 0xfffffffe +#define HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED 0x00000002 +#define HSA_CAP2_RESERVED 0xfffffffc /* debug_prop bits in node properties */ #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f -- cgit v1.2.3 From ab39e2a8f7aed72929bfc1d58eb5e8766f1d85db Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 5 Dec 2025 13:26:11 -0800 Subject: drm/xe/oa/uapi: Expose MERT OA unit A MERT OA unit is available in the SoC on some platforms. Add support for this OA unit and expose it to userspace. The MERT OA unit does not have any HW engines attached, but is otherwise similar to an OAM unit. Signed-off-by: Lucas De Marchi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20251205212613.826224-2-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c59587529986..726e481574fe 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1696,6 +1696,9 @@ enum drm_xe_oa_unit_type { /** @DRM_XE_OA_UNIT_TYPE_OAM_SAG: OAM_SAG OA unit */ DRM_XE_OA_UNIT_TYPE_OAM_SAG, + + /** @DRM_XE_OA_UNIT_TYPE_MERT: MERT OA unit */ + DRM_XE_OA_UNIT_TYPE_MERT, }; /** -- cgit v1.2.3 From 5c3c3e7b654df01a69d49551a08b7863c09546f6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 17 Dec 2025 14:24:03 +0100 Subject: drm/panthor: Fix kerneldoc in uAPI header Fix a typo in a kerneldoc header. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/dri-devel/20251216120049.3ed7e06e@canb.auug.org.au/ Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Fixes: ea78ec982653 ("drm/panthor: Expose the selected coherency protocol to the UMD") Signed-off-by: Steven Price Link: https://patch.msgid.link/20251217132403.3996014-1-boris.brezillon@collabora.com --- include/uapi/drm/panthor_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index e238c6264fa1..b401ac585d6a 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -350,7 +350,7 @@ struct drm_panthor_gpu_info { __u32 as_present; /** - * @select_coherency: Coherency selected for this device. + * @selected_coherency: Coherency selected for this device. * * One of drm_panthor_gpu_coherency. */ -- cgit v1.2.3 From 332070795bd96193756cb4446eddc3ec9ff6a0e8 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 17 Dec 2025 09:17:19 -0800 Subject: accel/amdxdna: Enable hardware context priority Newer firmware supports hardware context priority. Set the priority based on application input. Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251217171719.2139025-1-lizhi.hou@amd.com --- include/uapi/drm/amdxdna_accel.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index 62c917fd4f7b..9c44db2b3dcd 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -19,6 +19,14 @@ extern "C" { #define AMDXDNA_INVALID_BO_HANDLE 0 #define AMDXDNA_INVALID_FENCE_HANDLE 0 +/* + * Define hardware context priority + */ +#define AMDXDNA_QOS_REALTIME_PRIORITY 0x100 +#define AMDXDNA_QOS_HIGH_PRIORITY 0x180 +#define AMDXDNA_QOS_NORMAL_PRIORITY 0x200 +#define AMDXDNA_QOS_LOW_PRIORITY 0x280 + enum amdxdna_device_type { AMDXDNA_DEV_TYPE_UNKNOWN = -1, AMDXDNA_DEV_TYPE_KMQ, -- cgit v1.2.3 From cb8fe62f87ad21f4c174aec480694c9b4b8b01c4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 20 Dec 2025 03:04:26 +0900 Subject: nilfs2: convert nilfs_super_block to kernel-doc Eliminate 40+ kernel-doc warnings in nilfs2_ondisk.h by converting all of the struct member comments to kernel-doc comments. Fix one misnamed struct member in nilfs_direct_node. Object files before and after are the same size and content. Examples of warnings: Warning: include/uapi/linux/nilfs2_ondisk.h:202 struct member 's_rev_level' not described in 'nilfs_super_block' Warning: include/uapi/linux/nilfs2_ondisk.h:202 struct member 's_minor_rev_level' not described in 'nilfs_super_block' Warning: include/uapi/linux/nilfs2_ondisk.h:202 struct member 's_magic' not described in 'nilfs_super_block' Warning: include/uapi/linux/nilfs2_ondisk.h:202 struct member 's_bytes' not described in 'nilfs_super_block' Warning: include/uapi/linux/nilfs2_ondisk.h:202 struct member 's_flags' not described in 'nilfs_super_block' Signed-off-by: Randy Dunlap Signed-off-by: Ryusuke Konishi Signed-off-by: Viacheslav Dubeyko --- include/uapi/linux/nilfs2_ondisk.h | 163 ++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 66 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nilfs2_ondisk.h b/include/uapi/linux/nilfs2_ondisk.h index 3196cc44a002..b3442b16ff6a 100644 --- a/include/uapi/linux/nilfs2_ondisk.h +++ b/include/uapi/linux/nilfs2_ondisk.h @@ -133,73 +133,104 @@ struct nilfs_super_root { /** * struct nilfs_super_block - structure of super block on disk + * @s_rev_level: Revision level + * @s_minor_rev_level: minor revision level + * @s_magic: Magic signature + * @s_bytes: Bytes count of CRC calculation for + * this structure. s_reserved is excluded. + * @s_flags: flags + * @s_crc_seed: Seed value of CRC calculation + * @s_sum: Check sum of super block + * @s_log_block_size: Block size represented as follows: + * blocksize = 1 << (s_log_block_size + 10) + * @s_nsegments: Number of segments in filesystem + * @s_dev_size: block device size in bytes + * @s_first_data_block: 1st seg disk block number + * @s_blocks_per_segment: number of blocks per full segment + * @s_r_segments_percentage: Reserved segments percentage + * @s_last_cno: Last checkpoint number + * @s_last_pseg: disk block addr pseg written last + * @s_last_seq: seq. number of seg written last + * @s_free_blocks_count: Free blocks count + * @s_ctime: Creation time (execution time of newfs) + * @s_mtime: Mount time + * @s_wtime: Write time + * @s_mnt_count: Mount count + * @s_max_mnt_count: Maximal mount count + * @s_state: File system state + * @s_errors: Behaviour when detecting errors + * @s_lastcheck: time of last check + * @s_checkinterval: max. time between checks + * @s_creator_os: OS + * @s_def_resuid: Default uid for reserved blocks + * @s_def_resgid: Default gid for reserved blocks + * @s_first_ino: First non-reserved inode + * @s_inode_size: Size of an inode + * @s_dat_entry_size: Size of a dat entry + * @s_checkpoint_size: Size of a checkpoint + * @s_segment_usage_size: Size of a segment usage + * @s_uuid: 128-bit uuid for volume + * @s_volume_name: volume name + * @s_c_interval: Commit interval of segment + * @s_c_block_max: Threshold of data amount for the + * segment construction + * @s_feature_compat: Compatible feature set + * @s_feature_compat_ro: Read-only compatible feature set + * @s_feature_incompat: Incompatible feature set + * @s_reserved: padding to the end of the block */ struct nilfs_super_block { -/*00*/ __le32 s_rev_level; /* Revision level */ - __le16 s_minor_rev_level; /* minor revision level */ - __le16 s_magic; /* Magic signature */ - - __le16 s_bytes; /* - * Bytes count of CRC calculation - * for this structure. s_reserved - * is excluded. - */ - __le16 s_flags; /* flags */ - __le32 s_crc_seed; /* Seed value of CRC calculation */ -/*10*/ __le32 s_sum; /* Check sum of super block */ - - __le32 s_log_block_size; /* - * Block size represented as follows - * blocksize = - * 1 << (s_log_block_size + 10) - */ - __le64 s_nsegments; /* Number of segments in filesystem */ -/*20*/ __le64 s_dev_size; /* block device size in bytes */ - __le64 s_first_data_block; /* 1st seg disk block number */ -/*30*/ __le32 s_blocks_per_segment; /* number of blocks per full segment */ - __le32 s_r_segments_percentage; /* Reserved segments percentage */ - - __le64 s_last_cno; /* Last checkpoint number */ -/*40*/ __le64 s_last_pseg; /* disk block addr pseg written last */ - __le64 s_last_seq; /* seq. number of seg written last */ -/*50*/ __le64 s_free_blocks_count; /* Free blocks count */ - - __le64 s_ctime; /* - * Creation time (execution time of - * newfs) - */ -/*60*/ __le64 s_mtime; /* Mount time */ - __le64 s_wtime; /* Write time */ -/*70*/ __le16 s_mnt_count; /* Mount count */ - __le16 s_max_mnt_count; /* Maximal mount count */ - __le16 s_state; /* File system state */ - __le16 s_errors; /* Behaviour when detecting errors */ - __le64 s_lastcheck; /* time of last check */ - -/*80*/ __le32 s_checkinterval; /* max. time between checks */ - __le32 s_creator_os; /* OS */ - __le16 s_def_resuid; /* Default uid for reserved blocks */ - __le16 s_def_resgid; /* Default gid for reserved blocks */ - __le32 s_first_ino; /* First non-reserved inode */ - -/*90*/ __le16 s_inode_size; /* Size of an inode */ - __le16 s_dat_entry_size; /* Size of a dat entry */ - __le16 s_checkpoint_size; /* Size of a checkpoint */ - __le16 s_segment_usage_size; /* Size of a segment usage */ - -/*98*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ -/*A8*/ char s_volume_name[80] /* volume name */ - __kernel_nonstring; - -/*F8*/ __le32 s_c_interval; /* Commit interval of segment */ - __le32 s_c_block_max; /* - * Threshold of data amount for - * the segment construction - */ -/*100*/ __le64 s_feature_compat; /* Compatible feature set */ - __le64 s_feature_compat_ro; /* Read-only compatible feature set */ - __le64 s_feature_incompat; /* Incompatible feature set */ - __u32 s_reserved[186]; /* padding to the end of the block */ +/*00*/ __le32 s_rev_level; + __le16 s_minor_rev_level; + __le16 s_magic; + + __le16 s_bytes; + __le16 s_flags; + __le32 s_crc_seed; +/*10*/ __le32 s_sum; + + __le32 s_log_block_size; + __le64 s_nsegments; +/*20*/ __le64 s_dev_size; + __le64 s_first_data_block; +/*30*/ __le32 s_blocks_per_segment; + __le32 s_r_segments_percentage; + + __le64 s_last_cno; +/*40*/ __le64 s_last_pseg; + __le64 s_last_seq; +/*50*/ __le64 s_free_blocks_count; + + __le64 s_ctime; +/*60*/ __le64 s_mtime; + __le64 s_wtime; +/*70*/ __le16 s_mnt_count; + __le16 s_max_mnt_count; + __le16 s_state; + __le16 s_errors; + __le64 s_lastcheck; + +/*80*/ __le32 s_checkinterval; + __le32 s_creator_os; + __le16 s_def_resuid; + __le16 s_def_resgid; + __le32 s_first_ino; + +/*90*/ __le16 s_inode_size; + __le16 s_dat_entry_size; + __le16 s_checkpoint_size; + __le16 s_segment_usage_size; + +/*98*/ __u8 s_uuid[16]; +/*A8*/ char s_volume_name[80] __kernel_nonstring; + +/*F8*/ __le32 s_c_interval; + __le32 s_c_block_max; + +/*100*/ __le64 s_feature_compat; + __le64 s_feature_compat_ro; + __le64 s_feature_incompat; + __u32 s_reserved[186]; }; /* @@ -449,7 +480,7 @@ struct nilfs_btree_node { /** * struct nilfs_direct_node - header of built-in bmap array * @dn_flags: flags - * @dn_pad: padding + * @pad: padding */ struct nilfs_direct_node { __u8 dn_flags; -- cgit v1.2.3 From 6fd8a09f48d6fee184207f4e15e939898a3947f9 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 20 Dec 2025 03:04:27 +0900 Subject: nilfs2: fix missing struct keywords in nilfs2_api.h kernel-doc Eliminate the following kernel-doc warnings in nilfs2_api.h: Warning: include/uapi/linux/nilfs2_api.h:65 cannot understand function prototype: 'struct nilfs_suinfo' Warning: include/uapi/linux/nilfs2_api.h:101 cannot understand function prototype: 'struct nilfs_suinfo_update' This ensures that the documentation for nilfs_suinfo and nilfs_suinfo_update is correctly parsed and generated by adding the missing 'struct' keyword to their kernel-doc comments. Signed-off-by: Ryusuke Konishi Signed-off-by: Viacheslav Dubeyko --- include/uapi/linux/nilfs2_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nilfs2_api.h b/include/uapi/linux/nilfs2_api.h index 8b9b89104f3d..d1b6fcde2fb8 100644 --- a/include/uapi/linux/nilfs2_api.h +++ b/include/uapi/linux/nilfs2_api.h @@ -58,7 +58,7 @@ NILFS_CPINFO_FNS(INVALID, invalid) NILFS_CPINFO_FNS(MINOR, minor) /** - * nilfs_suinfo - segment usage information + * struct nilfs_suinfo - segment usage information * @sui_lastmod: timestamp of last modification * @sui_nblocks: number of written blocks in segment * @sui_flags: segment usage flags @@ -93,7 +93,7 @@ static inline int nilfs_suinfo_clean(const struct nilfs_suinfo *si) } /** - * nilfs_suinfo_update - segment usage information update + * struct nilfs_suinfo_update - segment usage information update * @sup_segnum: segment number * @sup_flags: flags for which fields are active in sup_sui * @sup_reserved: reserved necessary for alignment -- cgit v1.2.3 From 98b9f207afa53aff2edb0e52910c4348b456b37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 22 Dec 2025 09:04:13 +0100 Subject: dmaengine: idxd: uapi: use UAPI types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using libc types and headers from the UAPI headers is problematic as it introduces a dependency on a full C toolchain. Use the fixed-width integer types provided by the UAPI headers instead. Signed-off-by: Thomas Weißschuh Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20251222-uapi-idxd-v1-1-baa183adb20d@linutronix.de Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 270 +++++++++++++++++++++++----------------------- 1 file changed, 133 insertions(+), 137 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 3d1987e1bb2d..fdcc8eefb925 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -3,11 +3,7 @@ #ifndef _USR_IDXD_H_ #define _USR_IDXD_H_ -#ifdef __KERNEL__ #include -#else -#include -#endif /* Driver command error status */ enum idxd_scmd_stat { @@ -176,132 +172,132 @@ enum iax_completion_status { #define DSA_COMP_STATUS(status) ((status) & DSA_COMP_STATUS_MASK) struct dsa_hw_desc { - uint32_t pasid:20; - uint32_t rsvd:11; - uint32_t priv:1; - uint32_t flags:24; - uint32_t opcode:8; - uint64_t completion_addr; + __u32 pasid:20; + __u32 rsvd:11; + __u32 priv:1; + __u32 flags:24; + __u32 opcode:8; + __u64 completion_addr; union { - uint64_t src_addr; - uint64_t rdback_addr; - uint64_t pattern; - uint64_t desc_list_addr; - uint64_t pattern_lower; - uint64_t transl_fetch_addr; + __u64 src_addr; + __u64 rdback_addr; + __u64 pattern; + __u64 desc_list_addr; + __u64 pattern_lower; + __u64 transl_fetch_addr; }; union { - uint64_t dst_addr; - uint64_t rdback_addr2; - uint64_t src2_addr; - uint64_t comp_pattern; + __u64 dst_addr; + __u64 rdback_addr2; + __u64 src2_addr; + __u64 comp_pattern; }; union { - uint32_t xfer_size; - uint32_t desc_count; - uint32_t region_size; + __u32 xfer_size; + __u32 desc_count; + __u32 region_size; }; - uint16_t int_handle; - uint16_t rsvd1; + __u16 int_handle; + __u16 rsvd1; union { - uint8_t expected_res; + __u8 expected_res; /* create delta record */ struct { - uint64_t delta_addr; - uint32_t max_delta_size; - uint32_t delt_rsvd; - uint8_t expected_res_mask; + __u64 delta_addr; + __u32 max_delta_size; + __u32 delt_rsvd; + __u8 expected_res_mask; }; - uint32_t delta_rec_size; - uint64_t dest2; + __u32 delta_rec_size; + __u64 dest2; /* CRC */ struct { - uint32_t crc_seed; - uint32_t crc_rsvd; - uint64_t seed_addr; + __u32 crc_seed; + __u32 crc_rsvd; + __u64 seed_addr; }; /* DIF check or strip */ struct { - uint8_t src_dif_flags; - uint8_t dif_chk_res; - uint8_t dif_chk_flags; - uint8_t dif_chk_res2[5]; - uint32_t chk_ref_tag_seed; - uint16_t chk_app_tag_mask; - uint16_t chk_app_tag_seed; + __u8 src_dif_flags; + __u8 dif_chk_res; + __u8 dif_chk_flags; + __u8 dif_chk_res2[5]; + __u32 chk_ref_tag_seed; + __u16 chk_app_tag_mask; + __u16 chk_app_tag_seed; }; /* DIF insert */ struct { - uint8_t dif_ins_res; - uint8_t dest_dif_flag; - uint8_t dif_ins_flags; - uint8_t dif_ins_res2[13]; - uint32_t ins_ref_tag_seed; - uint16_t ins_app_tag_mask; - uint16_t ins_app_tag_seed; + __u8 dif_ins_res; + __u8 dest_dif_flag; + __u8 dif_ins_flags; + __u8 dif_ins_res2[13]; + __u32 ins_ref_tag_seed; + __u16 ins_app_tag_mask; + __u16 ins_app_tag_seed; }; /* DIF update */ struct { - uint8_t src_upd_flags; - uint8_t upd_dest_flags; - uint8_t dif_upd_flags; - uint8_t dif_upd_res[5]; - uint32_t src_ref_tag_seed; - uint16_t src_app_tag_mask; - uint16_t src_app_tag_seed; - uint32_t dest_ref_tag_seed; - uint16_t dest_app_tag_mask; - uint16_t dest_app_tag_seed; + __u8 src_upd_flags; + __u8 upd_dest_flags; + __u8 dif_upd_flags; + __u8 dif_upd_res[5]; + __u32 src_ref_tag_seed; + __u16 src_app_tag_mask; + __u16 src_app_tag_seed; + __u32 dest_ref_tag_seed; + __u16 dest_app_tag_mask; + __u16 dest_app_tag_seed; }; /* Fill */ - uint64_t pattern_upper; + __u64 pattern_upper; /* Translation fetch */ struct { - uint64_t transl_fetch_res; - uint32_t region_stride; + __u64 transl_fetch_res; + __u32 region_stride; }; /* DIX generate */ struct { - uint8_t dix_gen_res; - uint8_t dest_dif_flags; - uint8_t dif_flags; - uint8_t dix_gen_res2[13]; - uint32_t ref_tag_seed; - uint16_t app_tag_mask; - uint16_t app_tag_seed; + __u8 dix_gen_res; + __u8 dest_dif_flags; + __u8 dif_flags; + __u8 dix_gen_res2[13]; + __u32 ref_tag_seed; + __u16 app_tag_mask; + __u16 app_tag_seed; }; - uint8_t op_specific[24]; + __u8 op_specific[24]; }; } __attribute__((packed)); struct iax_hw_desc { - uint32_t pasid:20; - uint32_t rsvd:11; - uint32_t priv:1; - uint32_t flags:24; - uint32_t opcode:8; - uint64_t completion_addr; - uint64_t src1_addr; - uint64_t dst_addr; - uint32_t src1_size; - uint16_t int_handle; + __u32 pasid:20; + __u32 rsvd:11; + __u32 priv:1; + __u32 flags:24; + __u32 opcode:8; + __u64 completion_addr; + __u64 src1_addr; + __u64 dst_addr; + __u32 src1_size; + __u16 int_handle; union { - uint16_t compr_flags; - uint16_t decompr_flags; + __u16 compr_flags; + __u16 decompr_flags; }; - uint64_t src2_addr; - uint32_t max_dst_size; - uint32_t src2_size; - uint32_t filter_flags; - uint32_t num_inputs; + __u64 src2_addr; + __u32 max_dst_size; + __u32 src2_size; + __u32 filter_flags; + __u32 num_inputs; } __attribute__((packed)); struct dsa_raw_desc { - uint64_t field[8]; + __u64 field[8]; } __attribute__((packed)); /* @@ -309,91 +305,91 @@ struct dsa_raw_desc { * volatile and prevent the compiler from optimize the read. */ struct dsa_completion_record { - volatile uint8_t status; + volatile __u8 status; union { - uint8_t result; - uint8_t dif_status; + __u8 result; + __u8 dif_status; }; - uint8_t fault_info; - uint8_t rsvd; + __u8 fault_info; + __u8 rsvd; union { - uint32_t bytes_completed; - uint32_t descs_completed; + __u32 bytes_completed; + __u32 descs_completed; }; - uint64_t fault_addr; + __u64 fault_addr; union { /* common record */ struct { - uint32_t invalid_flags:24; - uint32_t rsvd2:8; + __u32 invalid_flags:24; + __u32 rsvd2:8; }; - uint32_t delta_rec_size; - uint64_t crc_val; + __u32 delta_rec_size; + __u64 crc_val; /* DIF check & strip */ struct { - uint32_t dif_chk_ref_tag; - uint16_t dif_chk_app_tag_mask; - uint16_t dif_chk_app_tag; + __u32 dif_chk_ref_tag; + __u16 dif_chk_app_tag_mask; + __u16 dif_chk_app_tag; }; /* DIF insert */ struct { - uint64_t dif_ins_res; - uint32_t dif_ins_ref_tag; - uint16_t dif_ins_app_tag_mask; - uint16_t dif_ins_app_tag; + __u64 dif_ins_res; + __u32 dif_ins_ref_tag; + __u16 dif_ins_app_tag_mask; + __u16 dif_ins_app_tag; }; /* DIF update */ struct { - uint32_t dif_upd_src_ref_tag; - uint16_t dif_upd_src_app_tag_mask; - uint16_t dif_upd_src_app_tag; - uint32_t dif_upd_dest_ref_tag; - uint16_t dif_upd_dest_app_tag_mask; - uint16_t dif_upd_dest_app_tag; + __u32 dif_upd_src_ref_tag; + __u16 dif_upd_src_app_tag_mask; + __u16 dif_upd_src_app_tag; + __u32 dif_upd_dest_ref_tag; + __u16 dif_upd_dest_app_tag_mask; + __u16 dif_upd_dest_app_tag; }; /* DIX generate */ struct { - uint64_t dix_gen_res; - uint32_t dix_ref_tag; - uint16_t dix_app_tag_mask; - uint16_t dix_app_tag; + __u64 dix_gen_res; + __u32 dix_ref_tag; + __u16 dix_app_tag_mask; + __u16 dix_app_tag; }; - uint8_t op_specific[16]; + __u8 op_specific[16]; }; } __attribute__((packed)); struct dsa_raw_completion_record { - uint64_t field[4]; + __u64 field[4]; } __attribute__((packed)); struct iax_completion_record { - volatile uint8_t status; - uint8_t error_code; - uint8_t fault_info; - uint8_t rsvd; - uint32_t bytes_completed; - uint64_t fault_addr; - uint32_t invalid_flags; - uint32_t rsvd2; - uint32_t output_size; - uint8_t output_bits; - uint8_t rsvd3; - uint16_t xor_csum; - uint32_t crc; - uint32_t min; - uint32_t max; - uint32_t sum; - uint64_t rsvd4[2]; + volatile __u8 status; + __u8 error_code; + __u8 fault_info; + __u8 rsvd; + __u32 bytes_completed; + __u64 fault_addr; + __u32 invalid_flags; + __u32 rsvd2; + __u32 output_size; + __u8 output_bits; + __u8 rsvd3; + __u16 xor_csum; + __u32 crc; + __u32 min; + __u32 max; + __u32 sum; + __u64 rsvd4[2]; } __attribute__((packed)); struct iax_raw_completion_record { - uint64_t field[8]; + __u64 field[8]; } __attribute__((packed)); #endif -- cgit v1.2.3 From dff547e137be2f36c6c4d77172a03a54a38230d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 19 Dec 2025 12:33:11 +0100 Subject: drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use device file descriptors and regions to represent pagemaps on foreign or local devices. The underlying files are type-checked at madvise time, and references are kept on the drm_pagemap as long as there is are madvises pointing to it. Extend the madvise preferred_location UAPI to support the region instance to identify the foreign placement. v2: - Improve UAPI documentation. (Matt Brost) - Sanitize preferred_mem_loc.region_instance madvise. (Matt Brost) - Clarify madvise drm_pagemap vs xe_pagemap refcounting. (Matt Brost) - Don't allow a foreign drm_pagemap madvise without a fast interconnect. v3: - Add a comment about reference-counting in xe_devmem_open() and remove the reference-count get-and-put. (Matt Brost) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251219113320.183860-16-thomas.hellstrom@linux.intel.com --- include/uapi/drm/xe_drm.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 726e481574fe..bb69f9b30c7d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -2123,7 +2123,13 @@ struct drm_xe_madvise { struct { #define DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE 0 #define DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM -1 - /** @preferred_mem_loc.devmem_fd: fd for preferred loc */ + /** + * @preferred_mem_loc.devmem_fd: + * Device file-descriptor of the device where the + * preferred memory is located, or one of the + * above special values. Please also see + * @preferred_mem_loc.region_instance below. + */ __u32 devmem_fd; #define DRM_XE_MIGRATE_ALL_PAGES 0 @@ -2131,8 +2137,14 @@ struct drm_xe_madvise { /** @preferred_mem_loc.migration_policy: Page migration policy */ __u16 migration_policy; - /** @preferred_mem_loc.pad : MBZ */ - __u16 pad; + /** + * @preferred_mem_loc.region_instance : Region instance. + * MBZ if @devmem_fd <= &DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE. + * Otherwise should point to the desired device + * VRAM instance of the device indicated by + * @preferred_mem_loc.devmem_fd. + */ + __u16 region_instance; /** @preferred_mem_loc.reserved : Reserved */ __u64 reserved; -- cgit v1.2.3 From 9e541b3cee70a3bbe86b176c903c23b29fe033cd Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Wed, 10 Dec 2025 21:29:05 +0800 Subject: PCI: trace: Add generic RAS tracepoint for hotplug event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hotplug events are critical indicators for analyzing hardware health, and surprise link downs can significantly impact system performance and reliability. Define a new TRACING_SYSTEM named "pci", add a generic RAS tracepoint for hotplug event to help health checks. Add enum pci_hotplug_event in include/uapi/linux/pci.h so applications like rasdaemon can register tracepoint event handlers for it. The following output is generated when a device is hotplugged: $ echo 1 > /sys/kernel/debug/tracing/events/pci/pci_hp_event/enable $ cat /sys/kernel/debug/tracing/trace_pipe irq/51-pciehp-88 [001] ..... 1311.177459: pci_hp_event: 0000:00:02.0 slot:10, event:CARD_PRESENT irq/51-pciehp-88 [001] ..... 1311.177566: pci_hp_event: 0000:00:02.0 slot:10, event:LINK_UP Suggested-by: Lukas Wunner Signed-off-by: Shuai Xue Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Jonathan Cameron Reviewed-by: Steven Rostedt (Google) # for trace event Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20251210132907.58799-2-xueshuai@linux.alibaba.com --- include/uapi/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci.h b/include/uapi/linux/pci.h index a769eefc5139..4f150028965d 100644 --- a/include/uapi/linux/pci.h +++ b/include/uapi/linux/pci.h @@ -39,4 +39,11 @@ #define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */ #define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */ +enum pci_hotplug_event { + PCI_HOTPLUG_LINK_UP, + PCI_HOTPLUG_LINK_DOWN, + PCI_HOTPLUG_CARD_PRESENT, + PCI_HOTPLUG_CARD_NOT_PRESENT, +}; + #endif /* _UAPILINUX_PCI_H */ -- cgit v1.2.3 From 3c4629b68dbe18e454cce4b864c530268cffbeed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 22 Dec 2025 09:00:33 +0100 Subject: virtio: uapi: avoid usage of libc types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using libc types and headers from the UAPI headers is problematic as it introduces a dependency on a full C toolchain. On Linux 'unsigned long' works as a replacement for 'uintptr_t' and does not depend on libc. Signed-off-by: Thomas Weißschuh Acked-by: Arnd Bergmann Signed-off-by: Michael S. Tsirkin Message-Id: <20251222-uapi-virtio-v1-1-29390f87bcad@linutronix.de> --- include/uapi/linux/virtio_ring.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index f8c20d3de8da..3c478582a3c2 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -31,9 +31,6 @@ * SUCH DAMAGE. * * Copyright Rusty Russell IBM Corporation 2007. */ -#ifndef __KERNEL__ -#include -#endif #include #include @@ -202,7 +199,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, vr->num = num; vr->desc = p; vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc)); - vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__virtio16) + align-1) & ~(align - 1)); } -- cgit v1.2.3 From 40fc797ba18328e57ed1cb213b4b5e48f86f4c7c Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 15 Dec 2025 18:17:09 +0000 Subject: binder: fix trivial typo in uapi header As reported by codespell: include/uapi/linux/android/binder.h:281: interupted ==> interrupted Signed-off-by: Carlos Llamas Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20251215181724.3811977-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 03ee4c7010d7..701cad36de43 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -278,7 +278,7 @@ enum { * NOTE: Two special error codes you should check for when calling * in to the driver are: * - * EINTR -- The operation has been interupted. This should be + * EINTR -- The operation has been interrupted. This should be * handled by retrying the ioctl() until a different error code * is returned. * -- cgit v1.2.3 From 51a07ce2fefd061edf4ba552a741c85f07b3e6dd Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Thu, 18 Dec 2025 17:58:46 +0200 Subject: IB/core: Add query_port_speed verb Add new ibv_query_port_speed() verb to enable applications to query the effective bandwidth of a port. This verb is particularly useful when the speed is not a multiplication of IB speed and width where width is 2^n. Signed-off-by: Or Har-Toov Reviewed-by: Mark Bloch Signed-off-by: Edward Srouji Signed-off-by: Leon Romanovsky --- include/uapi/rdma/ib_user_ioctl_cmds.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index de6f5a94f1e3..35da4026f452 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -73,6 +73,7 @@ enum uverbs_methods_device { UVERBS_METHOD_QUERY_CONTEXT, UVERBS_METHOD_QUERY_GID_TABLE, UVERBS_METHOD_QUERY_GID_ENTRY, + UVERBS_METHOD_QUERY_PORT_SPEED, }; enum uverbs_attrs_invoke_write_cmd_attr_ids { @@ -86,6 +87,11 @@ enum uverbs_attrs_query_port_cmd_attr_ids { UVERBS_ATTR_QUERY_PORT_RESP, }; +enum uverbs_attrs_query_port_speed_cmd_attr_ids { + UVERBS_ATTR_QUERY_PORT_SPEED_PORT_NUM, + UVERBS_ATTR_QUERY_PORT_SPEED_RESP, +}; + enum uverbs_attrs_get_context_attr_ids { UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT, -- cgit v1.2.3 From 44b69cf1d35cad4a846208e769b34a648fd637bb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Oct 2025 16:44:58 -0400 Subject: drm/amdgpu: Update AMDGPU_INFO_UQ_FW_AREAS query for compute Add a query for compute queues. Userspace can use this to query the size of the EOP buffers for compute user queues. Proposed userspace: https://gitlab.freedesktop.org/yogeshmohan/mesa/-/commits/userq_query Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 351c2fb2df90..138d9ae1aa48 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1630,9 +1630,17 @@ struct drm_amdgpu_info_uq_metadata_gfx { __u32 csa_alignment; }; +struct drm_amdgpu_info_uq_metadata_compute { + /* EOP size for gfx11 */ + __u32 eop_size; + /* EOP base virtual alignment for gfx11 */ + __u32 eop_alignment; +}; + struct drm_amdgpu_info_uq_metadata { union { struct drm_amdgpu_info_uq_metadata_gfx gfx; + struct drm_amdgpu_info_uq_metadata_compute compute; }; }; -- cgit v1.2.3 From 0030595c3e8b48b32a12b8354ce9dbe00efd632f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Oct 2025 16:47:02 -0400 Subject: drm/amdgpu: Update AMDGPU_INFO_UQ_FW_AREAS query for sdma Add a query for sdma queues. Userspace can use this to query the size of the CSA buffers for sdma user queues. Proposed userspace: https://gitlab.freedesktop.org/yogeshmohan/mesa/-/commits/userq_query Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 138d9ae1aa48..f902add31fc6 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1637,10 +1637,18 @@ struct drm_amdgpu_info_uq_metadata_compute { __u32 eop_alignment; }; +struct drm_amdgpu_info_uq_metadata_sdma { + /* context save area size for sdma6 */ + __u32 csa_size; + /* context save area base virtual alignment for sdma6 */ + __u32 csa_alignment; +}; + struct drm_amdgpu_info_uq_metadata { union { struct drm_amdgpu_info_uq_metadata_gfx gfx; struct drm_amdgpu_info_uq_metadata_compute compute; + struct drm_amdgpu_info_uq_metadata_sdma sdma; }; }; -- cgit v1.2.3 From c51bb53d5c68041dd02f66d9b638cda33647623e Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Tue, 18 Mar 2025 19:49:55 +0000 Subject: drm/amdkfd: Add metadata ring buffer for compute Add support for separate ring-buffer for metadata packets when using compute queues. Userspace application allocate the metadata ring-buffer and the queue ring-buffer with a single allocation. The metadata ring-buffer starts after the queue ring-buffer. Signed-off-by: David Yat Sin Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e91875c10ba..047bcb1cc078 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -47,9 +47,10 @@ * - 1.19 - Add a new ioctl to craete secondary kfd processes * - 1.20 - Trap handler support for expert scheduling mode available * - 1.21 - Debugger support to subscribe to LDS out-of-address exceptions + * - 1.22 - Add queue creation with metadata ring base address */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 21 +#define KFD_IOCTL_MINOR_VERSION 22 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -87,7 +88,7 @@ struct kfd_ioctl_create_queue_args { __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ __u32 sdma_engine_id; /* to KFD */ - __u32 pad; + __u32 metadata_ring_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { -- cgit v1.2.3 From 22cd0db47f4f65ebe8afc8c34ab120c47c73da2a Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 15 Dec 2025 13:08:12 +0100 Subject: media: uapi: mali-c55-config: Remove version identifier The Mali C55 driver uses the v4l2-isp framework, which defines its own versioning number which does not need to be defined again in each platform-specific header. Remove the definition of mali_c55_param_buffer_version enumeration from the Mali C55 uAPI header. Signed-off-by: Jacopo Mondi Reviewed-by: Daniel Scally Signed-off-by: Hans Verkuil --- include/uapi/linux/media/arm/mali-c55-config.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/media/arm/mali-c55-config.h b/include/uapi/linux/media/arm/mali-c55-config.h index 109082c5694f..3d335f950eeb 100644 --- a/include/uapi/linux/media/arm/mali-c55-config.h +++ b/include/uapi/linux/media/arm/mali-c55-config.h @@ -194,15 +194,6 @@ struct mali_c55_stats_buffer { __u32 reserved3[15]; } __attribute__((packed)); -/** - * enum mali_c55_param_buffer_version - Mali-C55 parameters block versioning - * - * @MALI_C55_PARAM_BUFFER_V1: First version of Mali-C55 parameters block - */ -enum mali_c55_param_buffer_version { - MALI_C55_PARAM_BUFFER_V1, -}; - /** * enum mali_c55_param_block_type - Enumeration of Mali-C55 parameter blocks * -- cgit v1.2.3 From caaed1dda7df9b4e21d439bb5e7750d4af4f1e78 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 6 Jan 2026 11:10:50 -0800 Subject: Revert "drm/xe/multi_queue: Support active group after primary is destroyed" This reverts commit 3131a43ecb346ae3b5287ee195779fc38c6fcd11. There is no must have requirement for this feature from Compute UMD. Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260106191051.2866538-5-niranjana.vishwanathapura@intel.com --- include/uapi/drm/xe_drm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index bb69f9b30c7d..077e66a682e2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1280,9 +1280,6 @@ struct drm_xe_vm_bind { * then a new multi-queue group is created with this queue as the primary queue * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. - * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag - * set, then the multi-queue group is kept active after the primary queue is - * destroyed. * All the other non-relevant bits of extension's 'value' field while adding the * primary or the secondary queues of the group must be set to 0. * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue @@ -1331,7 +1328,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) -#define DRM_XE_MULTI_GROUP_KEEP_ACTIVE (1ull << 62) #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 2b421662c7887a0649fe409155a1f101562d0fa9 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Wed, 7 Jan 2026 10:20:16 +0800 Subject: bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags and check them for following APIs: * 'map_lookup_elem()' * 'map_update_elem()' * 'generic_map_lookup_batch()' * 'generic_map_update_batch()' And, get the correct value size for these APIs. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260107022022.12843-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 84ced3ed2d21..2a2ade4be60f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1384,6 +1384,8 @@ enum { BPF_NOEXIST = 1, /* create new element if it didn't exist */ BPF_EXIST = 2, /* update existing element */ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ + BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */ + BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */ }; /* flags for BPF_MAP_CREATE command */ -- cgit v1.2.3 From caa07a815d6ee32586beb66f67e7e3c103a02efd Mon Sep 17 00:00:00 2001 From: Changwoo Min Date: Thu, 8 Jan 2026 14:32:10 +0900 Subject: PM: EM: Rename em.yaml to dev-energymodel.yaml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EM YNL specification used many acronyms, including ‘em’, ‘pd’, ‘ps’, etc. While the acronyms are short and convenient, they could be confusing. So, let’s spell them out to be more specific. The following changes were made in the spec. Note that the protocol name cannot exceed GENL_NAMSIZ (16). em -> dev-energymodel pds -> perf-domains pd -> perf-domain pd-id -> perf-domain-id pd-table -> perf-table ps -> perf-state get-pds -> get-perf-domains get-pd-table -> get-perf-table pd-created -> perf-domain-created pd-updated -> perf-domain-updated pd-deleted -> perf-domain-deleted In addition. doc strings were added to the spec. based on the comments in energy_model.h. Two flag attributes (perf-state-flags and perf-domain-flags) were added for easily interpreting the bit flags. Finally, the autogenerated files and em_netlink.c were updated accordingly to reflect the name changes. Suggested-by: Donald Hunter Reviewed-by: Lukasz Luba Reviewed-by: Donald Hunter Signed-off-by: Changwoo Min Link: https://patch.msgid.link/20260108053212.642478-3-changwoo@igalia.com Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/dev_energymodel.h | 89 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/energy_model.h | 63 ------------------------- 2 files changed, 89 insertions(+), 63 deletions(-) create mode 100644 include/uapi/linux/dev_energymodel.h delete mode 100644 include/uapi/linux/energy_model.h (limited to 'include/uapi') diff --git a/include/uapi/linux/dev_energymodel.h b/include/uapi/linux/dev_energymodel.h new file mode 100644 index 000000000000..3399967e1f93 --- /dev/null +++ b/include/uapi/linux/dev_energymodel.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/dev-energymodel.yaml */ +/* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ + +#ifndef _UAPI_LINUX_DEV_ENERGYMODEL_H +#define _UAPI_LINUX_DEV_ENERGYMODEL_H + +#define DEV_ENERGYMODEL_FAMILY_NAME "dev-energymodel" +#define DEV_ENERGYMODEL_FAMILY_VERSION 1 + +/** + * enum dev_energymodel_perf_state_flags + * @DEV_ENERGYMODEL_PERF_STATE_FLAGS_PERF_STATE_INEFFICIENT: The performance + * state is inefficient. There is in this perf-domain, another performance + * state with a higher frequency but a lower or equal power cost. + */ +enum dev_energymodel_perf_state_flags { + DEV_ENERGYMODEL_PERF_STATE_FLAGS_PERF_STATE_INEFFICIENT = 1, +}; + +/** + * enum dev_energymodel_perf_domain_flags + * @DEV_ENERGYMODEL_PERF_DOMAIN_FLAGS_PERF_DOMAIN_MICROWATTS: The power values + * are in micro-Watts or some other scale. + * @DEV_ENERGYMODEL_PERF_DOMAIN_FLAGS_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip + * inefficient states when estimating energy consumption. + * @DEV_ENERGYMODEL_PERF_DOMAIN_FLAGS_PERF_DOMAIN_ARTIFICIAL: The power values + * are artificial and might be created by platform missing real power + * information. + */ +enum dev_energymodel_perf_domain_flags { + DEV_ENERGYMODEL_PERF_DOMAIN_FLAGS_PERF_DOMAIN_MICROWATTS = 1, + DEV_ENERGYMODEL_PERF_DOMAIN_FLAGS_PERF_DOMAIN_SKIP_INEFFICIENCIES = 2, + DEV_ENERGYMODEL_PERF_DOMAIN_FLAGS_PERF_DOMAIN_ARTIFICIAL = 4, +}; + +enum { + DEV_ENERGYMODEL_A_PERF_DOMAINS_PERF_DOMAIN = 1, + + __DEV_ENERGYMODEL_A_PERF_DOMAINS_MAX, + DEV_ENERGYMODEL_A_PERF_DOMAINS_MAX = (__DEV_ENERGYMODEL_A_PERF_DOMAINS_MAX - 1) +}; + +enum { + DEV_ENERGYMODEL_A_PERF_DOMAIN_PAD = 1, + DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID, + DEV_ENERGYMODEL_A_PERF_DOMAIN_FLAGS, + DEV_ENERGYMODEL_A_PERF_DOMAIN_CPUS, + + __DEV_ENERGYMODEL_A_PERF_DOMAIN_MAX, + DEV_ENERGYMODEL_A_PERF_DOMAIN_MAX = (__DEV_ENERGYMODEL_A_PERF_DOMAIN_MAX - 1) +}; + +enum { + DEV_ENERGYMODEL_A_PERF_TABLE_PERF_DOMAIN_ID = 1, + DEV_ENERGYMODEL_A_PERF_TABLE_PERF_STATE, + + __DEV_ENERGYMODEL_A_PERF_TABLE_MAX, + DEV_ENERGYMODEL_A_PERF_TABLE_MAX = (__DEV_ENERGYMODEL_A_PERF_TABLE_MAX - 1) +}; + +enum { + DEV_ENERGYMODEL_A_PERF_STATE_PAD = 1, + DEV_ENERGYMODEL_A_PERF_STATE_PERFORMANCE, + DEV_ENERGYMODEL_A_PERF_STATE_FREQUENCY, + DEV_ENERGYMODEL_A_PERF_STATE_POWER, + DEV_ENERGYMODEL_A_PERF_STATE_COST, + DEV_ENERGYMODEL_A_PERF_STATE_FLAGS, + + __DEV_ENERGYMODEL_A_PERF_STATE_MAX, + DEV_ENERGYMODEL_A_PERF_STATE_MAX = (__DEV_ENERGYMODEL_A_PERF_STATE_MAX - 1) +}; + +enum { + DEV_ENERGYMODEL_CMD_GET_PERF_DOMAINS = 1, + DEV_ENERGYMODEL_CMD_GET_PERF_TABLE, + DEV_ENERGYMODEL_CMD_PERF_DOMAIN_CREATED, + DEV_ENERGYMODEL_CMD_PERF_DOMAIN_UPDATED, + DEV_ENERGYMODEL_CMD_PERF_DOMAIN_DELETED, + + __DEV_ENERGYMODEL_CMD_MAX, + DEV_ENERGYMODEL_CMD_MAX = (__DEV_ENERGYMODEL_CMD_MAX - 1) +}; + +#define DEV_ENERGYMODEL_MCGRP_EVENT "event" + +#endif /* _UAPI_LINUX_DEV_ENERGYMODEL_H */ diff --git a/include/uapi/linux/energy_model.h b/include/uapi/linux/energy_model.h deleted file mode 100644 index 0bcad967854f..000000000000 --- a/include/uapi/linux/energy_model.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/em.yaml */ -/* YNL-GEN uapi header */ -/* To regenerate run: tools/net/ynl/ynl-regen.sh */ - -#ifndef _UAPI_LINUX_ENERGY_MODEL_H -#define _UAPI_LINUX_ENERGY_MODEL_H - -#define EM_FAMILY_NAME "em" -#define EM_FAMILY_VERSION 1 - -enum { - EM_A_PDS_PD = 1, - - __EM_A_PDS_MAX, - EM_A_PDS_MAX = (__EM_A_PDS_MAX - 1) -}; - -enum { - EM_A_PD_PAD = 1, - EM_A_PD_PD_ID, - EM_A_PD_FLAGS, - EM_A_PD_CPUS, - - __EM_A_PD_MAX, - EM_A_PD_MAX = (__EM_A_PD_MAX - 1) -}; - -enum { - EM_A_PD_TABLE_PD_ID = 1, - EM_A_PD_TABLE_PS, - - __EM_A_PD_TABLE_MAX, - EM_A_PD_TABLE_MAX = (__EM_A_PD_TABLE_MAX - 1) -}; - -enum { - EM_A_PS_PAD = 1, - EM_A_PS_PERFORMANCE, - EM_A_PS_FREQUENCY, - EM_A_PS_POWER, - EM_A_PS_COST, - EM_A_PS_FLAGS, - - __EM_A_PS_MAX, - EM_A_PS_MAX = (__EM_A_PS_MAX - 1) -}; - -enum { - EM_CMD_GET_PDS = 1, - EM_CMD_GET_PD_TABLE, - EM_CMD_PD_CREATED, - EM_CMD_PD_UPDATED, - EM_CMD_PD_DELETED, - - __EM_CMD_MAX, - EM_CMD_MAX = (__EM_CMD_MAX - 1) -}; - -#define EM_MCGRP_EVENT "event" - -#endif /* _UAPI_LINUX_ENERGY_MODEL_H */ -- cgit v1.2.3 From 380ff27af25e49e2cb2ff8fd0ecd7c95be2976ee Mon Sep 17 00:00:00 2001 From: Changwoo Min Date: Thu, 8 Jan 2026 14:32:12 +0900 Subject: PM: EM: Add dump to get-perf-domains in the EM YNL spec Add dump to get-perf-domains, so that a user can fetch either information about a specific performance domain with do or information about all performance domains with dump. Share the reply format of do and dump using perf-domain-attrs, so remove perf-domains. The YNL spec, autogenerated files, and the do implementation are updated, and the dump implementation is added. Suggested-by: Donald Hunter Reviewed-by: Lukasz Luba Reviewed-by: Donald Hunter Signed-off-by: Changwoo Min Link: https://patch.msgid.link/20260108053212.642478-5-changwoo@igalia.com Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/dev_energymodel.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/dev_energymodel.h b/include/uapi/linux/dev_energymodel.h index 3399967e1f93..355d8885c9a0 100644 --- a/include/uapi/linux/dev_energymodel.h +++ b/include/uapi/linux/dev_energymodel.h @@ -36,13 +36,6 @@ enum dev_energymodel_perf_domain_flags { DEV_ENERGYMODEL_PERF_DOMAIN_FLAGS_PERF_DOMAIN_ARTIFICIAL = 4, }; -enum { - DEV_ENERGYMODEL_A_PERF_DOMAINS_PERF_DOMAIN = 1, - - __DEV_ENERGYMODEL_A_PERF_DOMAINS_MAX, - DEV_ENERGYMODEL_A_PERF_DOMAINS_MAX = (__DEV_ENERGYMODEL_A_PERF_DOMAINS_MAX - 1) -}; - enum { DEV_ENERGYMODEL_A_PERF_DOMAIN_PAD = 1, DEV_ENERGYMODEL_A_PERF_DOMAIN_PERF_DOMAIN_ID, -- cgit v1.2.3 From c279e83953d937470f8a6e69b69f62608714f13f Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 15 Dec 2025 13:42:19 -0800 Subject: iommu: Introduce pci_dev_reset_iommu_prepare/done() PCIe permits a device to ignore ATS invalidation TLPs while processing a reset. This creates a problem visible to the OS where an ATS invalidation command will time out. E.g. an SVA domain will have no coordination with a reset event and can racily issue ATS invalidations to a resetting device. The OS should do something to mitigate this as we do not want production systems to be reporting critical ATS failures, especially in a hypervisor environment. Broadly, OS could arrange to ignore the timeouts, block page table mutations to prevent invalidations, or disable and block ATS. The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends SW to disable and block ATS before initiating a Function Level Reset. It also mentions that other reset methods could have the same vulnerability as well. Provide a callback from the PCI subsystem that will enclose the reset and have the iommu core temporarily change all the attached RID/PASID domains group->blocking_domain so that the IOMMU hardware would fence any incoming ATS queries. And IOMMU drivers should also synchronously stop issuing new ATS invalidations and wait for all ATS invalidations to complete. This can avoid any ATS invaliation timeouts. However, if there is a domain attachment/replacement happening during an ongoing reset, ATS routines may be re-activated between the two function calls. So, introduce a new resetting_domain in the iommu_group structure to reject any concurrent attach_dev/set_dev_pasid call during a reset for a concern of compatibility failure. Since this changes the behavior of an attach operation, update the uAPI accordingly. Note that there are two corner cases: 1. Devices in the same iommu_group Since an attachment is always per iommu_group, this means that any sibling devices in the iommu_group cannot change domain, to prevent race conditions. 2. An SR-IOV PF that is being reset while its VF is not In such case, the VF itself is already broken. So, there is no point in preventing PF from going through the iommu reset. Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Dheeraj Kumar Srivastava Signed-off-by: Nicolin Chen Signed-off-by: Joerg Roedel --- include/uapi/linux/vfio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ac2329f24141..bb7b89330d35 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -964,6 +964,10 @@ struct vfio_device_bind_iommufd { * hwpt corresponding to the given pt_id. * * Return: 0 on success, -errno on failure. + * + * When a device is resetting, -EBUSY will be returned to reject any concurrent + * attachment to the resetting device itself or any sibling device in the IOMMU + * group having the resetting device. */ struct vfio_device_attach_iommufd_pt { __u32 argsz; -- cgit v1.2.3 From 38feb171b3f92d77e8061fafb5ddfffc2c13b672 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 22 Oct 2025 23:24:40 -0700 Subject: accel/rocket: rocket_accel.h: fix kernel-doc warnings Fix all kernel-doc warnings in rocket_accel.h: Warning: include/uapi/drm/rocket_accel.h:35 Incorrect use of kernel-doc format: * Output: DMA address for the BO in the NPU address space. This address and 22 warnings like these: Warning: include/uapi/drm/rocket_accel.h:43 struct member 'size' not described in 'drm_rocket_create_bo' Warning: include/uapi/drm/rocket_accel.h:60 struct member 'handle' not described in 'drm_rocket_prep_bo' Warning: include/uapi/drm/rocket_accel.h:73 struct member 'handle' not described in 'drm_rocket_fini_bo' Warning: include/uapi/drm/rocket_accel.h:86 struct member 'regcmd' not described in 'drm_rocket_task' Warning: include/uapi/drm/rocket_accel.h:116 struct member 'tasks' not described in 'drm_rocket_job' Warning: include/uapi/drm/rocket_accel.h:135 struct member 'jobs' not described in 'drm_rocket_submit' Signed-off-by: Randy Dunlap Reviewed-by: Tomeu Vizoso Signed-off-by: Tomeu Vizoso Link: https://patch.msgid.link/20251023062440.4093661-1-rdunlap@infradead.org --- include/uapi/drm/rocket_accel.h | 98 +++++++++++++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 24 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h index 14b2e12b7c49..d0685e372b79 100644 --- a/include/uapi/drm/rocket_accel.h +++ b/include/uapi/drm/rocket_accel.h @@ -26,20 +26,27 @@ extern "C" { * */ struct drm_rocket_create_bo { - /** Input: Size of the requested BO. */ + /** + * @size: Input: Size of the requested BO. + */ __u32 size; - /** Output: GEM handle for the BO. */ + /** + * @handle: Output: GEM handle for the BO. + */ __u32 handle; /** - * Output: DMA address for the BO in the NPU address space. This address - * is private to the DRM fd and is valid for the lifetime of the GEM - * handle. + * @dma_address: Output: DMA address for the BO in the NPU address + * space. This address is private to the DRM fd and is valid for + * the lifetime of the GEM handle. */ __u64 dma_address; - /** Output: Offset into the drm node to use for subsequent mmap call. */ + /** + * @offset: Output: Offset into the drm node to use for subsequent + * mmap call. + */ __u64 offset; }; @@ -50,13 +57,19 @@ struct drm_rocket_create_bo { * synchronization. */ struct drm_rocket_prep_bo { - /** Input: GEM handle of the buffer object. */ + /** + * @handle: Input: GEM handle of the buffer object. + */ __u32 handle; - /** Reserved, must be zero. */ + /** + * @reserved: Reserved, must be zero. + */ __u32 reserved; - /** Input: Amount of time to wait for NPU jobs. */ + /** + * @timeout_ns: Input: Amount of time to wait for NPU jobs. + */ __s64 timeout_ns; }; @@ -66,10 +79,14 @@ struct drm_rocket_prep_bo { * Synchronize caches for NPU access. */ struct drm_rocket_fini_bo { - /** Input: GEM handle of the buffer object. */ + /** + * @handle: Input: GEM handle of the buffer object. + */ __u32 handle; - /** Reserved, must be zero. */ + /** + * @reserved: Reserved, must be zero. + */ __u32 reserved; }; @@ -79,10 +96,15 @@ struct drm_rocket_fini_bo { * A task is the smallest unit of work that can be run on the NPU. */ struct drm_rocket_task { - /** Input: DMA address to NPU mapping of register command buffer */ + /** + * @regcmd: Input: DMA address to NPU mapping of register command buffer + */ __u32 regcmd; - /** Input: Number of commands in the register command buffer */ + /** + * @regcmd_count: Input: Number of commands in the register command + * buffer + */ __u32 regcmd_count; }; @@ -94,25 +116,44 @@ struct drm_rocket_task { * sequentially on the same core, to benefit from memory residency in SRAM. */ struct drm_rocket_job { - /** Input: Pointer to an array of struct drm_rocket_task. */ + /** + * @tasks: Input: Pointer to an array of struct drm_rocket_task. + */ __u64 tasks; - /** Input: Pointer to a u32 array of the BOs that are read by the job. */ + /** + * @in_bo_handles: Input: Pointer to a u32 array of the BOs that + * are read by the job. + */ __u64 in_bo_handles; - /** Input: Pointer to a u32 array of the BOs that are written to by the job. */ + /** + * @out_bo_handles: Input: Pointer to a u32 array of the BOs that + * are written to by the job. + */ __u64 out_bo_handles; - /** Input: Number of tasks passed in. */ + /** + * @task_count: Input: Number of tasks passed in. + */ __u32 task_count; - /** Input: Size in bytes of the structs in the @tasks field. */ + /** + * @task_struct_size: Input: Size in bytes of the structs in the + * @tasks field. + */ __u32 task_struct_size; - /** Input: Number of input BO handles passed in (size is that times 4). */ + /** + * @in_bo_handle_count: Input: Number of input BO handles passed in + * (size is that times 4). + */ __u32 in_bo_handle_count; - /** Input: Number of output BO handles passed in (size is that times 4). */ + /** + * @out_bo_handle_count: Input: Number of output BO handles passed in + * (size is that times 4). + */ __u32 out_bo_handle_count; }; @@ -122,16 +163,25 @@ struct drm_rocket_job { * The kernel will schedule the execution of these jobs in dependency order. */ struct drm_rocket_submit { - /** Input: Pointer to an array of struct drm_rocket_job. */ + /** + * @jobs: Input: Pointer to an array of struct drm_rocket_job. + */ __u64 jobs; - /** Input: Number of jobs passed in. */ + /** + * @job_count: Input: Number of jobs passed in. + */ __u32 job_count; - /** Input: Size in bytes of the structs in the @jobs field. */ + /** + * @job_struct_size: Input: Size in bytes of the structs in the + * @jobs field. + */ __u32 job_struct_size; - /** Reserved, must be zero. */ + /** + * @reserved: Reserved, must be zero. + */ __u64 reserved; }; -- cgit v1.2.3 From 96e97a562d067a6d867862db79864cc66aae99c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 2 Dec 2025 16:12:41 +0100 Subject: drm/amdgpu: Drop MMIO_REMAP domain bit and keep it Internal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "AMDGPU_GEM_DOMAIN_MMIO_REMAP" - Never activated as UAPI and it turned out that this was to inflexible. Allocate the MMIO_REMAP buffer object as a regular GEM BO and explicitly move it into the fixed AMDGPU_PL_MMIO_REMAP placement at the TTM level. This avoids relying on GEM domain bits for MMIO_REMAP, keeps the placement purely internal, and makes the lifetime and pinning of the global MMIO_REMAP BO explicit. The BO is pinned in TTM so it cannot be migrated or evicted. The corresponding free path relies on normal DRM teardown ordering, where no further user ioctls can access the global BO once TTM teardown begins. v2 (Srini): - Updated patch title. - Drop use of AMDGPU_GEM_DOMAIN_MMIO_REMAP in amdgpu_ttm.c. The MMIO_REMAP domain bit is removed from UAPI, so keep the MMIO_REMAP BO allocation domain-less (bp.domain = 0) and rely on the TTM placement (AMDGPU_PL_MMIO_REMAP) for backing/pinning. - Keep fdinfo/mem-stats visibility for MMIO_REMAP by classifying BOs based on bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP, since the domain bit is removed. v3: Squash patches #1 & #3 Fixes: 056132483724 ("drm/amdgpu/uapi: Introduce AMDGPU_GEM_DOMAIN_MMIO_REMAP") Fixes: 2a7a794eb82c ("drm/amdgpu/ttm: Allocate/Free 4K MMIO_REMAP Singleton") Cc: Alex Deucher Cc: Christian König Cc: Leo Liu Cc: Ruijing Dong Cc: David (Ming Qiang) Wu Signed-off-by: Srinivasan Shanmugam Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index f902add31fc6..1d34daa0ebcd 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -105,8 +105,6 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for * signalling user mode queues. - * - * %AMDGPU_GEM_DOMAIN_MMIO_REMAP MMIO remap page (special mapping for HDP flushing). */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -115,15 +113,13 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 #define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 -#define AMDGPU_GEM_DOMAIN_MMIO_REMAP 0x80 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ AMDGPU_GEM_DOMAIN_OA | \ - AMDGPU_GEM_DOMAIN_DOORBELL | \ - AMDGPU_GEM_DOMAIN_MMIO_REMAP) + AMDGPU_GEM_DOMAIN_DOORBELL) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) -- cgit v1.2.3 From bc87b14594e30720a5c1546c24e0f5f08d34eb40 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 8 Jan 2026 10:35:21 -0800 Subject: bnxt_en: Implement ethtool_ops -> get_link_ext_state() Map the link_down_reason from the FW to the ethtool link_ext_state when it is available. Also log it to the link down dmesg when it is available. Add 2 new link_ext_state enums to the UAPI: ETHTOOL_LINK_EXT_STATE_OTP_SPEED_VIOLATION ETHTOOL_LINK_EXT_STATE_BMC_REQUEST_DOWN to cover OTP (one-time-programmable) speed restrictions and BMC (Baseboard management controller) forcing the link down. Reviewed-by: Andy Gospodarek Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20260108183521.215610-7-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index eb7ff2602fbb..5daa8f225b67 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -603,6 +603,8 @@ enum ethtool_link_ext_state { ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, ETHTOOL_LINK_EXT_STATE_OVERHEAT, ETHTOOL_LINK_EXT_STATE_MODULE, + ETHTOOL_LINK_EXT_STATE_OTP_SPEED_VIOLATION, + ETHTOOL_LINK_EXT_STATE_BMC_REQUEST_DOWN, }; /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */ -- cgit v1.2.3 From 309b23a1553acdc6b8534682ee1782c9598e0e2e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 6 Jan 2026 12:00:17 -0700 Subject: scsi: ufs: core: Improve the documentation of UFS data frames In source code comments, use terminology that comes from the JEDEC UFS standard. This makes it easier to compare the UFS driver code with the JEDEC UFS standard. Add static_assert() statements that verify the size of data structures defined in the UFS standard. Signed-off-by: Bart Van Assche Link: https://patch.msgid.link/20260106190017.2527978-1-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/uapi/scsi/scsi_bsg_ufs.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/scsi/scsi_bsg_ufs.h b/include/uapi/scsi/scsi_bsg_ufs.h index 8c29e498ef98..06f88d1b1876 100644 --- a/include/uapi/scsi/scsi_bsg_ufs.h +++ b/include/uapi/scsi/scsi_bsg_ufs.h @@ -94,16 +94,15 @@ struct utp_upiu_header { }; /** - * struct utp_upiu_query - upiu request buffer structure for - * query request. - * @opcode: command to perform B-0 - * @idn: a value that indicates the particular type of data B-1 - * @index: Index to further identify data B-2 - * @selector: Index to further identify data B-3 + * struct utp_upiu_query - QUERY REQUEST UPIU structure. + * @opcode: query function to perform B-0 + * @idn: descriptor or attribute identification number B-1 + * @index: Index that further identifies which data to access B-2 + * @selector: Index that further identifies which data to access B-3 * @reserved_osf: spec reserved field B-4,5 - * @length: number of descriptor bytes to read/write B-6,7 - * @value: Attribute value to be written DW-5 - * @reserved: spec reserved DW-6,7 + * @length: number of descriptor bytes to read or write B-6,7 + * @value: if @opcode == UPIU_QUERY_OPCODE_WRITE_ATTR, the value to be written B-6,7 + * @reserved: reserved for future use DW-6,7 */ struct utp_upiu_query { __u8 opcode; -- cgit v1.2.3 From 0432fe32c129780f89fd5426059cb1ddd8e50858 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Mon, 12 Jan 2026 13:32:18 +0200 Subject: ASoC: sof: ipc4-topology: Add topology tokens domain_in stack & heap_bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add topology tokens for defining user-space domain_id, required stack and heap size byte for a component. The new topology tokens are SOF_TKN_COMP_DOMAIN_ID, SOF_TKN_COMP_HEAP_BYTES_REQUIREMENT and SOF_TKN_COMP_STACK_BYTES_REQUIREMENT for defining required stack and heap size for a component. Signed-off-by: Jyri Sarha Reviewed-by: Ranjani Sridharan Reviewed-by: Guennadi Liakhovetski Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://patch.msgid.link/20260112113221.4442-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/uapi/sound/sof/tokens.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index 9ce72fbd6f11..5fa8ab5088e0 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -107,6 +107,9 @@ #define SOF_TKN_COMP_NO_WNAME_IN_KCONTROL_NAME 417 #define SOF_TKN_COMP_SCHED_DOMAIN 418 +#define SOF_TKN_COMP_DOMAIN_ID 419 +#define SOF_TKN_COMP_HEAP_BYTES_REQUIREMENT 420 +#define SOF_TKN_COMP_STACK_BYTES_REQUIREMENT 421 /* SSP */ #define SOF_TKN_INTEL_SSP_CLKS_CONTROL 500 -- cgit v1.2.3 From 576ee5dfd459abe8e29bee8b204cd259e60b4e18 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 12 Jan 2026 16:47:10 +0100 Subject: fs: add immutable rootfs Currently pivot_root() doesn't work on the real rootfs because it cannot be unmounted. Userspace has to do a recursive removal of the initramfs contents manually before continuing the boot. Really all we want from the real rootfs is to serve as the parent mount for anything that is actually useful such as the tmpfs or ramfs for initramfs unpacking or the rootfs itself. There's no need for the real rootfs to actually be anything meaningful or useful. Add a immutable rootfs called "nullfs" that can be selected via the "nullfs_rootfs" kernel command line option. The kernel will mount a tmpfs/ramfs on top of it, unpack the initramfs and fire up userspace which mounts the rootfs and can then just do: chdir(rootfs); pivot_root(".", "."); umount2(".", MNT_DETACH); and be done with it. (Ofc, userspace can also choose to retain the initramfs contents by using something like pivot_root(".", "/initramfs") without unmounting it.) Technically this also means that the rootfs mount in unprivileged namespaces doesn't need to become MNT_LOCKED anymore as it's guaranteed that the immutable rootfs remains permanently empty so there cannot be anything revealed by unmounting the covering mount. In the future this will also allow us to create completely empty mount namespaces without risking to leak anything. systemd already handles this all correctly as it tries to pivot_root() first and falls back to MS_MOVE only when that fails. This goes back to various discussion in previous years and a LPC 2024 presentation about this very topic. Link: https://patch.msgid.link/20260112-work-immutable-rootfs-v2-3-88dd1c34a204@kernel.org Signed-off-by: Christian Brauner --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 638ca21b7a90..4f2da935a76c 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -104,5 +104,6 @@ #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ #define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */ +#define NULL_FS_MAGIC 0x4E554C4C /* "NULL" */ #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3 From 6abbb8703aeeb645a681ab6ad155e0b450413787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Sun, 11 Jan 2026 18:52:04 +0100 Subject: landlock: Clarify documentation for the IOCTL access right MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the description of the LANDLOCK_ACCESS_FS_IOCTL_DEV access right together with the file access rights. This group of access rights applies to files (in this case device files), and they can be added to file or directory inodes using landlock_add_rule(2). The check for that works the same for all file access rights, including LANDLOCK_ACCESS_FS_IOCTL_DEV. Invoking ioctl(2) on directory FDs can not currently be restricted with Landlock. Having it grouped separately in the documentation is a remnant from earlier revisions of the LANDLOCK_ACCESS_FS_IOCTL_DEV patch set. Link: https://lore.kernel.org/all/20260108.Thaex5ruach2@digikod.net/ Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20260111175203.6545-2-gnoack3000@gmail.com Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index f030adc462ee..75fd7f5e6cc3 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -216,6 +216,23 @@ struct landlock_net_port_attr { * :manpage:`ftruncate(2)`, :manpage:`creat(2)`, or :manpage:`open(2)` with * ``O_TRUNC``. This access right is available since the third version of the * Landlock ABI. + * - %LANDLOCK_ACCESS_FS_IOCTL_DEV: Invoke :manpage:`ioctl(2)` commands on an opened + * character or block device. + * + * This access right applies to all `ioctl(2)` commands implemented by device + * drivers. However, the following common IOCTL commands continue to be + * invokable independent of the %LANDLOCK_ACCESS_FS_IOCTL_DEV right: + * + * * IOCTL commands targeting file descriptors (``FIOCLEX``, ``FIONCLEX``), + * * IOCTL commands targeting file descriptions (``FIONBIO``, ``FIOASYNC``), + * * IOCTL commands targeting file systems (``FIFREEZE``, ``FITHAW``, + * ``FIGETBSZ``, ``FS_IOC_GETFSUUID``, ``FS_IOC_GETFSSYSFSPATH``) + * * Some IOCTL commands which do not make sense when used with devices, but + * whose implementations are safe and return the right error codes + * (``FS_IOC_FIEMAP``, ``FICLONE``, ``FICLONERANGE``, ``FIDEDUPERANGE``) + * + * This access right is available since the fifth version of the Landlock + * ABI. * * Whether an opened file can be truncated with :manpage:`ftruncate(2)` or used * with `ioctl(2)` is determined during :manpage:`open(2)`, in the same way as @@ -275,26 +292,6 @@ struct landlock_net_port_attr { * If multiple requirements are not met, the ``EACCES`` error code takes * precedence over ``EXDEV``. * - * The following access right applies both to files and directories: - * - * - %LANDLOCK_ACCESS_FS_IOCTL_DEV: Invoke :manpage:`ioctl(2)` commands on an opened - * character or block device. - * - * This access right applies to all `ioctl(2)` commands implemented by device - * drivers. However, the following common IOCTL commands continue to be - * invokable independent of the %LANDLOCK_ACCESS_FS_IOCTL_DEV right: - * - * * IOCTL commands targeting file descriptors (``FIOCLEX``, ``FIONCLEX``), - * * IOCTL commands targeting file descriptions (``FIONBIO``, ``FIOASYNC``), - * * IOCTL commands targeting file systems (``FIFREEZE``, ``FITHAW``, - * ``FIGETBSZ``, ``FS_IOC_GETFSUUID``, ``FS_IOC_GETFSSYSFSPATH``) - * * Some IOCTL commands which do not make sense when used with devices, but - * whose implementations are safe and return the right error codes - * (``FS_IOC_FIEMAP``, ``FICLONE``, ``FICLONERANGE``, ``FIDEDUPERANGE``) - * - * This access right is available since the fifth version of the Landlock - * ABI. - * * .. warning:: * * It is currently not possible to restrict some file-related actions -- cgit v1.2.3 From 98bf2256855eb682433a33e6a7c4bce35191ca99 Mon Sep 17 00:00:00 2001 From: Stanley Zhang Date: Thu, 8 Jan 2026 02:19:31 -0700 Subject: ublk: support UBLK_PARAM_TYPE_INTEGRITY in device creation Add a feature flag UBLK_F_INTEGRITY for a ublk server to request integrity/metadata support when creating a ublk device. The ublk server can also check for the feature flag on the created device or the result of UBLK_U_CMD_GET_FEATURES to tell if the ublk driver supports it. UBLK_F_INTEGRITY requires UBLK_F_USER_COPY, as user copy is the only data copy mode initially supported for integrity data. Add UBLK_PARAM_TYPE_INTEGRITY and struct ublk_param_integrity to struct ublk_params to specify the integrity params of a ublk device. UBLK_PARAM_TYPE_INTEGRITY requires UBLK_F_INTEGRITY and a nonzero metadata_size. The LBMD_PI_CAP_* and LBMD_PI_CSUM_* values from the linux/fs.h UAPI header are used for the flags and csum_type fields. If the UBLK_PARAM_TYPE_INTEGRITY flag is set, validate the integrity parameters and apply them to the blk_integrity limits. The struct ublk_param_integrity validations are based on the checks in blk_validate_integrity_limits(). Any invalid parameters should be rejected before being applied to struct blk_integrity. [csander: drop redundant pi_tuple_size field, use block metadata UAPI constants, add param validation] Signed-off-by: Stanley Zhang Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index ec77dabba45b..4c141d7e4710 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -311,6 +311,12 @@ */ #define UBLK_F_BUF_REG_OFF_DAEMON (1ULL << 14) +/* + * ublk device supports requests with integrity/metadata buffer. + * Requires UBLK_F_USER_COPY. + */ +#define UBLK_F_INTEGRITY (1ULL << 16) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 @@ -600,6 +606,17 @@ struct ublk_param_segment { __u8 pad[2]; }; +struct ublk_param_integrity { + __u32 flags; /* LBMD_PI_CAP_* from linux/fs.h */ + __u16 max_integrity_segments; /* 0 means no limit */ + __u8 interval_exp; + __u8 metadata_size; /* UBLK_PARAM_TYPE_INTEGRITY requires nonzero */ + __u8 pi_offset; + __u8 csum_type; /* LBMD_PI_CSUM_* from linux/fs.h */ + __u8 tag_size; + __u8 pad[5]; +}; + struct ublk_params { /* * Total length of parameters, userspace has to set 'len' for both @@ -614,6 +631,7 @@ struct ublk_params { #define UBLK_PARAM_TYPE_ZONED (1 << 3) #define UBLK_PARAM_TYPE_DMA_ALIGN (1 << 4) #define UBLK_PARAM_TYPE_SEGMENT (1 << 5) +#define UBLK_PARAM_TYPE_INTEGRITY (1 << 6) /* requires UBLK_F_INTEGRITY */ __u32 types; /* types of parameter included */ struct ublk_param_basic basic; @@ -622,6 +640,7 @@ struct ublk_params { struct ublk_param_zoned zoned; struct ublk_param_dma_align dma; struct ublk_param_segment seg; + struct ublk_param_integrity integrity; }; #endif -- cgit v1.2.3 From f82f0a16a8270b17211254beeb123d11a0f279cd Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Thu, 8 Jan 2026 02:19:32 -0700 Subject: ublk: set UBLK_IO_F_INTEGRITY in ublksrv_io_desc Indicate to the ublk server when an incoming request has integrity data by setting UBLK_IO_F_INTEGRITY in the ublksrv_io_desc's op_flags field. Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 4c141d7e4710..dfde4aee39eb 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -414,6 +414,8 @@ struct ublksrv_ctrl_dev_info { * passed in. */ #define UBLK_IO_F_NEED_REG_BUF (1U << 17) +/* Request has an integrity data buffer */ +#define UBLK_IO_F_INTEGRITY (1UL << 18) /* * io cmd is described by this structure, and stored in share memory, indexed -- cgit v1.2.3 From be82a89066d595da334f6e153ababcedc3f92ad6 Mon Sep 17 00:00:00 2001 From: Stanley Zhang Date: Thu, 8 Jan 2026 02:19:37 -0700 Subject: ublk: implement integrity user copy Add a function ublk_copy_user_integrity() to copy integrity information between a request and a user iov_iter. This mirrors the existing ublk_copy_user_pages() but operates on request integrity data instead of regular data. Check UBLKSRV_IO_INTEGRITY_FLAG in iocb->ki_pos in ublk_user_copy() to choose between copying data or integrity data. [csander: change offset units from data bytes to integrity data bytes, fix CONFIG_BLK_DEV_INTEGRITY=n build, rebase on user copy refactor] Signed-off-by: Stanley Zhang Signed-off-by: Caleb Sander Mateos Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index dfde4aee39eb..61ac5d8e1078 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -134,6 +134,10 @@ #define UBLKSRV_IO_BUF_TOTAL_BITS (UBLK_QID_OFF + UBLK_QID_BITS) #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) +/* Copy to/from request integrity buffer instead of data buffer */ +#define UBLK_INTEGRITY_FLAG_OFF 62 +#define UBLKSRV_IO_INTEGRITY_FLAG (1ULL << UBLK_INTEGRITY_FLAG_OFF) + /* * ublk server can register data buffers for incoming I/O requests with a sparse * io_uring buffer table. The request buffer can then be used as the data buffer -- cgit v1.2.3 From e6ce36ccc86f6d447808a6e620f56d440d74aa19 Mon Sep 17 00:00:00 2001 From: Askar Safin Date: Wed, 19 Nov 2025 22:24:07 +0000 Subject: init: remove /proc/sys/kernel/real-root-dev It is not used anymore. Signed-off-by: Askar Safin Link: https://patch.msgid.link/20251119222407.3333257-4-safinaskar@gmail.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- include/uapi/linux/sysctl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 63d1464cb71c..1c7fe0f4dca4 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -92,7 +92,6 @@ enum KERN_DOMAINNAME=8, /* string: domainname */ KERN_PANIC=15, /* int: panic timeout */ - KERN_REALROOTDEV=16, /* real root device to mount after initrd */ KERN_SPARC_REBOOT=21, /* reboot command on Sparc */ KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */ -- cgit v1.2.3 From e1cbdf78f60c35a1a320ca401852fd6a73624a4a Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Fri, 9 Jan 2026 19:14:39 +1100 Subject: wifi: cfg80211: include S1G_NO_PRIMARY flag when sending channel When sending a channel ensure we include the IEEE80211_CHAN_S1G_NO_PRIMARY flag. Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20260109081439.3168-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8134f10e4e6c..964e1c779cdd 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4444,6 +4444,9 @@ enum nl80211_wmm_rule { * channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_NO_16MHZ: 16 MHz operation is not allowed on this * channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_S1G_NO_PRIMARY: Channel is not permitted for use + * as a primary channel. Does not prevent the channel from existing + * as a non-primary subchannel. Only applicable to S1G channels. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4492,6 +4495,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_4MHZ, NL80211_FREQUENCY_ATTR_NO_8MHZ, NL80211_FREQUENCY_ATTR_NO_16MHZ, + NL80211_FREQUENCY_ATTR_S1G_NO_PRIMARY, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, -- cgit v1.2.3 From 93ada1b3da398b492c45429cef1a1c9651d5c7ba Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Tue, 13 Jan 2026 00:05:01 +0200 Subject: ublk: add UBLK_CMD_TRY_STOP_DEV command Add a best-effort stop command, UBLK_CMD_TRY_STOP_DEV, which only stops a ublk device when it has no active openers. Unlike UBLK_CMD_STOP_DEV, this command does not disrupt existing users. New opens are blocked only after disk_openers has reached zero; if the device is busy, the command returns -EBUSY and leaves it running. The ub->block_open flag is used only to close a race with an in-progress open and does not otherwise change open behavior. Advertise support via the UBLK_F_SAFE_STOP_DEV feature flag. Signed-off-by: Yoav Cohen Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 61ac5d8e1078..90f47da4f435 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -55,7 +55,8 @@ _IOWR('u', 0x15, struct ublksrv_ctrl_cmd) #define UBLK_U_CMD_QUIESCE_DEV \ _IOWR('u', 0x16, struct ublksrv_ctrl_cmd) - +#define UBLK_U_CMD_TRY_STOP_DEV \ + _IOWR('u', 0x17, struct ublksrv_ctrl_cmd) /* * 64bits are enough now, and it should be easy to extend in case of * running out of feature flags @@ -321,6 +322,12 @@ */ #define UBLK_F_INTEGRITY (1ULL << 16) +/* + * The device supports the UBLK_CMD_TRY_STOP_DEV command, which + * allows stopping the device only if there are no openers. + */ +#define UBLK_F_SAFE_STOP_DEV (1ULL << 17) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 602544773763da411ffa67567fa1d146f3a40231 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jan 2026 16:31:09 -0800 Subject: uapi: promote EFSCORRUPTED and EUCLEAN to errno.h Stop definining these privately and instead move them to the uapi errno.h so that they become canonical instead of copy pasta. Cc: linux-api@vger.kernel.org Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/176826402587.3490369.17659117524205214600.stgit@frogsfrogsfrogs Reviewed-by: Gao Xiang Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/uapi/asm-generic/errno.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/errno.h b/include/uapi/asm-generic/errno.h index cf9c51ac49f9..92e7ae493ee3 100644 --- a/include/uapi/asm-generic/errno.h +++ b/include/uapi/asm-generic/errno.h @@ -55,6 +55,7 @@ #define EMULTIHOP 72 /* Multihop attempted */ #define EDOTDOT 73 /* RFS specific error */ #define EBADMSG 74 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EOVERFLOW 75 /* Value too large for defined data type */ #define ENOTUNIQ 76 /* Name not unique on network */ #define EBADFD 77 /* File descriptor in bad state */ @@ -98,6 +99,7 @@ #define EINPROGRESS 115 /* Operation now in progress */ #define ESTALE 116 /* Stale file handle */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ -- cgit v1.2.3 From 406fc2e9ca65e0df345ebf4ce95aa87cb6416f35 Mon Sep 17 00:00:00 2001 From: Deepa Guthyappa Madivalara Date: Wed, 10 Dec 2025 10:59:04 -0800 Subject: media: uapi: videodev2: Add support for AV1 stateful decoder Introduce a new pixel format, V4L2_PIX_FMT_AV1, to the Video4Linux2(V4L2) API. This format is intended for AV1 bitstreams in stateful decoding/encoding workflows. The fourcc code 'AV10' is used to distinguish this format from the existing V4L2_PIX_FMT_AV1_FRAME, which is used for stateless AV1 decoder implementation. Reviewed-by: Bryan O'Donoghue Reviewed-by: Nicolas Dufresne Reviewed-by: Hans Verkuil Signed-off-by: Deepa Guthyappa Madivalara Tested-by: Val Packett Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index add08188f068..848e86617d5c 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -775,6 +775,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_H264_SLICE v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */ #define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ #define V4L2_PIX_FMT_AV1_FRAME v4l2_fourcc('A', 'V', '1', 'F') /* AV1 parsed frame */ +#define V4L2_PIX_FMT_AV1 v4l2_fourcc('A', 'V', '0', '1') /* AV1 */ #define V4L2_PIX_FMT_SPK v4l2_fourcc('S', 'P', 'K', '0') /* Sorenson Spark */ #define V4L2_PIX_FMT_RV30 v4l2_fourcc('R', 'V', '3', '0') /* RealVideo 8 */ #define V4L2_PIX_FMT_RV40 v4l2_fourcc('R', 'V', '4', '0') /* RealVideo 9 & 10 */ -- cgit v1.2.3 From 1bddd758bac21fbbd8a06af746ec7b6d878a9d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20K=C3=B6ppeler?= Date: Fri, 9 Jan 2026 14:15:34 +0100 Subject: net/sched: sch_cake: share shaper state across sub-instances of cake_mq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds shared shaper state across the cake instances beneath a cake_mq qdisc. It works by periodically tracking the number of active instances, and scaling the configured rate by the number of active queues. The scan is lockless and simply reads the qlen and the last_active state variable of each of the instances configured beneath the parent cake_mq instance. Locking is not required since the values are only updated by the owning instance, and eventual consistency is sufficient for the purpose of estimating the number of active queues. The interval for scanning the number of active queues is set to 200 us. We found this to be a good tradeoff between overhead and response time. For a detailed analysis of this aspect see the Netdevconf talk: https://netdevconf.info/0x19/docs/netdev-0x19-paper16-talk-paper.pdf Reviewed-by: Jamal Hadi Salim Signed-off-by: Jonas Köppeler Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20260109-mq-cake-sub-qdisc-v8-5-8d613fece5d8@redhat.com Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index c2da76e78bad..66e8072f44df 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1036,6 +1036,7 @@ enum { TCA_CAKE_STATS_DROP_NEXT_US, TCA_CAKE_STATS_P_DROP, TCA_CAKE_STATS_BLUE_TIMER_US, + TCA_CAKE_STATS_ACTIVE_QUEUES, __TCA_CAKE_STATS_MAX }; #define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) -- cgit v1.2.3 From f29c852149f94dc1975c64fa919b3dd62db04d23 Mon Sep 17 00:00:00 2001 From: Ainy Kumari Date: Wed, 14 Jan 2026 16:48:52 +0530 Subject: wifi: cfg80211: add support for EPPKE Authentication Protocol Add an extended feature flag NL80211_EXT_FEATURE_EPPKE to allow a driver to indicate support for the Enhanced Privacy Protection Key Exchange (EPPKE) authentication protocol in non-AP STA mode, as defined in "IEEE P802.11bi/D3.0, 12.16.9". In case of SME in userspace, the Authentication frame body is prepared in userspace while the driver finalizes the Authentication frame once it receives the required fields and elements. The driver indicates support for EPPKE using the extended feature flag so that userspace can initiate EPPKE authentication. When the feature flag is set, process EPPKE Authentication frames from userspace in non-AP STA mode. If the flag is not set, reject EPPKE Authentication frames. Define a new authentication type NL80211_AUTHTYPE_EPPKE for EPPKE. Signed-off-by: Ainy Kumari Co-developed-by: Kavita Kavita Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20260114111900.2196941-2-kavita.kavita@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 964e1c779cdd..351d4d176f87 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5429,6 +5429,7 @@ enum nl80211_bss_status { * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key + * @NL80211_AUTHTYPE_EPPKE: Enhanced Privacy Protection Key Exchange * @__NL80211_AUTHTYPE_NUM: internal * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by @@ -5444,6 +5445,7 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_FILS_SK, NL80211_AUTHTYPE_FILS_SK_PFS, NL80211_AUTHTYPE_FILS_PK, + NL80211_AUTHTYPE_EPPKE, /* keep last */ __NL80211_AUTHTYPE_NUM, @@ -6748,6 +6750,10 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_BEACON_RATE_EHT: Driver supports beacon rate * configuration (AP/mesh) with EHT rates. * + * @NL80211_EXT_FEATURE_EPPKE: Driver supports Enhanced Privacy Protection + * Key Exchange (EPPKE) with user space SME (NL80211_CMD_AUTHENTICATE) + * in non-AP STA mode. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6824,6 +6830,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_DFS_CONCURRENT, NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT, NL80211_EXT_FEATURE_BEACON_RATE_EHT, + NL80211_EXT_FEATURE_EPPKE, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 9d17a040c15d4b99484f13cf08dd45a9e308beeb Mon Sep 17 00:00:00 2001 From: Ainy Kumari Date: Wed, 14 Jan 2026 16:48:53 +0530 Subject: wifi: cfg80211: add feature flag for (re)association frame encryption Introduce an extended feature flag that allows drivers to signal support for encryption of (Re)Association Request and Response frames in both non-AP STA and AP mode, as specified in specification "IEEE P802.11bi/D3.0, 12.16.6". Signed-off-by: Ainy Kumari Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20260114111900.2196941-3-kavita.kavita@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 351d4d176f87..60573334e086 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6754,6 +6754,11 @@ enum nl80211_feature_flags { * Key Exchange (EPPKE) with user space SME (NL80211_CMD_AUTHENTICATE) * in non-AP STA mode. * + * @NL80211_EXT_FEATURE_ASSOC_FRAME_ENCRYPTION: This specifies that the + * driver supports encryption of (Re)Association Request and Response + * frames in both non‑AP STA and AP mode as specified in + * "IEEE P802.11bi/D3.0, 12.16.6". + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6831,6 +6836,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT, NL80211_EXT_FEATURE_BEACON_RATE_EHT, NL80211_EXT_FEATURE_EPPKE, + NL80211_EXT_FEATURE_ASSOC_FRAME_ENCRYPTION, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 6ee3a22c61cdf57d71592ec9f3b9439cd5d0c75f Mon Sep 17 00:00:00 2001 From: Sai Pratyusha Magam Date: Wed, 14 Jan 2026 16:48:55 +0530 Subject: wifi: nl80211: Add support for EPP peer indication Introduce a new netlink attribute NL80211_ATTR_EPP_PEER to be used with NL80211_CMD_NEW_STA and NL80211_CMD_ADD_LINK_STA for the userspace to indicate that a non-AP STA is an Enhanced Privacy Protection (EPP) peer. Co-developed-by: Rohan Dutta Signed-off-by: Rohan Dutta Signed-off-by: Sai Pratyusha Magam Signed-off-by: Kavita Kavita Link: https://patch.msgid.link/20260114111900.2196941-5-kavita.kavita@oss.qualcomm.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 60573334e086..eb92296457c9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2973,6 +2973,9 @@ enum nl80211_commands { * primary channel is 2 MHz wide, and the control channel designates * the 1 MHz primary subchannel within that 2 MHz primary. * + * @NL80211_ATTR_EPP_PEER: A flag attribute to indicate if the peer is an EPP + * STA. Used with %NL80211_CMD_NEW_STA and %NL80211_CMD_ADD_LINK_STA + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3541,6 +3544,8 @@ enum nl80211_attrs { NL80211_ATTR_S1G_PRIMARY_2MHZ, + NL80211_ATTR_EPP_PEER, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From dacbfc16780837aa3e00c684d89492d211fd809f Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 5 Jan 2026 13:24:03 +0100 Subject: crypto: af_alg - Annotate struct af_alg_iv with __counted_by Add the __counted_by() compiler attribute to the flexible array member 'iv' to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE. Reviewed-by: Simon Horman Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20260105122402.2685-2-thorsten.blum@linux.dev Signed-off-by: Kees Cook --- include/uapi/linux/if_alg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index b35871cbeed7..4f51e198ac2e 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -42,7 +42,7 @@ struct sockaddr_alg_new { struct af_alg_iv { __u32 ivlen; - __u8 iv[]; + __u8 iv[] __counted_by(ivlen); }; /* Socket options */ -- cgit v1.2.3 From d2bdcde9626cbea0c44a6aaa33b440c8adf81e09 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Wed, 14 Jan 2026 09:17:45 +0800 Subject: perf/x86/intel: Add support for PEBS memory auxiliary info field in DMR With the introduction of the OMR feature, the PEBS memory auxiliary info field for load and store latency events has been restructured for DMR. The memory auxiliary info field's bit[8] indicates whether a L2 cache miss occurred for a memory load or store instruction. If bit[8] is 0, it signifies no L2 cache miss, and bits[7:0] specify the exact cache data source (up to the L2 cache level). If bit[8] is 1, bits[7:0] represent the OMR encoding, indicating the specific L3 cache or memory region involved in the memory access. A significant enhancement is OMR encoding provides up to 8 fine-grained memory regions besides the cache region. A significant enhancement for OMR encoding is the ability to provide up to 8 fine-grained memory regions in addition to the cache region, offering more detailed insights into memory access regions. For detailed information on the memory auxiliary info encoding, please refer to section 16.2 "PEBS LOAD LATENCY AND STORE LATENCY FACILITY" in the ISE documentation. This patch ensures that the PEBS memory auxiliary info field is correctly interpreted and utilized in DMR. Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260114011750.350569-3-dapeng1.mi@linux.intel.com --- include/uapi/linux/perf_event.h | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c44a8fb3e418..533393ec94d0 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1330,14 +1330,16 @@ union perf_mem_data_src { mem_snoopx : 2, /* Snoop mode, ext */ mem_blk : 3, /* Access blocked */ mem_hops : 3, /* Hop level */ - mem_rsvd : 18; + mem_region : 5, /* cache/memory regions */ + mem_rsvd : 13; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd : 18, + __u64 mem_rsvd : 13, + mem_region : 5, /* cache/memory regions */ mem_hops : 3, /* Hop level */ mem_blk : 3, /* Access blocked */ mem_snoopx : 2, /* Snoop mode, ext */ @@ -1394,7 +1396,7 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ #define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ -/* 0x007 available */ +#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */ #define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ @@ -1447,6 +1449,25 @@ union perf_mem_data_src { /* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 +/* Cache/Memory region */ +#define PERF_MEM_REGION_NA 0x0 /* Invalid */ +#define PERF_MEM_REGION_RSVD 0x01 /* Reserved */ +#define PERF_MEM_REGION_L_SHARE 0x02 /* Local CA shared cache */ +#define PERF_MEM_REGION_L_NON_SHARE 0x03 /* Local CA non-shared cache */ +#define PERF_MEM_REGION_O_IO 0x04 /* Other CA IO agent */ +#define PERF_MEM_REGION_O_SHARE 0x05 /* Other CA shared cache */ +#define PERF_MEM_REGION_O_NON_SHARE 0x06 /* Other CA non-shared cache */ +#define PERF_MEM_REGION_MMIO 0x07 /* MMIO */ +#define PERF_MEM_REGION_MEM0 0x08 /* Memory region 0 */ +#define PERF_MEM_REGION_MEM1 0x09 /* Memory region 1 */ +#define PERF_MEM_REGION_MEM2 0x0a /* Memory region 2 */ +#define PERF_MEM_REGION_MEM3 0x0b /* Memory region 3 */ +#define PERF_MEM_REGION_MEM4 0x0c /* Memory region 4 */ +#define PERF_MEM_REGION_MEM5 0x0d /* Memory region 5 */ +#define PERF_MEM_REGION_MEM6 0x0e /* Memory region 6 */ +#define PERF_MEM_REGION_MEM7 0x0f /* Memory region 7 */ +#define PERF_MEM_REGION_SHIFT 46 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) -- cgit v1.2.3 From f972bde7326e9cd3498c137a052f2034f975ebae Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Thu, 15 Jan 2026 01:36:25 -0800 Subject: RDMA/mana_ib: Take CQ type from the device type Get CQ type from the used gdma device. The MANA_IB_CREATE_RNIC_CQ flag is ignored. It was used in older kernel versions where the mana_ib was shared between ethernet and rnic. Fixes: d4293f96ce0b ("RDMA/mana_ib: unify mana_ib functions to support any gdma device") Signed-off-by: Konstantin Taranov Link: https://patch.msgid.link/20260115093625.177306-1-kotaranov@linux.microsoft.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mana-abi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h index 45c2df619f07..a75bf32b8cfb 100644 --- a/include/uapi/rdma/mana-abi.h +++ b/include/uapi/rdma/mana-abi.h @@ -17,6 +17,9 @@ #define MANA_IB_UVERBS_ABI_VERSION 1 enum mana_ib_create_cq_flags { + /* Reserved for backward compatibility. Legacy + * kernel versions use it to create CQs in RNIC + */ MANA_IB_CREATE_RNIC_CQ = 1 << 0, }; -- cgit v1.2.3 From 567873005dca1be0a3b3e2e309a8f0de14d2b827 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 15 Jan 2026 08:05:44 +0200 Subject: ethtool: Clarify len/n_stats fields in/out semantics Document that the 'len' field in ethtool_gstrings and 'n_stats' field in ethtool_stats optionally serve dual purposes: on entry they specify the number of items requested, and on return they indicate the number actually returned (which is not necessarily the same). Signed-off-by: Gal Pressman Reviewed-by: Dragos Tatulea Link: https://patch.msgid.link/20260115060544.481550-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 5daa8f225b67..bbfe6e1cf01b 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1096,13 +1096,20 @@ enum ethtool_module_fw_flash_status { * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS * @string_set: String set ID; one of &enum ethtool_stringset - * @len: On return, the number of strings in the string set + * @len: Number of strings in the string set * @data: Buffer for strings. Each string is null-padded to a size of * %ETH_GSTRING_LEN. * * Users must use %ETHTOOL_GSSET_INFO to find the number of strings in * the string set. They must allocate a buffer of the appropriate * size immediately following this structure. + * + * Setting @len on input is optional (though preferred), but must be zeroed + * otherwise. + * When set, @len will return the requested count if it matches the actual + * count; otherwise, it will be zero. + * This prevents issues when the number of strings is different than the + * userspace allocation. */ struct ethtool_gstrings { __u32 cmd; @@ -1179,13 +1186,20 @@ struct ethtool_test { /** * struct ethtool_stats - device-specific statistics * @cmd: Command number = %ETHTOOL_GSTATS - * @n_stats: On return, the number of statistics + * @n_stats: Number of statistics * @data: Array of statistics * * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the * number of statistics that will be returned. They must allocate a * buffer of the appropriate size (8 * number of statistics) * immediately following this structure. + * + * Setting @n_stats on input is optional (though preferred), but must be zeroed + * otherwise. + * When set, @n_stats will return the requested count if it matches the actual + * count; otherwise, it will be zero. + * This prevents issues when the number of stats is different than the + * userspace allocation. */ struct ethtool_stats { __u32 cmd; -- cgit v1.2.3 From d89ccbf3dde727d91a242a5a3f3b70a90579b057 Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Tue, 9 Dec 2025 23:44:36 +0100 Subject: media: v4l: ctrls: add a control for flash/strobe duration Add a V4L2_CID_FLASH_DURATION control to set the duration of a flash/strobe pulse. This controls the length of the flash/strobe pulse output by device (typically a camera sensor) and connected to the flash controller. This is different to the V4L2_CID_FLASH_TIMEOUT control, which is implemented by the flash controller and defines a limit after which the flash is "forcefully" turned off again. Reviewed-by: Laurent Pinchart Signed-off-by: Richard Leitner Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index f84ed133a6c9..357845830fe9 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1192,6 +1192,7 @@ enum v4l2_flash_strobe_source { #define V4L2_CID_FLASH_CHARGE (V4L2_CID_FLASH_CLASS_BASE + 11) #define V4L2_CID_FLASH_READY (V4L2_CID_FLASH_CLASS_BASE + 12) +#define V4L2_CID_FLASH_DURATION (V4L2_CID_FLASH_CLASS_BASE + 13) /* JPEG-class control IDs */ -- cgit v1.2.3 From 5be4154f6255d92d9d2ad5da658d7d33a655386f Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Tue, 9 Dec 2025 23:44:37 +0100 Subject: media: v4l: ctrls: add a control for enabling strobe output Add a control V4L2_CID_FLASH_STROBE_OE to en- or disable the strobe output of v4l2 devices (most likely sensors). Signed-off-by: Richard Leitner Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 357845830fe9..572622e4535e 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1193,6 +1193,7 @@ enum v4l2_flash_strobe_source { #define V4L2_CID_FLASH_CHARGE (V4L2_CID_FLASH_CLASS_BASE + 11) #define V4L2_CID_FLASH_READY (V4L2_CID_FLASH_CLASS_BASE + 12) #define V4L2_CID_FLASH_DURATION (V4L2_CID_FLASH_CLASS_BASE + 13) +#define V4L2_CID_FLASH_STROBE_OE (V4L2_CID_FLASH_CLASS_BASE + 14) /* JPEG-class control IDs */ -- cgit v1.2.3 From 10d28cffb3f6ec7ad67f0a4cd32c2afa92909452 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 3 Dec 2025 16:24:38 +0000 Subject: comedi: Fix getting range information for subdevices 16 to 255 The `COMEDI_RANGEINFO` ioctl does not work properly for subdevice indices above 15. Currently, the only in-tree COMEDI drivers that support more than 16 subdevices are the "8255" driver and the "comedi_bond" driver. Making the ioctl work for subdevice indices up to 255 is achievable. It needs minor changes to the handling of the `COMEDI_RANGEINFO` and `COMEDI_CHANINFO` ioctls that should be mostly harmless to user-space, apart from making them less broken. Details follow... The `COMEDI_RANGEINFO` ioctl command gets the list of supported ranges (usually with units of volts or milliamps) for a COMEDI subdevice or channel. (Only some subdevices have per-channel range tables, indicated by the `SDF_RANGETYPE` flag in the subdevice information.) It uses a `range_type` value and a user-space pointer, both supplied by user-space, but the `range_type` value should match what was obtained using the `COMEDI_CHANINFO` ioctl (if the subdevice has per-channel range tables) or `COMEDI_SUBDINFO` ioctl (if the subdevice uses a single range table for all channels). Bits 15 to 0 of the `range_type` value contain the length of the range table, which is the only part that user-space should care about (so it can use a suitably sized buffer to fetch the range table). Bits 23 to 16 store the channel index, which is assumed to be no more than 255 if the subdevice has per-channel range tables, and is set to 0 if the subdevice has a single range table. For `range_type` values produced by the `COMEDI_SUBDINFO` ioctl, bits 31 to 24 contain the subdevice index, which is assumed to be no more than 255. But for `range_type` values produced by the `COMEDI_CHANINFO` ioctl, bits 27 to 24 contain the subdevice index, which is assumed to be no more than 15, and bits 31 to 28 contain the COMEDI device's minor device number for some unknown reason lost in the mists of time. The `COMEDI_RANGEINFO` ioctl extract the length from bits 15 to 0 of the user-supplied `range_type` value, extracts the channel index from bits 23 to 16 (only used if the subdevice has per-channel range tables), extracts the subdevice index from bits 27 to 24, and ignores bits 31 to 28. So for subdevice indices 16 to 255, the `COMEDI_SUBDINFO` or `COMEDI_CHANINFO` ioctl will report a `range_type` value that doesn't work with the `COMEDI_RANGEINFO` ioctl. It will either get the range table for the subdevice index modulo 16, or will fail with `-EINVAL`. To fix this, always use bits 31 to 24 of the `range_type` value to hold the subdevice index (assumed to be no more than 255). This affects the `COMEDI_CHANINFO` and `COMEDI_RANGEINFO` ioctls. There should not be anything in user-space that depends on the old, broken usage, although it may now see different values in bits 31 to 28 of the `range_type` values reported by the `COMEDI_CHANINFO` ioctl for subdevices that have per-channel subdevices. User-space should not be trying to decode bits 31 to 16 of the `range_type` values anyway. Fixes: ed9eccbe8970 ("Staging: add comedi core") Cc: stable@vger.kernel.org #5.17+ Signed-off-by: Ian Abbott Link: https://patch.msgid.link/20251203162438.176841-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/comedi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/comedi.h b/include/uapi/linux/comedi.h index 7314e5ee0a1e..798ec9a39e12 100644 --- a/include/uapi/linux/comedi.h +++ b/include/uapi/linux/comedi.h @@ -640,7 +640,7 @@ struct comedi_chaninfo { /** * struct comedi_rangeinfo - used to retrieve the range table for a channel - * @range_type: Encodes subdevice index (bits 27:24), channel index + * @range_type: Encodes subdevice index (bits 31:24), channel index * (bits 23:16) and range table length (bits 15:0). * @range_ptr: Pointer to array of @struct comedi_krange to be filled * in with the range table for the channel or subdevice. -- cgit v1.2.3 From 9b8a0ba68246a61d903ce62c35c303b1501df28b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 29 Dec 2025 14:03:24 +0100 Subject: mount: add OPEN_TREE_NAMESPACE When creating containers the setup usually involves using CLONE_NEWNS via clone3() or unshare(). This copies the caller's complete mount namespace. The runtime will also assemble a new rootfs and then use pivot_root() to switch the old mount tree with the new rootfs. Afterward it will recursively umount the old mount tree thereby getting rid of all mounts. On a basic system here where the mount table isn't particularly large this still copies about 30 mounts. Copying all of these mounts only to get rid of them later is pretty wasteful. This is exacerbated if intermediary mount namespaces are used that only exist for a very short amount of time and are immediately destroyed again causing a ton of mounts to be copied and destroyed needlessly. With a large mount table and a system where thousands or ten-thousands of containers are spawned in parallel this quickly becomes a bottleneck increasing contention on the semaphore. Extend open_tree() with a new OPEN_TREE_NAMESPACE flag. Similar to OPEN_TREE_CLONE only the indicated mount tree is copied. Instead of returning a file descriptor referring to that mount tree OPEN_TREE_NAMESPACE will cause open_tree() to return a file descriptor to a new mount namespace. In that new mount namespace the copied mount tree has been mounted on top of a copy of the real rootfs. The caller can setns() into that mount namespace and perform any additionally required setup such as move_mount() detached mounts in there. This allows OPEN_TREE_NAMESPACE to function as a combined unshare(CLONE_NEWNS) and pivot_root(). A caller may for example choose to create an extremely minimal rootfs: fd_mntns = open_tree(-EBADF, "/var/lib/containers/wootwoot", OPEN_TREE_NAMESPACE); This will create a mount namespace where "wootwoot" has become the rootfs mounted on top of the real rootfs. The caller can now setns() into this new mount namespace and assemble additional mounts. This also works with user namespaces: unshare(CLONE_NEWUSER); fd_mntns = open_tree(-EBADF, "/var/lib/containers/wootwoot", OPEN_TREE_NAMESPACE); which creates a new mount namespace owned by the earlier created user namespace with "wootwoot" as the rootfs mounted on top of the real rootfs. Link: https://patch.msgid.link/20251229-work-empty-namespace-v1-1-bfb24c7b061f@kernel.org Tested-by: Jeff Layton Reviewed-by: Aleksa Sarai Reviewed-by: Jeff Layton Suggested-by: Christian Brauner Suggested-by: Aleksa Sarai Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 18c624405268..d9d86598d100 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -61,7 +61,8 @@ /* * open_tree() flags. */ -#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLONE (1 << 0) /* Clone the target tree and attach the clone */ +#define OPEN_TREE_NAMESPACE (1 << 1) /* Clone the target tree into a new mount namespace */ #define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ /* -- cgit v1.2.3 From 1e5271393d777f6159d896943b4c44c4f3ecff52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 15 Jan 2026 08:35:44 +0100 Subject: hyper-v: Mark inner union in hv_kvp_exchg_msg_value as packed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unpacked union within a packed struct generates alignment warnings on clang for 32-bit ARM: ./usr/include/linux/hyperv.h:361:2: error: field within 'struct hv_kvp_exchg_msg_value' is less aligned than 'union hv_kvp_exchg_msg_value::(anonymous at ./usr/include/linux/hyperv.h:361:2)' and is usually due to 'struct hv_kvp_exchg_msg_value' being packed, which can lead to unaligned accesses [-Werror,-Wunaligned-access] 361 | union { | ^ With the recent changes to compile-test the UAPI headers in more cases, this warning in combination with CONFIG_WERROR breaks the build. Fix the warning. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202512140314.DzDxpIVn-lkp@intel.com/ Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/linux-kbuild/20260110-uapi-test-disable-headers-arm-clang-unaligned-access-v1-1-b7b0fa541daa@kernel.org/ Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/linux-kbuild/29b2e736-d462-45b7-a0a9-85f8d8a3de56@app.fastmail.com/ Signed-off-by: Thomas Weißschuh Acked-by: Wei Liu (Microsoft) Tested-by: Nicolas Schier Reviewed-by: Nicolas Schier Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260115-kbuild-alignment-vbox-v1-1-076aed1623ff@linutronix.de Signed-off-by: Nathan Chancellor --- include/uapi/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index aaa502a7bff4..1749b35ab2c2 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -362,7 +362,7 @@ struct hv_kvp_exchg_msg_value { __u8 value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; __u32 value_u32; __u64 value_u64; - }; + } __attribute__((packed)); } __attribute__((packed)); struct hv_kvp_msg_enumerate { -- cgit v1.2.3 From c25d01e1c4f2d43f47af87c00e223f5ca7c71792 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 15 Jan 2026 08:35:45 +0100 Subject: virt: vbox: uapi: Mark inner unions in packed structs as packed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unpacked unions within a packed struct generates alignment warnings on clang for 32-bit ARM: ./usr/include/linux/vbox_vmmdev_types.h:239:4: error: field u within 'struct vmmdev_hgcm_function_parameter32' is less aligned than 'union (unnamed union at ./usr/include/linux/vbox_vmmdev_types.h:223:2)' and is usually due to 'struct vmmdev_hgcm_function_parameter32' being packed, which can lead to unaligned accesses [-Werror,-Wunaligned-access] 239 | } u; | ^ ./usr/include/linux/vbox_vmmdev_types.h:254:6: error: field u within 'struct vmmdev_hgcm_function_parameter64::(anonymous union)::(unnamed at ./usr/include/linux/vbox_vmmdev_types.h:249:3)' is less aligned than 'union (unnamed union at ./usr/include/linux/vbox_vmmdev_types.h:251:4)' and is usually due to 'struct vmmdev_hgcm_function_parameter64::(anonymous union)::(unnamed at ./usr/include/linux/vbox_vmmdev_types.h:249:3)' being packed, which can lead to unaligned accesses [-Werror,-Wunaligned-access] With the recent changes to compile-test the UAPI headers in more cases, these warning in combination with CONFIG_WERROR breaks the build. Fix the warnings. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202512140314.DzDxpIVn-lkp@intel.com/ Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/linux-kbuild/20260110-uapi-test-disable-headers-arm-clang-unaligned-access-v1-1-b7b0fa541daa@kernel.org/ Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/linux-kbuild/29b2e736-d462-45b7-a0a9-85f8d8a3de56@app.fastmail.com/ Signed-off-by: Thomas Weißschuh Tested-by: Nicolas Schier Reviewed-by: Nicolas Schier Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260115-kbuild-alignment-vbox-v1-2-076aed1623ff@linutronix.de Signed-off-by: Nathan Chancellor --- include/uapi/linux/vbox_vmmdev_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h index 6073858d52a2..11f3627c3729 100644 --- a/include/uapi/linux/vbox_vmmdev_types.h +++ b/include/uapi/linux/vbox_vmmdev_types.h @@ -236,7 +236,7 @@ struct vmmdev_hgcm_function_parameter32 { /** Relative to the request header. */ __u32 offset; } page_list; - } u; + } __packed u; } __packed; VMMDEV_ASSERT_SIZE(vmmdev_hgcm_function_parameter32, 4 + 8); @@ -251,7 +251,7 @@ struct vmmdev_hgcm_function_parameter64 { union { __u64 phys_addr; __u64 linear_addr; - } u; + } __packed u; } __packed pointer; struct { /** Size of the buffer described by the page list. */ -- cgit v1.2.3 From 150a04d817d8f5be5a4f92799827cdc8d7e45989 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 16 Jan 2026 00:57:57 +0000 Subject: compiler_types.h: Attributes: Add __counted_by_ptr macro Introduce __counted_by_ptr(), which works like __counted_by(), but for pointer struct members. struct foo { int a, b, c; char *buffer __counted_by_ptr(bytes); short nr_bars; struct bar *bars __counted_by_ptr(nr_bars); size_t bytes; }; Because "counted_by" can only be applied to pointer members in very recent compiler versions, its application ends up needing to be distinct from flexibe array "counted_by" annotations, hence a separate macro. This is a reworking of Kees' previous patch [1]. Link: https://lore.kernel.org/all/20251020220118.1226740-1-kees@kernel.org/ [1] Co-developed-by: Kees Cook Signed-off-by: Bill Wendling Link: https://patch.msgid.link/20260116005838.2419118-1-morbo@google.com Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 9a28f7d9a334..111b097ec00b 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -72,6 +72,10 @@ #define __counted_by_be(m) #endif +#ifndef __counted_by_ptr +#define __counted_by_ptr(m) +#endif + #ifdef __KERNEL__ #define __kernel_nonstring __nonstring #else -- cgit v1.2.3 From ca9d74eb5f6aea6eee746aae648382332dbcf24e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 13 Jan 2026 08:44:17 +0100 Subject: uapi: add INT_MAX and INT_MIN constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some UAPI headers use INT_MAX and INT_MIN. Currently they include for their definitions, which introduces a problematic dependency on libc. Add custom, namespaced definitions of INT_MAX and INT_MIN using the same values as the regular kernel code. These definitions are not added to uapi/linux/limits.h, as that header will conflict with libc definitions on some platforms. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20260113-uapi-limits-v2-1-93c20f4b2c1a@linutronix.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/typelimits.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 include/uapi/linux/typelimits.h (limited to 'include/uapi') diff --git a/include/uapi/linux/typelimits.h b/include/uapi/linux/typelimits.h new file mode 100644 index 000000000000..8166c639b518 --- /dev/null +++ b/include/uapi/linux/typelimits.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_TYPELIMITS_H +#define _UAPI_LINUX_TYPELIMITS_H + +#define __KERNEL_INT_MAX ((int)(~0U >> 1)) +#define __KERNEL_INT_MIN (-__KERNEL_INT_MAX - 1) + +#endif /* _UAPI_LINUX_TYPELIMITS_H */ -- cgit v1.2.3 From a8a11e5237aed71b7f5f9d33c554ef06fe974311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 13 Jan 2026 08:44:18 +0100 Subject: ethtool: uapi: Use UAPI definition of INT_MAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using to gain access to INT_MAX introduces a dependency on a libc, which UAPI headers should not do. Use the equivalent UAPI constant. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20260113-uapi-limits-v2-2-93c20f4b2c1a@linutronix.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index bbfe6e1cf01b..ce9aeb65a8e1 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -15,13 +15,10 @@ #define _UAPI_LINUX_ETHTOOL_H #include +#include #include #include -#ifndef __KERNEL__ -#include /* for INT_MAX */ -#endif - /* All structures exposed to userland should be defined such that they * have the same layout for 32-bit and 64-bit userland. */ @@ -2216,7 +2213,7 @@ enum ethtool_link_mode_bit_indices { static inline int ethtool_validate_speed(__u32 speed) { - return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN; + return speed <= __KERNEL_INT_MAX || speed == (__u32)SPEED_UNKNOWN; } /* Duplex, half or full. */ -- cgit v1.2.3 From 0b3877bec78b0f26a280078a15f8992426de1db7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 13 Jan 2026 08:44:19 +0100 Subject: netfilter: uapi: Use UAPI definition of INT_MAX and INT_MIN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using to gain access to INT_MAX and INT_MIN introduces a dependency on a libc, which UAPI headers should not do. Use the equivalent UAPI constants. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20260113-uapi-limits-v2-3-93c20f4b2c1a@linutronix.de Signed-off-by: Jakub Kicinski --- include/uapi/linux/netfilter_bridge.h | 9 +++------ include/uapi/linux/netfilter_ipv4.h | 9 ++++----- include/uapi/linux/netfilter_ipv6.h | 7 +++---- 3 files changed, 10 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h index 1610fdbab98d..f6e8d1e05c97 100644 --- a/include/uapi/linux/netfilter_bridge.h +++ b/include/uapi/linux/netfilter_bridge.h @@ -10,10 +10,7 @@ #include #include #include - -#ifndef __KERNEL__ -#include /* for INT_MIN, INT_MAX */ -#endif +#include /* Bridge Hooks */ /* After promisc drops, checksum checks. */ @@ -31,14 +28,14 @@ #define NF_BR_NUMHOOKS 6 enum nf_br_hook_priorities { - NF_BR_PRI_FIRST = INT_MIN, + NF_BR_PRI_FIRST = __KERNEL_INT_MIN, NF_BR_PRI_NAT_DST_BRIDGED = -300, NF_BR_PRI_FILTER_BRIDGED = -200, NF_BR_PRI_BRNF = 0, NF_BR_PRI_NAT_DST_OTHER = 100, NF_BR_PRI_FILTER_OTHER = 200, NF_BR_PRI_NAT_SRC = 300, - NF_BR_PRI_LAST = INT_MAX, + NF_BR_PRI_LAST = __KERNEL_INT_MAX, }; #endif /* _UAPI__LINUX_BRIDGE_NETFILTER_H */ diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h index 155e77d6a42d..439d3c59862b 100644 --- a/include/uapi/linux/netfilter_ipv4.h +++ b/include/uapi/linux/netfilter_ipv4.h @@ -7,12 +7,11 @@ #include +#include /* only for userspace compatibility */ #ifndef __KERNEL__ -#include /* for INT_MIN, INT_MAX */ - /* IP Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP_PRE_ROUTING 0 @@ -28,7 +27,7 @@ #endif /* ! __KERNEL__ */ enum nf_ip_hook_priorities { - NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_FIRST = __KERNEL_INT_MIN, NF_IP_PRI_RAW_BEFORE_DEFRAG = -450, NF_IP_PRI_CONNTRACK_DEFRAG = -400, NF_IP_PRI_RAW = -300, @@ -41,8 +40,8 @@ enum nf_ip_hook_priorities { NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, NF_IP_PRI_CONNTRACK_HELPER = 300, - NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, - NF_IP_PRI_LAST = INT_MAX, + NF_IP_PRI_CONNTRACK_CONFIRM = __KERNEL_INT_MAX, + NF_IP_PRI_LAST = __KERNEL_INT_MAX, }; /* Arguments for setsockopt SOL_IP: */ diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h index 80aa9b0799af..0e40d00b37fa 100644 --- a/include/uapi/linux/netfilter_ipv6.h +++ b/include/uapi/linux/netfilter_ipv6.h @@ -10,12 +10,11 @@ #include +#include /* only for userspace compatibility */ #ifndef __KERNEL__ -#include /* for INT_MIN, INT_MAX */ - /* IP6 Hooks */ /* After promisc drops, checksum checks. */ #define NF_IP6_PRE_ROUTING 0 @@ -32,7 +31,7 @@ enum nf_ip6_hook_priorities { - NF_IP6_PRI_FIRST = INT_MIN, + NF_IP6_PRI_FIRST = __KERNEL_INT_MIN, NF_IP6_PRI_RAW_BEFORE_DEFRAG = -450, NF_IP6_PRI_CONNTRACK_DEFRAG = -400, NF_IP6_PRI_RAW = -300, @@ -45,7 +44,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_NAT_SRC = 100, NF_IP6_PRI_SELINUX_LAST = 225, NF_IP6_PRI_CONNTRACK_HELPER = 300, - NF_IP6_PRI_LAST = INT_MAX, + NF_IP6_PRI_LAST = __KERNEL_INT_MAX, }; -- cgit v1.2.3 From 7d8b06ecc45bd679dec58d2cc2bd86223d4e076d Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 15 Jan 2026 06:08:02 +0000 Subject: iommu/amd: Add support for hw_info for iommu capability query AMD IOMMU Extended Feature (EFR) and Extended Feature 2 (EFR2) registers specify features supported by each IOMMU hardware instance. The IOMMU driver checks each feature-specific bits before enabling each feature at run time. For IOMMUFD, the hypervisor passes the raw value of amd_iommu_efr and amd_iommu_efr2 to VMM via iommufd IOMMU_DEVICE_GET_HW_INFO ioctl. Reviewed-by: Nicolin Chen Reviewed-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- include/uapi/linux/iommufd.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 2c41920b641d..3db37f6042a0 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -623,6 +623,32 @@ struct iommu_hw_info_tegra241_cmdqv { __u8 __reserved; }; +/** + * struct iommu_hw_info_amd - AMD IOMMU device info + * + * @efr : Value of AMD IOMMU Extended Feature Register (EFR) + * @efr2: Value of AMD IOMMU Extended Feature 2 Register (EFR2) + * + * Please See description of these registers in the following sections of + * the AMD I/O Virtualization Technology (IOMMU) Specification. + * (https://docs.amd.com/v/u/en-US/48882_3.10_PUB) + * + * - MMIO Offset 0030h IOMMU Extended Feature Register + * - MMIO Offset 01A0h IOMMU Extended Feature 2 Register + * + * Note: The EFR and EFR2 are raw values reported by hardware. + * VMM is responsible to determine the appropriate flags to be exposed to + * the VM since cetertain features are not currently supported by the kernel + * for HW-vIOMMU. + * + * Current VMM-allowed list of feature flags are: + * - EFR[GTSup, GASup, GioSup, PPRSup, EPHSup, GATS, GLX, PASmax] + */ +struct iommu_hw_info_amd { + __aligned_u64 efr; + __aligned_u64 efr2; +}; + /** * enum iommu_hw_info_type - IOMMU Hardware Info Types * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware @@ -632,6 +658,7 @@ struct iommu_hw_info_tegra241_cmdqv { * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM * SMMUv3) info type + * @IOMMU_HW_INFO_TYPE_AMD: AMD IOMMU info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE = 0, @@ -639,6 +666,7 @@ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3, + IOMMU_HW_INFO_TYPE_AMD = 4, }; /** -- cgit v1.2.3 From e05698c10d980ac0a0b57ed81ec9353b9e9533c6 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 15 Jan 2026 06:08:06 +0000 Subject: iommufd: Introduce data struct for AMD nested domain allocation Introduce IOMMU_HWPT_DATA_AMD_GUEST data type for IOMMU guest page table, which is used for stage-1 in nested translation. The data structure contains information necessary for setting up the AMD HW-vIOMMU support. Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- include/uapi/linux/iommufd.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 3db37f6042a0..1dafbc552d37 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -465,16 +465,27 @@ struct iommu_hwpt_arm_smmuv3 { __aligned_le64 ste[2]; }; +/** + * struct iommu_hwpt_amd_guest - AMD IOMMU guest I/O page table data + * (IOMMU_HWPT_DATA_AMD_GUEST) + * @dte: Guest Device Table Entry (DTE) + */ +struct iommu_hwpt_amd_guest { + __aligned_u64 dte[4]; +}; + /** * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table + * @IOMMU_HWPT_DATA_AMD_GUEST: AMD IOMMU guest page table */ enum iommu_hwpt_data_type { IOMMU_HWPT_DATA_NONE = 0, IOMMU_HWPT_DATA_VTD_S1 = 1, IOMMU_HWPT_DATA_ARM_SMMUV3 = 2, + IOMMU_HWPT_DATA_AMD_GUEST = 3, }; /** -- cgit v1.2.3 From cd16edba1c6a24af138e1a5ded2711231fffa99f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Dec 2025 11:19:10 +0100 Subject: ext4: fix ext4_tune_sb_params padding The padding at the end of struct ext4_tune_sb_params is architecture specific and in particular is different between x86-32 and x86-64, since the __u64 member only enforces struct alignment on the latter. This shows up as a new warning when test-building the headers with -Wpadded: include/linux/ext4.h:144:1: error: padding struct size to alignment boundary with 4 bytes [-Werror=padded] All members inside the structure are naturally aligned, so the only difference here is the amount of padding at the end. Make the padding explicit, to have a consistent sizeof(struct ext4_tune_sb_params) of 232 on all architectures and avoid adding compat ioctl handling for EXT4_IOC_GET_TUNE_SB_PARAM/EXT4_IOC_SET_TUNE_SB_PARAM. This is an ABI break on x86-32 but hopefully this can go into 6.18.y early enough as a fixup so no actual users will be affected. Alternatively, the kernel could handle the ioctl commands for both sizes (232 and 228 bytes) on all architectures. Fixes: 04a91570ac67 ("ext4: implemet new ioctls to set and get superblock parameters") Signed-off-by: Arnd Bergmann Reviewed-by: Jan Kara Link: https://patch.msgid.link/20251204101914.1037148-1-arnd@kernel.org Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- include/uapi/linux/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ext4.h b/include/uapi/linux/ext4.h index 411dcc1e4a35..9c683991c32f 100644 --- a/include/uapi/linux/ext4.h +++ b/include/uapi/linux/ext4.h @@ -139,7 +139,7 @@ struct ext4_tune_sb_params { __u32 clear_feature_incompat_mask; __u32 clear_feature_ro_compat_mask; __u8 mount_opts[64]; - __u8 pad[64]; + __u8 pad[68]; }; #define EXT4_TUNE_FL_ERRORS_BEHAVIOR 0x00000001 -- cgit v1.2.3 From 8a42938a28941da29bf3e4cd2af877b0d5d929e1 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 7 Jan 2026 14:22:54 +0200 Subject: wifi: nl80211: ignore cluster id after NAN started After NAN was started, cluster id updates from the user space should not happen, since the device already started a cluster with the previousely provided id. Since NL80211_CMD_CHANGE_NAN_CONFIG requires to set the full NAN configuration, we can't require that NL80211_NAN_CONF_CLUSTER_ID won't be included in this command, and keeping the last confgiured value just to be able to compare it against the new one seems a bit overkill. Therefore, just ignore cluster id in this command and clarify the documentation. Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260107142229.fb55e5853269.I10d18c8f69d98b28916596d6da4207c15ea4abb5@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index eb92296457c9..b0f050e36fa4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -7454,6 +7454,8 @@ enum nl80211_nan_band_conf_attributes { * address that can take values from 50-6F-9A-01-00-00 to * 50-6F-9A-01-FF-FF. This attribute is optional. If not present, * a random Cluster ID will be chosen. + * This attribute will be ignored in NL80211_CMD_CHANGE_NAN_CONFIG + * since after NAN was started, the cluster ID can no longer change. * @NL80211_NAN_CONF_EXTRA_ATTRS: Additional NAN attributes to be * published in the beacons. This is an optional byte array. * @NL80211_NAN_CONF_VENDOR_ELEMS: Vendor-specific elements that will -- cgit v1.2.3 From a9927022c4491ba44249af079e8799ce56f8053c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 15 Jan 2026 12:56:44 +0100 Subject: net: ethtool: Add support for 80Gbps speed USB4 v2 link used in peer-to-peer networking is symmetric 80Gbps so in order to support reading this link speed, add support for it to ethtool. Signed-off-by: Mika Westerberg Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20260115115646.328898-3-mika.westerberg@linux.intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index ce9aeb65a8e1..b74b80508553 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2203,6 +2203,7 @@ enum ethtool_link_mode_bit_indices { #define SPEED_40000 40000 #define SPEED_50000 50000 #define SPEED_56000 56000 +#define SPEED_80000 80000 #define SPEED_100000 100000 #define SPEED_200000 200000 #define SPEED_400000 400000 -- cgit v1.2.3 From 50b359896fe55d0443ed550e1fabba71d242031a Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 18 Jan 2026 09:51:15 +0200 Subject: wifi: cfg80211: ignore link disabled flag from userspace When the AP has an advertised TID to Link Mapping (TTLM) it shall include the element in the association response. As such, when this element is present it needs to be used for the currently dormant links. See Draft P802.11REVmf_D1.0 section 35.3.7.2.3 ("Negotiation of TTLM") for the details. The flag is also not usable in case userspace wants to specify a negotiated TTLM during association. Note that for the link reconfiguration case, mac80211 did not use the information. Draft P802.11REVmf_D1.0 states in section 35.3.6.4 ("Link reconfiguration to the setup links) that we "shall operate with all the TIDs mapped to the newly added links ..." All this means that the flag is not needed. The implementation should parse the information from the association response. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260118093904.754e057896a5.Ifd06f5ef839a93bfd54d0593dc932870f95f3242@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8134f10e4e6c..8433bac48112 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2880,8 +2880,9 @@ enum nl80211_commands { * index. If the userspace includes more RNR elements than number of * MBSSID elements then these will be added in every EMA beacon. * - * @NL80211_ATTR_MLO_LINK_DISABLED: Flag attribute indicating that the link is - * disabled. + * @NL80211_ATTR_MLO_LINK_DISABLED: Unused. It was used to indicate that a link + * is disabled during association. However, the AP will send the + * information by including a TTLM in the association response. * * @NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA: Include BSS usage data, i.e. * include BSSes that can only be used in restricted scenarios and/or -- cgit v1.2.3 From a5546e18f77c0cb15d434bf5b92647687fe483e3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 15 Jan 2026 09:25:48 +0100 Subject: net: Add queue-create operation Add a ynl netdev family operation called queue-create that creates a new queue on a netdevice: name: queue-create attribute-set: queue flags: [admin-perm] do: request: attributes: - ifindex - type - lease reply: &queue-create-op attributes: - id This is a generic operation such that it can be extended for various use cases in future. Right now it is mandatory to specify ifindex, the queue type which is enforced to rx and a lease. The newly created queue id is returned to the caller. A queue from a virtual device can have a lease which refers to another queue from a physical device. This is useful for memory providers and AF_XDP operations which take an ifindex and queue id to allow applications to bind against virtual devices in containers. The lease couples both queues together and allows to proxy the operations from a virtual device in a container to the physical device. In future, the nested lease attribute can be lifted and made optional for other use-cases such as dynamic queue creation for physical netdevs. The lack of lease and the specification of the physical device as an ifindex will imply that we need a real queue to be allocated. Similarly, the queue type enforcement to rx can then be lifted as well to support tx. An early implementation had only driver-specific integration [0], but in order for other virtual devices to reuse, it makes sense to have this as a generic API in core net. For leasing queues, the virtual netdev must have real_num_rx_queue less than num_rx_queues at the time of calling queue-create. The queue-type must be rx as only rx queues are supported for leasing for now. We also enforce that the queue-create ifindex must point to a virtual device, and that the nested lease attribute's ifindex must point to a physical device. The nested lease attribute set contains a netns-id attribute which is currently only intended for dumping as part of the queue-get operation. Also, it is modeled as an s32 type similarly as done elsewhere in the stack. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0] Acked-by: Stanislav Fomichev Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20260115082603.219152-2-daniel@iogearbox.net Signed-off-by: Paolo Abeni --- include/uapi/linux/netdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e0b579a1df4f..7df1056a35fd 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -160,6 +160,7 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, + NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -202,6 +203,15 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_LEASE_IFINDEX = 1, + NETDEV_A_LEASE_QUEUE, + NETDEV_A_LEASE_NETNS_ID, + + __NETDEV_A_LEASE_MAX, + NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) +}; + enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, @@ -228,6 +238,7 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, + NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From b5c3fa4a0b16d4a7d0bd0e5626a13fec0024030a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 15 Jan 2026 09:25:56 +0100 Subject: netkit: Add single device mode for netkit Add a single device mode for netkit instead of netkit pairs. The primary target for the paired devices is to connect network namespaces, of course, and support has been implemented in projects like Cilium [0]. For the rxq leasing the plan is to support two main scenarios related to single device mode: * For the use-case of io_uring zero-copy, the control plane can either set up a netkit pair where the peer device can perform rxq leasing which is then tied to the lifetime of the peer device, or the control plane can use a regular netkit pair to connect the hostns to a Pod/container and dynamically add/remove rxq leasing through a single device without having to interrupt the device pair. In the case of io_uring, the memory pool is used as skb non-linear pages, and thus the skb will go its way through the regular stack into netkit. Things like the netkit policy when no BPF is attached or skb scrubbing etc apply as-is in case the paired devices are used, or if the backend memory is tied to the single device and traffic goes through a paired device. * For the use-case of AF_XDP, the control plane needs to use netkit in the single device mode. The single device mode currently enforces only a pass policy when no BPF is attached, and does not yet support BPF link attachments for AF_XDP. skbs sent to that device get dropped at the moment. Given AF_XDP operates at a lower layer of the stack tying this to the netkit pair did not make sense. In future, the plan is to allow BPF at the XDP layer which can: i) process traffic coming from the AF_XDP application (e.g. QEMU with AF_XDP backend) to filter egress traffic or to push selected egress traffic up to the single netkit device to the local stack (e.g. DHCP requests), and ii) vice-versa skbs sent to the single netkit into the AF_XDP application (e.g. DHCP replies). Also, the control-plane can dynamically manage rxq leasing for the single netkit device without having to interrupt (e.g. down/up cycle) the main netkit pair for the Pod which has traffic going in and out. Signed-off-by: Daniel Borkmann Co-developed-by: David Wei Signed-off-by: David Wei Reviewed-by: Jordan Rife Reviewed-by: Nikolay Aleksandrov Link: https://docs.cilium.io/en/stable/operations/performance/tuning/#netkit-device-mode [0] Link: https://patch.msgid.link/20260115082603.219152-10-daniel@iogearbox.net Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3b491d96e52e..bbd565757298 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1296,6 +1296,11 @@ enum netkit_mode { NETKIT_L3, }; +enum netkit_pairing { + NETKIT_DEVICE_PAIR, + NETKIT_DEVICE_SINGLE, +}; + /* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to * the BPF program if attached. This also means the latter can * consume the two fields if they were populated earlier. @@ -1320,6 +1325,7 @@ enum { IFLA_NETKIT_PEER_SCRUB, IFLA_NETKIT_HEADROOM, IFLA_NETKIT_TAILROOM, + IFLA_NETKIT_PAIRING, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) -- cgit v1.2.3 From 8766d61a1d33cb5f15bfdd6ce9832bbe1fc649c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 Jan 2026 18:04:55 -0800 Subject: Revert "Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp'" This reverts commit 77b9c4a438fc66e2ab004c411056b3fb71a54f2c, reversing changes made to 4515ec4ad58a37e70a9e1256c0b993958c9b7497: 931420a2fc36 ("selftests/net: Add netkit container tests") ab771c938d9a ("selftests/net: Make NetDrvContEnv support queue leasing") 6be87fbb2776 ("selftests/net: Add env for container based tests") 61d99ce3dfc2 ("selftests/net: Add bpf skb forwarding program") 920da3634194 ("netkit: Add xsk support for af_xdp applications") eef51113f8af ("netkit: Add netkit notifier to check for unregistering devices") b5ef109d22d4 ("netkit: Implement rtnl_link_ops->alloc and ndo_queue_create") b5c3fa4a0b16 ("netkit: Add single device mode for netkit") 0073d2fd679d ("xsk: Proxy pool management for leased queues") 1ecea95dd3b5 ("xsk: Extend xsk_rcv_check validation") 804bf334d08a ("net: Proxy netdev_queue_get_dma_dev for leased queues") 0caa9a8ddec3 ("net: Proxy net_mp_{open,close}_rxq for leased queues") ff8889ff9107 ("net, ethtool: Disallow leased real rxqs to be resized") 9e2103f36110 ("net: Add lease info to queue-get response") 31127deddef4 ("net: Implement netdev_nl_queue_create_doit") a5546e18f77c ("net: Add queue-create operation") The series will conflict with io_uring work, and the code needs more polish. Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 6 ------ include/uapi/linux/netdev.h | 11 ----------- 2 files changed, 17 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bbd565757298..3b491d96e52e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1296,11 +1296,6 @@ enum netkit_mode { NETKIT_L3, }; -enum netkit_pairing { - NETKIT_DEVICE_PAIR, - NETKIT_DEVICE_SINGLE, -}; - /* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to * the BPF program if attached. This also means the latter can * consume the two fields if they were populated earlier. @@ -1325,7 +1320,6 @@ enum { IFLA_NETKIT_PEER_SCRUB, IFLA_NETKIT_HEADROOM, IFLA_NETKIT_TAILROOM, - IFLA_NETKIT_PAIRING, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7df1056a35fd..e0b579a1df4f 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -160,7 +160,6 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, - NETDEV_A_QUEUE_LEASE, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -203,15 +202,6 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; -enum { - NETDEV_A_LEASE_IFINDEX = 1, - NETDEV_A_LEASE_QUEUE, - NETDEV_A_LEASE_NETNS_ID, - - __NETDEV_A_LEASE_MAX, - NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) -}; - enum { NETDEV_A_DMABUF_IFINDEX = 1, NETDEV_A_DMABUF_QUEUES, @@ -238,7 +228,6 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, - NETDEV_CMD_QUEUE_CREATE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 64dd89ae01f2708a508e028c28b7906e4702a9a7 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 15 Dec 2025 12:57:53 -0500 Subject: mm/block/fs: remove laptop_mode Laptop mode was introduced to save battery, by delaying and consolidating writes and thereby maximize the time rotating hard drives wouldn't have to spin. Luckily, rotating hard drives, with their high spin-up times and power draw, are a thing of the past for battery-powered devices. Reclaim has also since changed to not write single filesystem pages anymore, and regular filesystem writeback is lumpy by design. The juice doesn't appear worth the squeeze anymore. The footprint of the feature is small, but nevertheless it's a complicating factor in mm, block, filesystems. Developers don't think about it, and it likely hasn't been tested with new reclaim and writeback changes in years. Let's sunset it. Keep the sysctl with a deprecation warning around for a few more cycles, but remove all functionality behind it. [akpm@linux-foundation.org: fix Documentation/admin-guide/laptops/index.rst] Link: https://lkml.kernel.org/r/20251216185201.GH905277@cmpxchg.org Signed-off-by: Johannes Weiner Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Jens Axboe Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Deepanshu Kartikey Signed-off-by: Andrew Morton --- include/uapi/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 63d1464cb71c..6ea9ea8413fa 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -183,7 +183,7 @@ enum VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */ VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ VM_MAX_MAP_COUNT=22, /* int: Maximum number of mmaps/address-space */ - VM_LAPTOP_MODE=23, /* vm laptop mode */ + VM_BLOCK_DUMP=24, /* block dump mode */ VM_HUGETLB_GROUP=25, /* permitted hugetlb group */ VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ -- cgit v1.2.3 From fa05705107a40131a8335ad37817153709261738 Mon Sep 17 00:00:00 2001 From: Detlev Casanova Date: Fri, 9 Jan 2026 11:15:18 -0500 Subject: media: v4l2-ctrls: Add hevc_ext_sps_[ls]t_rps controls The vdpu381 decoder found on newer Rockchip SoC need the information from the long term and short term ref pic sets from the SPS. So far, it wasn't included in the v4l2 API, so add it with new dynamic sized controls. Each element of the hevc_ext_sps_lt_rps array contains the long term ref pic set at that index. Each element of the hevc_ext_sps_st_rps contains the short term ref pic set at that index, as the raw data. It is the role of the drivers to calculate the reference sets values. Reviewed-by: Nicolas Dufresne Signed-off-by: Detlev Casanova Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 61 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/videodev2.h | 2 ++ 2 files changed, 63 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 572622e4535e..68dd0c4e47b2 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -2101,6 +2101,8 @@ struct v4l2_ctrl_mpeg2_quantisation { #define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405) #define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406) #define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407) +#define V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS (V4L2_CID_CODEC_STATELESS_BASE + 408) +#define V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS (V4L2_CID_CODEC_STATELESS_BASE + 409) enum v4l2_stateless_hevc_decode_mode { V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, @@ -2556,6 +2558,65 @@ struct v4l2_ctrl_hevc_scaling_matrix { __u8 scaling_list_dc_coef_32x32[2]; }; +#define V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_INTER_REF_PIC_SET_PRED 0x1 + +/* + * struct v4l2_ctrl_hevc_ext_sps_st_rps - HEVC short term RPS parameters + * + * Dynamic size 1-dimension array for short term RPS. The number of elements + * is v4l2_ctrl_hevc_sps::num_short_term_ref_pic_sets. It can contain up to 65 elements. + * + * @delta_idx_minus1: Specifies the delta compare to the index. See details in section 7.4.8 + * "Short-term reference picture set semantics" of the specification. + * @delta_rps_sign: Sign of the delta as specified in section 7.4.8 "Short-term reference picture + * set semantics" of the specification. + * @abs_delta_rps_minus1: Absolute delta RPS as specified in section 7.4.8 "Short-term reference + * picture set semantics" of the specification. + * @num_negative_pics: Number of short-term RPS entries that have picture order count values less + * than the picture order count value of the current picture. + * @num_positive_pics: Number of short-term RPS entries that have picture order count values + * greater than the picture order count value of the current picture. + * @used_by_curr_pic: Bit j specifies if short-term RPS j is used by the current picture. + * @use_delta_flag: Bit j equals to 1 specifies that the j-th entry in the source candidate + * short-term RPS is included in this candidate short-term RPS. + * @delta_poc_s0_minus1: Specifies the negative picture order count delta for the i-th entry in + * the short-term RPS. See details in section 7.4.8 "Short-term reference + * picture set semantics" of the specification. + * @delta_poc_s1_minus1: Specifies the positive picture order count delta for the i-th entry in + * the short-term RPS. See details in section 7.4.8 "Short-term reference + * picture set semantics" of the specification. + * @flags: See V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_{} + */ +struct v4l2_ctrl_hevc_ext_sps_st_rps { + __u8 delta_idx_minus1; + __u8 delta_rps_sign; + __u8 num_negative_pics; + __u8 num_positive_pics; + __u32 used_by_curr_pic; + __u32 use_delta_flag; + __u16 abs_delta_rps_minus1; + __u16 delta_poc_s0_minus1[16]; + __u16 delta_poc_s1_minus1[16]; + __u16 flags; +}; + +#define V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_USED_LT 0x1 + +/* + * struct v4l2_ctrl_hevc_ext_sps_lt_rps - HEVC long term RPS parameters + * + * Dynamic size 1-dimension array for long term RPS. The number of elements + * is v4l2_ctrl_hevc_sps::num_long_term_ref_pics_sps. It can contain up to 65 elements. + * + * @lt_ref_pic_poc_lsb_sps: picture order count modulo MaxPicOrderCntLsb of the i-th candidate + * long-term reference picture. + * @flags: See V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_{} + */ +struct v4l2_ctrl_hevc_ext_sps_lt_rps { + __u16 lt_ref_pic_poc_lsb_sps; + __u16 flags; +}; + /* Stateless VP9 controls */ #define V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED 0x1 diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 848e86617d5c..eda4492e40dc 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1986,6 +1986,8 @@ enum v4l2_ctrl_type { V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS = 0x0272, V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX = 0x0273, V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS = 0x0274, + V4L2_CTRL_TYPE_HEVC_EXT_SPS_ST_RPS = 0x0275, + V4L2_CTRL_TYPE_HEVC_EXT_SPS_LT_RPS = 0x0276, V4L2_CTRL_TYPE_AV1_SEQUENCE = 0x280, V4L2_CTRL_TYPE_AV1_TILE_GROUP_ENTRY = 0x281, -- cgit v1.2.3 From f5f2bad67a45cd1ef6f5b727da104694a81b3666 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jan 2026 08:31:49 +0100 Subject: block: make the new blkzoned UAPI constants discoverable The Linux 6.19 merge window added the new BLKREPORTZONESV2 ioctl, and with it the new BLK_ZONE_REP_CACHED and BLK_ZONE_COND_ACTIVE constants. The two constants are defined as part of enums, which makes it very painful for userspace to discover if they are present in the installed system headers. Use the #define to the same name trick to make them trivially discoverable using CPP directives. Fixes: 0bf0e2e46668 ("block: track zone conditions") Fixes: b30ffcdc0c15 ("block: introduce BLKREPORTZONESV2 ioctl") Reported-by: Andrey Albershteyn Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blkzoned.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index e33f02703350..663836120966 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -81,7 +81,8 @@ enum blk_zone_cond { BLK_ZONE_COND_FULL = 0xE, BLK_ZONE_COND_OFFLINE = 0xF, - BLK_ZONE_COND_ACTIVE = 0xFF, + BLK_ZONE_COND_ACTIVE = 0xFF, /* added in Linux 6.19 */ +#define BLK_ZONE_COND_ACTIVE BLK_ZONE_COND_ACTIVE }; /** @@ -100,7 +101,8 @@ enum blk_zone_report_flags { BLK_ZONE_REP_CAPACITY = (1U << 0), /* Input flags */ - BLK_ZONE_REP_CACHED = (1U << 31), + BLK_ZONE_REP_CACHED = (1U << 31), /* added in Linux 6.19 */ +#define BLK_ZONE_REP_CACHED BLK_ZONE_REP_CACHED }; /** -- cgit v1.2.3 From d7a5da7a0f7fa7ff081140c4f6f971db98882703 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Dec 2025 17:52:04 +0100 Subject: rseq: Add fields and constants for time slice extension Aside of a Kconfig knob add the following items: - Two flag bits for the rseq user space ABI, which allow user space to query the availability and enablement without a syscall. - A new member to the user space ABI struct rseq, which is going to be used to communicate request and grant between kernel and user space. - A rseq state struct to hold the kernel state of this - Documentation of the new mechanism Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20251215155708.669472597@linutronix.de --- include/uapi/linux/rseq.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 1b76d508400c..6afc219d1545 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -23,9 +23,15 @@ enum rseq_flags { }; enum rseq_cs_flags_bit { + /* Historical and unsupported bits */ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, + /* (3) Intentional gap to put new bits into a separate byte */ + + /* User read only feature flags */ + RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT = 4, + RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT = 5, }; enum rseq_cs_flags { @@ -35,6 +41,11 @@ enum rseq_cs_flags { (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), + + RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE = + (1U << RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT), + RSEQ_CS_FLAG_SLICE_EXT_ENABLED = + (1U << RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT), }; /* @@ -53,6 +64,27 @@ struct rseq_cs { __u64 abort_ip; } __attribute__((aligned(4 * sizeof(__u64)))); +/** + * rseq_slice_ctrl - Time slice extension control structure + * @all: Compound value + * @request: Request for a time slice extension + * @granted: Granted time slice extension + * + * @request is set by user space and can be cleared by user space or kernel + * space. @granted is set and cleared by the kernel and must only be read + * by user space. + */ +struct rseq_slice_ctrl { + union { + __u32 all; + struct { + __u8 request; + __u8 granted; + __u16 __reserved; + }; + }; +}; + /* * struct rseq is aligned on 4 * 8 bytes to ensure it is always * contained within a single cache-line. @@ -141,6 +173,12 @@ struct rseq { */ __u32 mm_cid; + /* + * Time slice extension control structure. CPU local updates from + * kernel and user space. + */ + struct rseq_slice_ctrl slice_ctrl; + /* * Flexible array member at end of structure, after last feature field. */ -- cgit v1.2.3 From 28621ec2d46c6adf7d33a6facbd83e2fa566bd34 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Dec 2025 17:52:12 +0100 Subject: rseq: Add prctl() to enable time slice extensions Implement a prctl() so that tasks can enable the time slice extension mechanism. This fails, when time slice extensions are disabled at compile time or on the kernel command line and when no rseq pointer is registered in the kernel. That allows to implement a single trivial check in the exit to user mode hotpath, to decide whether the whole mechanism needs to be invoked. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20251215155708.858717691@linutronix.de --- include/uapi/linux/prctl.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 51c4e8c82b1e..79944b7ae50a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -386,4 +386,14 @@ struct prctl_mm_map { # define PR_FUTEX_HASH_SET_SLOTS 1 # define PR_FUTEX_HASH_GET_SLOTS 2 +/* RSEQ time slice extensions */ +#define PR_RSEQ_SLICE_EXTENSION 79 +# define PR_RSEQ_SLICE_EXTENSION_GET 1 +# define PR_RSEQ_SLICE_EXTENSION_SET 2 +/* + * Bits for RSEQ_SLICE_EXTENSION_GET/SET + * PR_RSEQ_SLICE_EXT_ENABLE: Enable + */ +# define PR_RSEQ_SLICE_EXT_ENABLE 0x01 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 99d2592023e5d0a31f5f5a83c694df48239a1e6c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Dec 2025 17:52:15 +0100 Subject: rseq: Implement sys_rseq_slice_yield() Provide a new syscall which has the only purpose to yield the CPU after the kernel granted a time slice extension. sched_yield() is not suitable for that because it unconditionally schedules, but the end of the time slice extension is not required to schedule when the task was already preempted. This also allows to have a strict check for termination to catch user space invoking random syscalls including sched_yield() from a time slice extension region. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mathieu Desnoyers Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20251215155708.929634896@linutronix.de --- include/uapi/asm-generic/unistd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 942370b3f5d2..a627acc8fb5f 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -860,8 +860,11 @@ __SYSCALL(__NR_file_setattr, sys_file_setattr) #define __NR_listns 470 __SYSCALL(__NR_listns, sys_listns) +#define __NR_rseq_slice_yield 471 +__SYSCALL(__NR_rseq_slice_yield, sys_rseq_slice_yield) + #undef __NR_syscalls -#define __NR_syscalls 471 +#define __NR_syscalls 472 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From d6200245c75e832af2087bc60ba2e6641a90eee9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 19 Jan 2026 11:23:57 +0100 Subject: rseq: Allow registering RSEQ with slice extension Since glibc cares about the number of syscalls required to initialize a new thread, allow initializing rseq with slice extension on. This avoids having to do another prctl(). Requested-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260121143207.814193010@infradead.org --- include/uapi/linux/rseq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 6afc219d1545..863c4a00a66b 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -19,7 +19,8 @@ enum rseq_cpu_id_state { }; enum rseq_flags { - RSEQ_FLAG_UNREGISTER = (1 << 0), + RSEQ_FLAG_UNREGISTER = (1 << 0), + RSEQ_FLAG_SLICE_EXT_DEFAULT_ON = (1 << 1), }; enum rseq_cs_flags_bit { -- cgit v1.2.3 From f174a9ffcd48d78a45d560c02ce4071ded036b53 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jan 2026 10:29:22 +0800 Subject: KVM: arm64: Add exit to userspace on {LD,ST}64B* outside of memslots The main use of {LD,ST}64B* is to talk to a device, which is hopefully directly assigned to the guest and requires no additional handling. However, this does not preclude a VMM from exposing a virtual device to the guest, and to allow 64 byte accesses as part of the programming interface. A direct consequence of this is that we need to be able to forward such access to userspace. Given that such a contraption is very unlikely to ever exist, we choose to offer a limited service: userspace gets (as part of a new exit reason) the ESR, the IPA, and that's it. It is fully expected to handle the full semantics of the instructions, deal with ACCDATA, the return values and increment PC. Much fun. A canonical implementation can also simply inject an abort and be done with it. Frankly, don't try to do anything else unless you have time to waste. Acked-by: Arnd Bergmann Acked-by: Oliver Upton Signed-off-by: Marc Zyngier Signed-off-by: Yicong Yang Signed-off-by: Zhou Wang Signed-off-by: Will Deacon --- include/uapi/linux/kvm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dddb781b0507..88cca0e22ece 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -180,6 +180,7 @@ struct kvm_xen_exit { #define KVM_EXIT_MEMORY_FAULT 39 #define KVM_EXIT_TDX 40 #define KVM_EXIT_ARM_SEA 41 +#define KVM_EXIT_ARM_LDST64B 42 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -402,7 +403,7 @@ struct kvm_run { } eoi; /* KVM_EXIT_HYPERV */ struct kvm_hyperv_exit hyperv; - /* KVM_EXIT_ARM_NISV */ + /* KVM_EXIT_ARM_NISV / KVM_EXIT_ARM_LDST64B */ struct { __u64 esr_iss; __u64 fault_ipa; -- cgit v1.2.3 From 0f7afd80d81b739c4a9a6e4e24109ba1030c9c56 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:22 -0600 Subject: PCI: Move CXL DVSEC definitions into uapi/linux/pci_regs.h The CXL DVSECs are currently defined in cxl/core/cxlpci.h. These are not accessible to other subsystems. Move these to uapi/linux/pci_regs.h. The CXL DVSEC definitions will be renamed and reformatted to fit better with existing defines. Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Signed-off-by: Dan Williams Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20260114182055.46029-2-terry.bowman@amd.com Signed-off-by: Dave Jiang --- include/uapi/linux/pci_regs.h | 64 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 3add74ae2594..6c4b6f19b18e 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1253,11 +1253,6 @@ #define PCI_DEV3_STA 0x0c /* Device 3 Status Register */ #define PCI_DEV3_STA_SEGMENT 0x8 /* Segment Captured (end-to-end flit-mode detected) */ -/* Compute Express Link (CXL r3.1, sec 8.1.5) */ -#define PCI_DVSEC_CXL_PORT 3 -#define PCI_DVSEC_CXL_PORT_CTL 0x0c -#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 - /* Integrity and Data Encryption Extended Capability */ #define PCI_IDE_CAP 0x04 #define PCI_IDE_CAP_LINK 0x1 /* Link IDE Stream Supported */ @@ -1338,4 +1333,63 @@ #define PCI_IDE_SEL_ADDR_3(x) (28 + (x) * PCI_IDE_SEL_ADDR_BLOCK_SIZE) #define PCI_IDE_SEL_BLOCK_SIZE(nr_assoc) (20 + PCI_IDE_SEL_ADDR_BLOCK_SIZE * (nr_assoc)) +/* Compute Express Link (CXL r3.1, sec 8.1.5) */ +#define PCI_DVSEC_CXL_PORT 3 +#define PCI_DVSEC_CXL_PORT_CTL 0x0c +#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 + +/* + * Compute Express Link (CXL r3.2, sec 8.1) + * + * Note that CXL DVSEC id 3 and 7 to be ignored when the CXL link state + * is "disconnected" (CXL r3.2, sec 9.12.3). Re-enumerate these + * registers on downstream link-up events. + */ +#define PCI_DVSEC_HEADER1_LENGTH_MASK __GENMASK(31, 20) + +/* CXL 3.2 8.1.3: PCIe DVSEC for CXL Device */ +#define CXL_DVSEC_PCIE_DEVICE 0 +#define CXL_DVSEC_CAP_OFFSET 0xA +#define CXL_DVSEC_MEM_CAPABLE _BITUL(2) +#define CXL_DVSEC_HDM_COUNT_MASK __GENMASK(5, 4) +#define CXL_DVSEC_CTRL_OFFSET 0xC +#define CXL_DVSEC_MEM_ENABLE _BITUL(2) +#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define CXL_DVSEC_MEM_INFO_VALID _BITUL(0) +#define CXL_DVSEC_MEM_ACTIVE _BITUL(1) +#define CXL_DVSEC_MEM_SIZE_LOW_MASK __GENMASK(31, 28) +#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define CXL_DVSEC_MEM_BASE_LOW_MASK __GENMASK(31, 28) + +#define CXL_DVSEC_RANGE_MAX 2 + +/* CXL 3.2 8.1.4: Non-CXL Function Map DVSEC */ +#define CXL_DVSEC_FUNCTION_MAP 2 + +/* CXL 3.2 8.1.5: Extensions DVSEC for Ports */ +#define CXL_DVSEC_PORT 3 +#define CXL_DVSEC_PORT_CTL 0x0c +#define CXL_DVSEC_PORT_CTL_UNMASK_SBR 0x00000001 + +/* CXL 3.2 8.1.6: GPF DVSEC for CXL Port */ +#define CXL_DVSEC_PORT_GPF 4 +#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK __GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK __GENMASK(11, 8) +#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK __GENMASK(3, 0) +#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK __GENMASK(11, 8) + +/* CXL 3.2 8.1.7: GPF DVSEC for CXL Device */ +#define CXL_DVSEC_DEVICE_GPF 5 + +/* CXL 3.2 8.1.9: Register Locator DVSEC */ +#define CXL_DVSEC_REG_LOCATOR 8 +#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC +#define CXL_DVSEC_REG_LOCATOR_BIR_MASK __GENMASK(2, 0) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK __GENMASK(15, 8) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK __GENMASK(31, 16) + #endif /* LINUX_PCI_REGS_H */ -- cgit v1.2.3 From 6612bd9ff0b1001cff5f5d79db6ce44427d2e99c Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:23 -0600 Subject: PCI: Update CXL DVSEC definitions CXL DVSEC definitions were recently moved into uapi/pci_regs.h, but the newly added macros do not follow the file's existing naming conventions. The current format uses CXL_DVSEC_XYZ, while the new CXL entries must instead use the PCI_DVSEC_CXL_XYZ prefix to match the conventions already established in pci_regs.h. The new CXL DVSEC macros also introduce _MASK and _OFFSET suffixes, which are not used anywhere else in the file. These suffixes lengthen the identifiers and reduce readability. Remove _MASK and _OFFSET from the recently added definitions. Additionally, remove PCI_DVSEC_HEADER1_LENGTH, as it duplicates the existing PCI_DVSEC_HEADER1_LEN() macro. Update all existing references to use the new macro names. Finally, update the inline documentation to reference the latest revision of the CXL specification. Signed-off-by: Terry Bowman Reviewed-by: Dan Williams Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20260114182055.46029-3-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- include/uapi/linux/pci_regs.h | 94 ++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 50 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 6c4b6f19b18e..662582bdccf0 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1333,63 +1333,57 @@ #define PCI_IDE_SEL_ADDR_3(x) (28 + (x) * PCI_IDE_SEL_ADDR_BLOCK_SIZE) #define PCI_IDE_SEL_BLOCK_SIZE(nr_assoc) (20 + PCI_IDE_SEL_ADDR_BLOCK_SIZE * (nr_assoc)) -/* Compute Express Link (CXL r3.1, sec 8.1.5) */ -#define PCI_DVSEC_CXL_PORT 3 -#define PCI_DVSEC_CXL_PORT_CTL 0x0c -#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 - /* - * Compute Express Link (CXL r3.2, sec 8.1) + * Compute Express Link (CXL r4.0, sec 8.1) * * Note that CXL DVSEC id 3 and 7 to be ignored when the CXL link state - * is "disconnected" (CXL r3.2, sec 9.12.3). Re-enumerate these + * is "disconnected" (CXL r4.0, sec 9.12.3). Re-enumerate these * registers on downstream link-up events. */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK __GENMASK(31, 20) - -/* CXL 3.2 8.1.3: PCIe DVSEC for CXL Device */ -#define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_MEM_CAPABLE _BITUL(2) -#define CXL_DVSEC_HDM_COUNT_MASK __GENMASK(5, 4) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE _BITUL(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID _BITUL(0) -#define CXL_DVSEC_MEM_ACTIVE _BITUL(1) -#define CXL_DVSEC_MEM_SIZE_LOW_MASK __GENMASK(31, 28) -#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK __GENMASK(31, 28) + +/* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE 0 +#define PCI_DVSEC_CXL_CAP 0xA +#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2) +#define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4) +#define PCI_DVSEC_CXL_CTRL 0xC +#define PCI_DVSEC_CXL_MEM_ENABLE _BITUL(2) +#define PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0) +#define PCI_DVSEC_CXL_MEM_ACTIVE _BITUL(1) +#define PCI_DVSEC_CXL_MEM_SIZE_LOW __GENMASK(31, 28) +#define PCI_DVSEC_CXL_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_BASE_LOW __GENMASK(31, 28) #define CXL_DVSEC_RANGE_MAX 2 -/* CXL 3.2 8.1.4: Non-CXL Function Map DVSEC */ -#define CXL_DVSEC_FUNCTION_MAP 2 - -/* CXL 3.2 8.1.5: Extensions DVSEC for Ports */ -#define CXL_DVSEC_PORT 3 -#define CXL_DVSEC_PORT_CTL 0x0c -#define CXL_DVSEC_PORT_CTL_UNMASK_SBR 0x00000001 - -/* CXL 3.2 8.1.6: GPF DVSEC for CXL Port */ -#define CXL_DVSEC_PORT_GPF 4 -#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK __GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK __GENMASK(11, 8) -#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK __GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK __GENMASK(11, 8) - -/* CXL 3.2 8.1.7: GPF DVSEC for CXL Device */ -#define CXL_DVSEC_DEVICE_GPF 5 - -/* CXL 3.2 8.1.9: Register Locator DVSEC */ -#define CXL_DVSEC_REG_LOCATOR 8 -#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC -#define CXL_DVSEC_REG_LOCATOR_BIR_MASK __GENMASK(2, 0) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK __GENMASK(15, 8) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK __GENMASK(31, 16) +/* CXL r4.0, 8.1.4: Non-CXL Function Map DVSEC */ +#define PCI_DVSEC_CXL_FUNCTION_MAP 2 + +/* CXL r4.0, 8.1.5: Extensions DVSEC for Ports */ +#define PCI_DVSEC_CXL_PORT 3 +#define PCI_DVSEC_CXL_PORT_CTL 0x0c +#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 + +/* CXL r4.0, 8.1.6: GPF DVSEC for CXL Port */ +#define PCI_DVSEC_CXL_PORT_GPF 4 +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL 0x0C +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE __GENMASK(11, 8) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL 0xE +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE __GENMASK(11, 8) + +/* CXL r4.0, 8.1.7: GPF DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE_GPF 5 + +/* CXL r4.0, 8.1.9: Register Locator DVSEC */ +#define PCI_DVSEC_CXL_REG_LOCATOR 8 +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1 0xC +#define PCI_DVSEC_CXL_REG_LOCATOR_BIR __GENMASK(2, 0) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID __GENMASK(15, 8) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW __GENMASK(31, 16) #endif /* LINUX_PCI_REGS_H */ -- cgit v1.2.3 From 7c29ba02210c6e4570cdce53813a1ae68fb6d049 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Wed, 14 Jan 2026 12:20:24 -0600 Subject: PCI: Introduce pcie_is_cxl() CXL is a protocol that runs on top of PCIe electricals. Its error model also runs on top of the PCIe AER error model by standardizing "internal" errors as "CXL" errors. Linux has historically ignored internal errors. CXL protocol error handling is then a task of enhancing the PCIe AER core to understand that PCIe ports (upstream and downstream) and endpoints may throw internal errors that represent standard CXL protocol errors. The proposed method to make that determination is to teach 'struct pci_dev' to cache when its link has trained the CXL.mem and/or CXL.cache protocols and then treat all internal errors as CXL errors. A design goal is to not burden the PCIe AER core with CXL knowledge beyond just enough to forward error notifications to the CXL RAS core. The forwarded notification looks up a 'struct cxl_port' or 'struct cxl_dport' companion device to the PCI device. Introduce set_pcie_cxl() with logic checking for CXL.mem or CXL.cache status in the CXL Flex Bus DVSEC status register. The CXL Flex Bus DVSEC presence is used because it is required for all the CXL PCIe devices.[1] [1] CXL 3.1 Spec, 8.1.1 PCIe Designated Vendor-Specific Extended Capability (DVSEC) ID Assignment, Table 8-2 Signed-off-by: Terry Bowman Reviewed-by: Ira Weiny Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alejandro Lucero Reviewed-by: Ben Cheatham Reviewed-by: Dan Williams Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20260114182055.46029-4-terry.bowman@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- include/uapi/linux/pci_regs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 662582bdccf0..b6622fd60fd9 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1379,6 +1379,12 @@ /* CXL r4.0, 8.1.7: GPF DVSEC for CXL Device */ #define PCI_DVSEC_CXL_DEVICE_GPF 5 +/* CXL r4.0, 8.1.8: Flex Bus DVSEC */ +#define PCI_DVSEC_CXL_FLEXBUS_PORT 7 +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS 0xE +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE _BITUL(0) +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM _BITUL(2) + /* CXL r4.0, 8.1.9: Register Locator DVSEC */ #define PCI_DVSEC_CXL_REG_LOCATOR 8 #define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1 0xC -- cgit v1.2.3 From 5247c034a67f5a93cc1faa15e9867eec5b22f38a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 20 Jan 2026 20:47:40 +0000 Subject: io_uring: introduce non-circular SQ Outside of SQPOLL, normally SQ entries are consumed by the time the submission syscall returns. For those cases we don't need a circular buffer and the head/tail tracking, instead the kernel can assume that entries always start from the beginning of the SQ at index 0. This patch introduces a setup flag doing exactly that. It's a simpler and helps to keeps SQEs hot in cache. The feature is optional and enabled by setting IORING_SETUP_SQ_REWIND. The flag is rejected if passed together with SQPOLL as it'd require waiting for SQ before each submission. It also requires IORING_SETUP_NO_SQARRAY, which can be supported but it's unlikely there will be users, so leave more space for future optimisations. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b5b23c0d5283..475094c7a668 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -237,6 +237,18 @@ enum io_uring_sqe_flags_bit { */ #define IORING_SETUP_SQE_MIXED (1U << 19) +/* + * When set, io_uring ignores SQ head and tail and fetches SQEs to submit + * starting from index 0 instead from the index stored in the head pointer. + * IOW, the user should place all SQE at the beginning of the SQ memory + * before issuing a submission syscall. + * + * It requires IORING_SETUP_NO_SQARRAY and is incompatible with + * IORING_SETUP_SQPOLL. The user must also never change the SQ head and tail + * values and keep it set to 0. Any other value is undefined behaviour. + */ +#define IORING_SETUP_SQ_REWIND (1U << 20) + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, -- cgit v1.2.3 From e86f89ab24f5ec595879a01eebb5df84f5ed6d2b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 16 Jan 2026 22:18:36 +0800 Subject: ublk: add new batch command UBLK_U_IO_PREP_IO_CMDS & UBLK_U_IO_COMMIT_IO_CMDS Add new command UBLK_U_IO_PREP_IO_CMDS, which is the batch version of UBLK_IO_FETCH_REQ. Add new command UBLK_U_IO_COMMIT_IO_CMDS, which is for committing io command result only, still the batch version. The new command header type is `struct ublk_batch_io`. This patch doesn't actually implement these commands yet, just validates the SQE fields. Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 49 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 90f47da4f435..0cc58e19d401 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -103,6 +103,10 @@ _IOWR('u', 0x23, struct ublksrv_io_cmd) #define UBLK_U_IO_UNREGISTER_IO_BUF \ _IOWR('u', 0x24, struct ublksrv_io_cmd) +#define UBLK_U_IO_PREP_IO_CMDS \ + _IOWR('u', 0x25, struct ublk_batch_io) +#define UBLK_U_IO_COMMIT_IO_CMDS \ + _IOWR('u', 0x26, struct ublk_batch_io) /* only ABORT means that no re-fetch */ #define UBLK_IO_RES_OK 0 @@ -544,6 +548,51 @@ struct ublksrv_io_cmd { }; }; +struct ublk_elem_header { + __u16 tag; /* IO tag */ + + /* + * Buffer index for incoming io command, only valid iff + * UBLK_F_AUTO_BUF_REG is set + */ + __u16 buf_index; + __s32 result; /* I/O completion result (commit only) */ +}; + +/* + * uring_cmd buffer structure for batch commands + * + * buffer includes multiple elements, which number is specified by + * `nr_elem`. Each element buffer is organized in the following order: + * + * struct ublk_elem_buffer { + * // Mandatory fields (8 bytes) + * struct ublk_elem_header header; + * + * // Optional fields (8 bytes each, included based on flags) + * + * // Buffer address (if UBLK_BATCH_F_HAS_BUF_ADDR) for copying data + * // between ublk request and ublk server buffer + * __u64 buf_addr; + * + * // returned Zone append LBA (if UBLK_BATCH_F_HAS_ZONE_LBA) + * __u64 zone_lba; + * } + * + * Used for `UBLK_U_IO_PREP_IO_CMDS` and `UBLK_U_IO_COMMIT_IO_CMDS` + */ +struct ublk_batch_io { + __u16 q_id; +#define UBLK_BATCH_F_HAS_ZONE_LBA (1 << 0) +#define UBLK_BATCH_F_HAS_BUF_ADDR (1 << 1) +#define UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK (1 << 2) + __u16 flags; + __u16 nr_elem; + __u8 elem_bytes; + __u8 reserved; + __u64 reserved2; +}; + struct ublk_param_basic { #define UBLK_ATTR_READ_ONLY (1 << 0) #define UBLK_ATTR_ROTATIONAL (1 << 1) -- cgit v1.2.3 From b256795b3606e9a67c725dde8eaae91dd9d21de4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 16 Jan 2026 22:18:37 +0800 Subject: ublk: handle UBLK_U_IO_PREP_IO_CMDS This commit implements the handling of the UBLK_U_IO_PREP_IO_CMDS command, which allows userspace to prepare a batch of I/O requests. The core of this change is the `ublk_walk_cmd_buf` function, which iterates over the elements in the uring_cmd fixed buffer. For each element, it parses the I/O details, finds the corresponding `ublk_io` structure, and prepares it for future dispatch. Add per-io lock for protecting concurrent delivery and committing. Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 0cc58e19d401..1a3d4d33c1d1 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -103,6 +103,11 @@ _IOWR('u', 0x23, struct ublksrv_io_cmd) #define UBLK_U_IO_UNREGISTER_IO_BUF \ _IOWR('u', 0x24, struct ublksrv_io_cmd) + +/* + * return 0 if the command is run successfully, otherwise failure code + * is returned + */ #define UBLK_U_IO_PREP_IO_CMDS \ _IOWR('u', 0x25, struct ublk_batch_io) #define UBLK_U_IO_COMMIT_IO_CMDS \ -- cgit v1.2.3 From 1e500e106d5a82280db59dba06f0108085beba65 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 16 Jan 2026 22:18:38 +0800 Subject: ublk: handle UBLK_U_IO_COMMIT_IO_CMDS Handle UBLK_U_IO_COMMIT_IO_CMDS by walking the uring_cmd fixed buffer: - read each element into one temp buffer in batch style - parse and apply each element for committing io result Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 1a3d4d33c1d1..3894d676dd02 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -110,6 +110,14 @@ */ #define UBLK_U_IO_PREP_IO_CMDS \ _IOWR('u', 0x25, struct ublk_batch_io) +/* + * If failure code is returned, nothing in the command buffer is handled. + * Otherwise, the returned value means how many bytes in command buffer + * are handled actually, then number of handled IOs can be calculated with + * `elem_bytes` for each IO. IOs in the remained bytes are not committed, + * userspace has to check return value for dealing with partial committing + * correctly. + */ #define UBLK_U_IO_COMMIT_IO_CMDS \ _IOWR('u', 0x26, struct ublk_batch_io) -- cgit v1.2.3 From a4d88375539920b7401ead59d2f944ac23c668ea Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 16 Jan 2026 22:18:41 +0800 Subject: ublk: add UBLK_U_IO_FETCH_IO_CMDS for batch I/O processing Add UBLK_U_IO_FETCH_IO_CMDS command to enable efficient batch processing of I/O requests. This multishot uring_cmd allows the ublk server to fetch multiple I/O commands in a single operation, significantly reducing submission overhead compared to individual FETCH_REQ* commands. Key Design Features: 1. Multishot Operation: One UBLK_U_IO_FETCH_IO_CMDS can fetch many I/O commands, with the batch size limited by the provided buffer length. 2. Dynamic Load Balancing: Multiple fetch commands can be submitted simultaneously, but only one is active at any time. This enables efficient load distribution across multiple server task contexts. 3. Implicit State Management: The implementation uses three key variables to track state: - evts_fifo: Queue of request tags awaiting processing - fcmd_head: List of available fetch commands - active_fcmd: Currently active fetch command (NULL = none active) States are derived implicitly: - IDLE: No fetch commands available - READY: Fetch commands available, none active - ACTIVE: One fetch command processing events 4. Lockless Reader Optimization: The active fetch command can read from evts_fifo without locking (single reader guarantee), while writers (ublk_queue_rq/ublk_queue_rqs) use evts_lock protection. The memory barrier pairing plays key role for the single lockless reader optimization. Implementation Details: - ublk_queue_rq() and ublk_queue_rqs() save request tags to evts_fifo - __ublk_acquire_fcmd() selects an available fetch command when events arrive and no command is currently active - ublk_batch_dispatch() moves tags from evts_fifo to the fetch command's buffer and posts completion via io_uring_mshot_cmd_post_cqe() - State transitions are coordinated via evts_lock to maintain consistency Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 3894d676dd02..70d8ebbf4326 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -121,6 +121,13 @@ #define UBLK_U_IO_COMMIT_IO_CMDS \ _IOWR('u', 0x26, struct ublk_batch_io) +/* + * Fetch io commands to provided buffer in multishot style, + * `IORING_URING_CMD_MULTISHOT` is required for this command. + */ +#define UBLK_U_IO_FETCH_IO_CMDS \ + _IOWR('u', 0x27, struct ublk_batch_io) + /* only ABORT means that no re-fetch */ #define UBLK_IO_RES_OK 0 #define UBLK_IO_RES_NEED_GET_DATA 1 -- cgit v1.2.3 From e2723e6ce6025026b6d79d9a00048386a69e00c3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 16 Jan 2026 22:18:44 +0800 Subject: ublk: add new feature UBLK_F_BATCH_IO Add new feature UBLK_F_BATCH_IO which replaces the following two per-io commands: - UBLK_U_IO_FETCH_REQ - UBLK_U_IO_COMMIT_AND_FETCH_REQ with three per-queue batch io uring_cmd: - UBLK_U_IO_PREP_IO_CMDS - UBLK_U_IO_COMMIT_IO_CMDS - UBLK_U_IO_FETCH_IO_CMDS Then ublk can deliver batch io commands to ublk server in single multishort uring_cmd, also allows to prepare & commit multiple commands in batch style via single uring_cmd, communication cost is reduced a lot. This feature also doesn't limit task context any more for all supported commands, so any allowed uring_cmd can be issued in any task context. ublk server implementation becomes much easier. Meantime load balance becomes much easier to support with this feature. The command `UBLK_U_IO_FETCH_IO_CMDS` can be issued from multiple task contexts, so each task can adjust this command's buffer length or number of inflight commands for controlling how much load is handled by current task. Later, priority parameter will be added to command `UBLK_U_IO_FETCH_IO_CMDS` for improving load balance support. UBLK_U_IO_NEED_GET_DATA isn't supported in batch io yet, but it may be enabled in future via its batch pair. Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 70d8ebbf4326..743d31491387 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -340,6 +340,21 @@ */ #define UBLK_F_BUF_REG_OFF_DAEMON (1ULL << 14) +/* + * Support the following commands for delivering & committing io command + * in batch. + * + * - UBLK_U_IO_PREP_IO_CMDS + * - UBLK_U_IO_COMMIT_IO_CMDS + * - UBLK_U_IO_FETCH_IO_CMDS + * - UBLK_U_IO_REGISTER_IO_BUF + * - UBLK_U_IO_UNREGISTER_IO_BUF + * + * The existing UBLK_U_IO_FETCH_REQ, UBLK_U_IO_COMMIT_AND_FETCH_REQ and + * UBLK_U_IO_NEED_GET_DATA uring_cmd are not supported for this feature. + */ +#define UBLK_F_BATCH_IO (1ULL << 15) + /* * ublk device supports requests with integrity/metadata buffer. * Requires UBLK_F_USER_COPY. -- cgit v1.2.3 From fa9893fadbc245e179cb17f3c371c67471b5a8a8 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Fri, 9 Jan 2026 17:17:32 -0600 Subject: KVM: Introduce KVM_EXIT_SNP_REQ_CERTS for SNP certificate-fetching For SEV-SNP, the host can optionally provide a certificate table to the guest when it issues an attestation request to firmware (see GHCB 2.0 specification regarding "SNP Extended Guest Requests"). This certificate table can then be used to verify the endorsement key used by firmware to sign the attestation report. While it is possible for guests to obtain the certificates through other means, handling it via the host provides more flexibility in being able to keep the certificate data in sync with the endorsement key throughout host-side operations that might resulting in the endorsement key changing. In the case of KVM, userspace will be responsible for fetching the certificate table and keeping it in sync with any modifications to the endorsement key by other userspace management tools. Define a new KVM_EXIT_SNP_REQ_CERTS event where userspace is provided with the GPA of the buffer the guest has provided as part of the attestation request so that userspace can write the certificate data into it while relying on filesystem-based locking to keep the certificates up-to-date relative to the endorsement keys installed/utilized by firmware at the time the certificates are fetched. [Melody: Update the documentation scheme about how file locking is expected to happen.] Reviewed-by: Liam Merwick Tested-by: Liam Merwick Tested-by: Dionna Glaze Signed-off-by: Michael Roth Signed-off-by: Melody Wang Signed-off-by: Michael Roth Link: https://patch.msgid.link/20260109231732.1160759-2-michael.roth@amd.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dddb781b0507..8cd107cdcf0b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -135,6 +135,12 @@ struct kvm_xen_exit { } u; }; +struct kvm_exit_snp_req_certs { + __u64 gpa; + __u64 npages; + __u64 ret; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -180,6 +186,7 @@ struct kvm_xen_exit { #define KVM_EXIT_MEMORY_FAULT 39 #define KVM_EXIT_TDX 40 #define KVM_EXIT_ARM_SEA 41 +#define KVM_EXIT_SNP_REQ_CERTS 42 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -482,6 +489,8 @@ struct kvm_run { __u64 gva; __u64 gpa; } arm_sea; + /* KVM_EXIT_SNP_REQ_CERTS */ + struct kvm_exit_snp_req_certs snp_req_certs; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3 From ba1b8c97b9a0414432382a11f144a8597f6f597e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 21 Jan 2026 17:11:30 +0100 Subject: geneve: add netlink support for GRO hint Allow configuring and dumping the new device option, and cache its value into the geneve socket itself. The new option is not tie to it any code yet. Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/2295d4e4d1e919a3189425141bbc71c7850a2de0.1769011015.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3b491d96e52e..e9b5f79e1ee1 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1443,6 +1443,7 @@ enum { IFLA_GENEVE_DF, IFLA_GENEVE_INNER_PROTO_INHERIT, IFLA_GENEVE_PORT_RANGE, + IFLA_GENEVE_GRO_HINT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From 795663b4d160ba652959f1a46381c5e8b1342a53 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 24 Jan 2026 10:36:17 +0000 Subject: io_uring/zcrx: implement large rx buffer support There are network cards that support receive buffers larger than 4K, and that can be vastly beneficial for performance, and benchmarks for this patch showed up to 30% CPU util improvement for 32K vs 4K buffers. Allows zcrx users to specify the size in struct io_uring_zcrx_ifq_reg::rx_buf_len. If set to zero, zcrx will use a default value. zcrx will check and fail if the memory backing the area can't be split into physically contiguous chunks of the required size. It's more restrictive as it only needs dma addresses to be contig, but that's beyond this series. Signed-off-by: Pavel Begunkov [axboe: kill duplicate netdev_queues.h include] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b5b23c0d5283..3184f7e7f1f2 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1082,7 +1082,7 @@ struct io_uring_zcrx_ifq_reg { struct io_uring_zcrx_offsets offsets; __u32 zcrx_id; - __u32 __resv2; + __u32 rx_buf_len; __u64 __resv[3]; }; -- cgit v1.2.3 From 2d419c44658f75e7655794341a95c0687830f3df Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 24 Jan 2026 14:19:56 +0800 Subject: bpf: add fsession support The fsession is something that similar to kprobe session. It allow to attach a single BPF program to both the entry and the exit of the target functions. Introduce the struct bpf_fsession_link, which allows to add the link to both the fentry and fexit progs_hlist of the trampoline. Signed-off-by: Menglong Dong Co-developed-by: Leon Hwang Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260124062008.8657-2-dongml2@chinatelecom.cn Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2a2ade4be60f..44e7dbc278e3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1145,6 +1145,7 @@ enum bpf_attach_type { BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, BPF_TRACE_UPROBE_SESSION, + BPF_TRACE_FSESSION, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From 0ac903d1bfdce8ff40657c2b7d996947b72b6645 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Dec 2025 19:28:50 -0500 Subject: NFS: NFSERR_INVAL is not defined by NFSv2 A documenting comment in include/uapi/linux/nfs.h claims incorrectly that NFSv2 defines NFSERR_INVAL. There is no such definition in either RFC 1094 or https://pubs.opengroup.org/onlinepubs/9629799/chap7.htm NFS3ERR_INVAL is introduced in RFC 1813. NFSD returns NFSERR_INVAL for PROC_GETACL, which has no specification (yet). However, nfsd_map_status() maps nfserr_symlink and nfserr_wrong_type to nfserr_inval, which does not align with RFC 1094. This logic was introduced only recently by commit 438f81e0e92a ("nfsd: move error choice for incorrect object types to version-specific code."). Given that we have no INVAL or SERVERFAULT status in NFSv2, probably the only choice is NFSERR_IO. Fixes: 438f81e0e92a ("nfsd: move error choice for incorrect object types to version-specific code.") Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/uapi/linux/nfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h index 71c7196d3281..e629c4953534 100644 --- a/include/uapi/linux/nfs.h +++ b/include/uapi/linux/nfs.h @@ -55,7 +55,7 @@ NFSERR_NODEV = 19, /* v2 v3 v4 */ NFSERR_NOTDIR = 20, /* v2 v3 v4 */ NFSERR_ISDIR = 21, /* v2 v3 v4 */ - NFSERR_INVAL = 22, /* v2 v3 v4 */ + NFSERR_INVAL = 22, /* v3 v4 */ NFSERR_FBIG = 27, /* v2 v3 v4 */ NFSERR_NOSPC = 28, /* v2 v3 v4 */ NFSERR_ROFS = 30, /* v2 v3 v4 */ -- cgit v1.2.3 From 1965bbb8f3c72e5f1972b5eeb6f19a36664a676d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 22 Dec 2025 08:55:10 +0100 Subject: ipc/shm: uapi: remove dependency on libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using libc types and headers from the UAPI headers is problematic as it introduces a dependency on a full C toolchain. shm.h does not even use any symbols from the libc header as the usage of getpagesize() was removed a decade ago in commit 060028bac94b ("ipc/shm.c: increase the defaults for SHMALL, SHMMAX") Drop the unnecessary inclusion. Link: https://lkml.kernel.org/r/20251222-uapi-shm-v1-1-270bb7f75d97@linutronix.de Signed-off-by: Thomas Weißschuh Cc: Arnd Bergmann Signed-off-by: Andrew Morton --- include/uapi/linux/shm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index 8d1f17a4e08e..7269f9f402e3 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -5,9 +5,6 @@ #include #include #include -#ifndef __KERNEL__ -#include -#endif /* * SHMMNI, SHMMAX and SHMALL are default upper limits which can be -- cgit v1.2.3 From 3d702678f57edc524f73a7865382ae304269f590 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Tue, 23 Dec 2025 19:05:23 +0800 Subject: mm/mempolicy: fix mpol_rebind_nodemask() for MPOL_F_NUMA_BALANCING commit bda420b98505 ("numa balancing: migrate on fault among multiple bound nodes") adds new flag MPOL_F_NUMA_BALANCING to enable NUMA balancing for MPOL_BIND memory policy. When the cpuset of tasks changes, the mempolicy of the task is rebound by mpol_rebind_nodemask(). When MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES are both not set, the behaviour of rebinding should be same whenever MPOL_F_NUMA_BALANCING is set or not. So, when an application calls set_mempolicy() with MPOL_F_NUMA_BALANCING set but both MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES cleared, mempolicy.w.cpuset_mems_allowed should be set to cpuset_current_mems_allowed nodemask. However, in current implementation, mpol_store_user_nodemask() wrongly returns true, causing mempolicy->w.user_nodemask to be incorrectly set to the user-specified nodemask. Later, when the cpuset of the application changes, mpol_rebind_nodemask() ends up rebinding based on the user-specified nodemask rather than the cpuset_mems_allowed nodemask as intended. I can reproduce with the following steps in qemu with 4 NUMA nodes: 1. echo '+cpuset' > /sys/fs/cgroup/cgroup.subtree_control 2. mkdir /sys/fs/cgroup/test 3. ./reproducer & 4. cat /proc/$pid/numa_maps, the task is bound to NUMA 1 5. echo $pid > /sys/fs/cgroup/test/cgroup.procs 6. cat /proc/$pid/numa_maps, the task is bound to NUMA 0 now. The reproducer code: int main() { struct bitmask *bmp; int ret; bmp = numa_parse_nodestring("1"); ret = set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING, bmp->maskp, bmp->size + 1); if (ret < 0) { perror("Failed to call set_mempolicy"); exit(-1); } while (1); return 0; } If I call set_mempolicy() without MPOL_F_NUMA_BALANCING in the reproducer code. After step 5, the task is still bound to NUMA 1. To fix this, only set mempolicy->w.user_nodemask to the user-specified nodemask if MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES is present. Link: https://lkml.kernel.org/r/20260120011018.1256654-1-tujinjiang@huawei.com Link: https://lkml.kernel.org/r/20251223110523.1161421-1-tujinjiang@huawei.com Fixes: bda420b98505 ("numa balancing: migrate on fault among multiple bound nodes") Signed-off-by: Jinjiang Tu Reviewed-by: Gregory Price Reviewed-by: Huang Ying Acked-by: David Hildenbrand (Red Hat) Cc: Alistair Popple Cc: Byungchul Park Cc: Joshua Hahn Cc: Kefeng Wang Cc: Mathew Brost Cc: Mel Gorman Cc: Rakie Kim Cc: Zi Yan Signed-off-by: Andrew Morton --- include/uapi/linux/mempolicy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 8fbbe613611a..6c962d866e86 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -39,6 +39,9 @@ enum { #define MPOL_MODE_FLAGS \ (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES | MPOL_F_NUMA_BALANCING) +/* Whether the nodemask is specified by users */ +#define MPOL_USER_NODEMASK_FLAGS (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES) + /* Flags for get_mempolicy */ #define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */ #define MPOL_F_ADDR (1<<1) /* look up vma using address */ -- cgit v1.2.3 From 86c6b6e4d187652d718915e15cf126f98e24e955 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 11 Jan 2026 19:03:48 +0200 Subject: wifi: nl80211/cfg80211: add new FTM capabilities Add new capabilities to the PMSR FTM capabilities list. The new capabilities include 6 GHz support, supported number of spatial streams and supported number of LTF repetitions. Signed-off-by: Avraham Stern Tested-by: Miriam Rachel Korenblit Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260111190221.bf43785c18f6.Ic98cf9790ddee84bf88e5720b93c46c23af3c96c@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b0f050e36fa4..200703c8b2c1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -7790,6 +7790,28 @@ enum nl80211_peer_measurement_attrs { * trigger based ranging measurement is supported * @NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED: flag attribute indicating * if non-trigger-based ranging measurement is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_6GHZ_SUPPORT: flag attribute indicating if + * ranging on the 6 GHz band is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_TX_LTF_REP: u32 attribute indicating + * the maximum number of LTF repetitions the device can transmit in the + * preamble of the ranging NDP (zero means only one LTF, no repetitions) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_RX_LTF_REP: u32 attribute indicating + * the maximum number of LTF repetitions the device can receive in the + * preamble of the ranging NDP (zero means only one LTF, no repetitions) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_TX_STS: u32 attribute indicating + * the maximum number of space-time streams supported for ranging NDP TX + * (zero-based) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_RX_STS: u32 attribute indicating + * the maximum number of space-time streams supported for ranging NDP RX + * (zero-based) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_TOTAL_LTF_TX: u32 attribute indicating the + * maximum total number of LTFs the device can transmit. The total number + * of LTFs is (number of LTF repetitions) * (number of space-time streams). + * This limits the allowed combinations of LTF repetitions and STS. + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_TOTAL_LTF_RX: u32 attribute indicating the + * maximum total number of LTFs the device can receive. The total number + * of LTFs is (number of LTF repetitions) * (number of space-time streams). + * This limits the allowed combinations of LTF repetitions and STS. * * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number @@ -7807,6 +7829,13 @@ enum nl80211_peer_measurement_ftm_capa { NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED, NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED, + NL80211_PMSR_FTM_CAPA_ATTR_6GHZ_SUPPORT, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_TX_LTF_REP, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_RX_LTF_REP, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_TX_STS, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_RX_STS, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_TOTAL_LTF_TX, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_TOTAL_LTF_RX, /* keep last */ NUM_NL80211_PMSR_FTM_CAPA_ATTR, -- cgit v1.2.3 From 853ce6943c385be2f6cccf371080e592f2e08b0f Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 11 Jan 2026 19:03:49 +0200 Subject: wifi: nl80211/cfg80211: clarify periodic FTM parameters for non-EDCA based ranging Periodic FTM request attributes are defined based on the periodic parameters used in EDCA-based ranging negotiation. However, non-EDCA based ranging (trigger-based/non-trigger-based) does not include periodic parameters in the negotiation protocol, even though upper layers may still request periodic measurements. Clarify the semantics of periodic ranging attributes when used with non-EDCA based ranging. Signed-off-by: Avraham Stern Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260111190221.b89cb3f68e1a.I7a9d8c6d1c66c77f1b43120a841101c96c3f19ad@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 200703c8b2c1..71219445f5c7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -7851,12 +7851,15 @@ enum nl80211_peer_measurement_ftm_capa { * &enum nl80211_preamble), optional for DMG (u32) * @NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP: number of bursts exponent as in * 802.11-2016 9.4.2.168 "Fine Timing Measurement Parameters element" - * (u8, 0-15, optional with default 15 i.e. "no preference") + * (u8, 0-15, optional with default 15 i.e. "no preference". No limit for + * non-EDCA ranging) * @NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD: interval between bursts in units * of 100ms (u16, optional with default 0) * @NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION: burst duration, as in 802.11-2016 * Table 9-257 "Burst Duration field encoding" (u8, 0-15, optional with - * default 15 i.e. "no preference") + * default 15 i.e. "no preference"). For non-EDCA ranging, this is the + * burst duration in milliseconds (optional with default 0, i.e. let the + * device decide). * @NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST: number of successful FTM frames * requested per burst * (u8, 0-31, optional with default 0 i.e. "no preference") -- cgit v1.2.3 From cfd46d1c6f4bf232c5630b1cf5c8b317d38101c5 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 11 Jan 2026 19:03:50 +0200 Subject: wifi: nl80211/cfg80211: add negotiated burst period to FTM result The FTM result includes some of the periodic measurement negotiated parameters (like the burst duration and number of bursts), but it doesn't include the burst period. Add it to the FTM result notification. Signed-off-by: Avraham Stern Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260111190221.e0778f86edef.I3c98c1933eb639963bc3ffdef81a8788b59f2188@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 71219445f5c7..8910b709bfb1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -7992,6 +7992,8 @@ enum nl80211_peer_measurement_ftm_failure_reasons { * 9.4.2.22.1) starting with the Measurement Token, with Measurement * Type 11. * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_PERIOD: actual burst period used by + * the responder (similar to request, u16) * * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal * @NL80211_PMSR_FTM_RESP_ATTR_MAX: highest attribute number @@ -8020,6 +8022,7 @@ enum nl80211_peer_measurement_ftm_resp { NL80211_PMSR_FTM_RESP_ATTR_LCI, NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC, NL80211_PMSR_FTM_RESP_ATTR_PAD, + NL80211_PMSR_FTM_RESP_ATTR_BURST_PERIOD, /* keep last */ NUM_NL80211_PMSR_FTM_RESP_ATTR, -- cgit v1.2.3 From 853800c746d38486673ef67f461b660a01d52716 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 11 Jan 2026 19:03:51 +0200 Subject: wifi: nl80211/cfg80211: support operating as RSTA in PMSR FTM request Add an option to operate as the RSTA in an FTM measurement request. When requested, the device will dwell on the requested channel until the peer starts the FTM negotiation. This option is only valid for trigger-based/non trigger-based measurement with LMR feedback which will allow the RSTA to receive the results of the measurement. Signed-off-by: Avraham Stern Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260111190221.1f95fc0afab4.Iae2d32783b8e7c4a29089fec0f4c6bce94d303cc@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8910b709bfb1..54ddbd9a5459 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -7812,6 +7812,8 @@ enum nl80211_peer_measurement_attrs { * maximum total number of LTFs the device can receive. The total number * of LTFs is (number of LTF repetitions) * (number of space-time streams). * This limits the allowed combinations of LTF repetitions and STS. + * @NL80211_PMSR_FTM_CAPA_ATTR_RSTA_SUPPORT: flag attribute indicating the + * device supports operating as the RSTA in PMSR FTM request * * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number @@ -7836,6 +7838,7 @@ enum nl80211_peer_measurement_ftm_capa { NL80211_PMSR_FTM_CAPA_ATTR_MAX_RX_STS, NL80211_PMSR_FTM_CAPA_ATTR_MAX_TOTAL_LTF_TX, NL80211_PMSR_FTM_CAPA_ATTR_MAX_TOTAL_LTF_RX, + NL80211_PMSR_FTM_CAPA_ATTR_RSTA_SUPPORT, /* keep last */ NUM_NL80211_PMSR_FTM_CAPA_ATTR, @@ -7888,6 +7891,14 @@ enum nl80211_peer_measurement_ftm_capa { * @NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR: optional. The BSS color of the * responder. Only valid if %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED * or %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED is set. + * @NL80211_PMSR_FTM_REQ_ATTR_RSTA: optional. Request to perform the measurement + * as the RSTA (flag). When set, the device is expected to dwell on the + * channel specified in %NL80211_PMSR_PEER_ATTR_CHAN until it receives the + * FTM request from the peer or the timeout specified by + * %NL80211_ATTR_TIMEOUT has expired. + * Only valid if %NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK is set (so the + * RSTA will have the measurement results to report back in the FTM + * response). * * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number @@ -7908,6 +7919,7 @@ enum nl80211_peer_measurement_ftm_req { NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED, NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK, NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR, + NL80211_PMSR_FTM_REQ_ATTR_RSTA, /* keep last */ NUM_NL80211_PMSR_FTM_REQ_ATTR, -- cgit v1.2.3 From 752b807028e63f1473b84eb1350e131eca5e5249 Mon Sep 17 00:00:00 2001 From: Matt Bobrowski Date: Tue, 27 Jan 2026 08:51:10 +0000 Subject: bpf: add new BPF_CGROUP_ITER_CHILDREN control option Currently, the BPF cgroup iterator supports walking descendants in either pre-order (BPF_CGROUP_ITER_DESCENDANTS_PRE) or post-order (BPF_CGROUP_ITER_DESCENDANTS_POST). These modes perform an exhaustive depth-first search (DFS) of the hierarchy. In scenarios where a BPF program may need to inspect only the direct children of a given parent cgroup, a full DFS is unnecessarily expensive. This patch introduces a new BPF cgroup iterator control option, BPF_CGROUP_ITER_CHILDREN. This control option restricts the traversal to the immediate children of a specified parent cgroup, allowing for more targeted and efficient iteration, particularly when exhaustive depth-first search (DFS) traversal is not required. Signed-off-by: Matt Bobrowski Link: https://lore.kernel.org/r/20260127085112.3608687-1-mattbobrowski@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 44e7dbc278e3..c8d400b7680a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order { BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ + /* + * Walks the immediate children of the specified parent + * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE, + * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP + * the iterator does not include the specified parent as one of the + * returned iterator elements. + */ + BPF_CGROUP_ITER_CHILDREN, }; union bpf_iter_link_info { -- cgit v1.2.3 From d42eb05e60fea31de49897d63a1d73f933303bd4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Jan 2026 08:24:02 -0700 Subject: io_uring: add support for BPF filtering for opcode restrictions Add support for loading classic BPF programs with io_uring to provide fine-grained filtering of SQE operations. Unlike IORING_REGISTER_RESTRICTIONS which only allows bitmap-based allow/deny of opcodes, BPF filters can inspect request attributes and make dynamic decisions. The filter is registered via IORING_REGISTER_BPF_FILTER with a struct io_uring_bpf: struct io_uring_bpf_filter { __u32 opcode; /* io_uring opcode to filter */ __u32 flags; __u32 filter_len; /* number of BPF instructions */ __u32 resv; __u64 filter_ptr; /* pointer to BPF filter */ __u64 resv2[5]; }; enum { IO_URING_BPF_CMD_FILTER = 1, }; struct io_uring_bpf { __u16 cmd_type; /* IO_URING_BPF_* values */ __u16 cmd_flags; /* none so far */ __u32 resv; union { struct io_uring_bpf_filter filter; }; }; and the filters get supplied a struct io_uring_bpf_ctx: struct io_uring_bpf_ctx { __u64 user_data; __u8 opcode; __u8 sqe_flags; __u8 pdu_size; __u8 pad[5]; }; where it's possible to filter on opcode and sqe_flags, with pdu_size indicating how much extra data is being passed in beyond the pad field. This will used for specific finer grained filtering inside an opcode. An example of that for sockets is in one of the following patches. Anything the opcode supports can end up in this struct, populated by the opcode itself, and hence can be filtered for. Filters have the following semantics: - Return 1 to allow the request - Return 0 to deny the request with -EACCES - Multiple filters can be stacked per opcode. All filters must return 1 for the opcode to be allowed. - Filters are evaluated in registration order (most recent first) The implementation uses classic BPF (cBPF) rather than eBPF for as that's required for containers, and since they can be used by any user in the system. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++ include/uapi/linux/io_uring/bpf_filter.h | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 include/uapi/linux/io_uring/bpf_filter.h (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b5b23c0d5283..94669b77fee8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -700,6 +700,9 @@ enum io_uring_register_op { /* auxiliary zcrx configuration, see enum zcrx_ctrl_op */ IORING_REGISTER_ZCRX_CTRL = 36, + /* register bpf filtering programs */ + IORING_REGISTER_BPF_FILTER = 37, + /* this goes last */ IORING_REGISTER_LAST, diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h new file mode 100644 index 000000000000..2d4d0e5743e4 --- /dev/null +++ b/include/uapi/linux/io_uring/bpf_filter.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Header file for the io_uring BPF filters. + */ +#ifndef LINUX_IO_URING_BPF_FILTER_H +#define LINUX_IO_URING_BPF_FILTER_H + +#include + +/* + * Struct passed to filters. + */ +struct io_uring_bpf_ctx { + __u64 user_data; + __u8 opcode; + __u8 sqe_flags; + __u8 pdu_size; /* size of aux data for filter */ + __u8 pad[5]; +}; + +enum { + /* + * If set, any currently unset opcode will have a deny filter attached + */ + IO_URING_BPF_FILTER_DENY_REST = 1, +}; + +struct io_uring_bpf_filter { + __u32 opcode; /* io_uring opcode to filter */ + __u32 flags; + __u32 filter_len; /* number of BPF instructions */ + __u32 resv; + __u64 filter_ptr; /* pointer to BPF filter */ + __u64 resv2[5]; +}; + +enum { + IO_URING_BPF_CMD_FILTER = 1, +}; + +struct io_uring_bpf { + __u16 cmd_type; /* IO_URING_BPF_* values */ + __u16 cmd_flags; /* none so far */ + __u32 resv; + union { + struct io_uring_bpf_filter filter; + }; +}; + +#endif -- cgit v1.2.3 From cff1c26b4223820431129696b45525e5928e6409 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 Jan 2026 14:50:05 -0700 Subject: io_uring/net: allow filtering on IORING_OP_SOCKET data Example population method for the BPF based opcode filtering. This exposes the socket family, type, and protocol to a registered BPF filter. This in turn enables the filter to make decisions based on what was passed in to the IORING_OP_SOCKET request type. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/bpf_filter.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h index 2d4d0e5743e4..4dbc89bbbf10 100644 --- a/include/uapi/linux/io_uring/bpf_filter.h +++ b/include/uapi/linux/io_uring/bpf_filter.h @@ -16,6 +16,13 @@ struct io_uring_bpf_ctx { __u8 sqe_flags; __u8 pdu_size; /* size of aux data for filter */ __u8 pad[5]; + union { + struct { + __u32 family; + __u32 type; + __u32 protocol; + } socket; + }; }; enum { -- cgit v1.2.3 From 8768770cf5d76d177fa2200e6957a372e61e06b5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 Jan 2026 15:59:54 -0700 Subject: io_uring/bpf_filter: allow filtering on contents of struct open_how This adds custom filtering for IORING_OP_OPENAT and IORING_OP_OPENAT2, where the open_how flags, mode, and resolve can be checked by filters. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/bpf_filter.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h index 4dbc89bbbf10..220351b81bc0 100644 --- a/include/uapi/linux/io_uring/bpf_filter.h +++ b/include/uapi/linux/io_uring/bpf_filter.h @@ -22,6 +22,11 @@ struct io_uring_bpf_ctx { __u32 type; __u32 protocol; } socket; + struct { + __u64 flags; + __u64 mode; + __u64 resolve; + } open; }; }; -- cgit v1.2.3 From cad3337bb6c3a2ba2307d6a9061e752e15681d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 19 Dec 2025 19:40:33 +0200 Subject: PCI: Add dword #defines for Bus Number + Secondary Latency Timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uapi/linux/pci_regs.h defines Primary/Secondary/Subordinate Bus Numbers and Secondary Latency Timer (PCIe r7.0, sec. 7.5.1.3) as byte register offsets, but in practice the code may read/write the entire dword. In the lack of #defines to handle the dword fields, the code ends up using literals which are not as easy to read. Add dword field masks for the Bus Number and Secondary Latency Timer fields and use them in probe.c. Signed-off-by: Ilpo Järvinen [bhelgaas: squash new #defines and uses together] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251219174036.16738-21-ilpo.jarvinen@linux.intel.com Link: https://patch.msgid.link/20251219174036.16738-22-ilpo.jarvinen@linux.intel.com --- include/uapi/linux/pci_regs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 3add74ae2594..8be55ece2a21 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -132,6 +132,11 @@ #define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ #define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ #define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ +/* Masks for dword-sized processing of Bus Number and Sec Latency Timer fields */ +#define PCI_PRIMARY_BUS_MASK 0x000000ff +#define PCI_SECONDARY_BUS_MASK 0x0000ff00 +#define PCI_SUBORDINATE_BUS_MASK 0x00ff0000 +#define PCI_SEC_LATENCY_TIMER_MASK 0xff000000 #define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ #define PCI_IO_LIMIT 0x1d #define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ -- cgit v1.2.3 From e698127eb7249d6c70fa8f2bdba469c0e54f2e2b Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 23 Jul 2025 10:07:28 -0400 Subject: drm/amdkfd: add extended capabilities to device snapshot Add additional capabilities reporting. Signed-off-by: Jonathan Kim Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 047bcb1cc078..e72359370857 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -149,6 +149,8 @@ struct kfd_dbg_device_info_entry { __u32 num_xcc; __u32 capability; __u32 debug_prop; + __u32 capability2; + __u32 pad; }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ -- cgit v1.2.3 From d8316b837c2ca5f92e781fa1575095c0132ae3c1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 6 Jan 2026 13:59:50 -0500 Subject: nfsd: add controls to set the minimum number of threads per pool Add a new "min_threads" variable to the nfsd_net, along with the corresponding netlink interface, to set that value from userland. Pass that value to svc_set_pool_threads() and svc_set_num_threads(). Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/uapi/linux/nfsd_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index e157e2009ea8..e9efbc9e63d8 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -35,6 +35,7 @@ enum { NFSD_A_SERVER_GRACETIME, NFSD_A_SERVER_LEASETIME, NFSD_A_SERVER_SCOPE, + NFSD_A_SERVER_MIN_THREADS, __NFSD_A_SERVER_MAX, NFSD_A_SERVER_MAX = (__NFSD_A_SERVER_MAX - 1) -- cgit v1.2.3 From a006ed4ecd4905b69402980ad7d4e5f31bf44953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Mon, 19 Jan 2026 15:32:55 +0100 Subject: vduse: add v1 API definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the kernel to detect whether the userspace VDUSE device supports the VQ group and ASID features. VDUSE devices that don't set the V1 API will not receive the new messages, and vdpa device will be created with only one vq group and asid. The next patches implement the new feature incrementally, only enabling the VDUSE device to set the V1 API version by the end of the series. Acked-by: Jason Wang Reviewed-by: Xie Yongji Signed-off-by: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Message-Id: <20260119143306.1818855-3-eperezma@redhat.com> --- include/uapi/linux/vduse.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 10ad71aa00d6..ccb92a1efce0 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -10,6 +10,10 @@ #define VDUSE_API_VERSION 0 +/* VQ groups and ASID support */ + +#define VDUSE_API_VERSION_1 1 + /* * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION). * This is used for future extension. -- cgit v1.2.3 From 9350a09afd086771b0612c7b7c9583e8a1568135 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Mon, 19 Jan 2026 15:32:56 +0100 Subject: vduse: add vq group support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows separate the different virtqueues in groups that shares the same address space. Asking the VDUSE device for the groups of the vq at the beginning as they're needed for the DMA API. Allocating 3 vq groups as net is the device that need the most groups: * Dataplane (guest passthrough) * CVQ * Shadowed vrings. Future versions of the series can include dynamic allocation of the groups array so VDUSE can declare more groups. Acked-by: Jason Wang Reviewed-by: Xie Yongji Signed-off-by: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Message-Id: <20260119143306.1818855-4-eperezma@redhat.com> --- include/uapi/linux/vduse.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index ccb92a1efce0..a3d51cf6df3a 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -31,6 +31,7 @@ * @features: virtio features * @vq_num: the number of virtqueues * @vq_align: the allocation alignment of virtqueue's metadata + * @ngroups: number of vq groups that VDUSE device declares * @reserved: for future use, needs to be initialized to zero * @config_size: the size of the configuration space * @config: the buffer of the configuration space @@ -45,7 +46,8 @@ struct vduse_dev_config { __u64 features; __u32 vq_num; __u32 vq_align; - __u32 reserved[13]; + __u32 ngroups; /* if VDUSE_API_VERSION >= 1 */ + __u32 reserved[12]; __u32 config_size; __u8 config[]; }; @@ -122,14 +124,18 @@ struct vduse_config_data { * struct vduse_vq_config - basic configuration of a virtqueue * @index: virtqueue index * @max_size: the max size of virtqueue - * @reserved: for future use, needs to be initialized to zero + * @reserved1: for future use, needs to be initialized to zero + * @group: virtqueue group + * @reserved2: for future use, needs to be initialized to zero * * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue. */ struct vduse_vq_config { __u32 index; __u16 max_size; - __u16 reserved[13]; + __u16 reserved1; + __u32 group; + __u16 reserved2[10]; }; /* -- cgit v1.2.3 From 079212f6877e5d07308c8998a8fbc7539ca3f8f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Mon, 19 Jan 2026 15:33:04 +0100 Subject: vduse: add vq group asid support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for assigning Address Space Identifiers (ASIDs) to each VQ group. This enables mapping each group into a distinct memory space. The vq group to ASID association is protected by a rwlock now. But the mutex domain_lock keeps protecting the domains of all ASIDs, as some operations like the one related with the bounce buffer size still requires to lock all the ASIDs. Acked-by: Jason Wang Signed-off-by: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Message-Id: <20260119143306.1818855-12-eperezma@redhat.com> --- include/uapi/linux/vduse.h | 66 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index a3d51cf6df3a..68b4287f9fac 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -32,6 +32,7 @@ * @vq_num: the number of virtqueues * @vq_align: the allocation alignment of virtqueue's metadata * @ngroups: number of vq groups that VDUSE device declares + * @nas: number of address spaces that VDUSE device declares * @reserved: for future use, needs to be initialized to zero * @config_size: the size of the configuration space * @config: the buffer of the configuration space @@ -47,7 +48,8 @@ struct vduse_dev_config { __u32 vq_num; __u32 vq_align; __u32 ngroups; /* if VDUSE_API_VERSION >= 1 */ - __u32 reserved[12]; + __u32 nas; /* if VDUSE_API_VERSION >= 1 */ + __u32 reserved[11]; __u32 config_size; __u8 config[]; }; @@ -166,6 +168,16 @@ struct vduse_vq_state_packed { __u16 last_used_idx; }; +/** + * struct vduse_vq_group_asid - virtqueue group ASID + * @group: Index of the virtqueue group + * @asid: Address space ID of the group + */ +struct vduse_vq_group_asid { + __u32 group; + __u32 asid; +}; + /** * struct vduse_vq_info - information of a virtqueue * @index: virtqueue index @@ -225,6 +237,7 @@ struct vduse_vq_eventfd { * @uaddr: start address of userspace memory, it must be aligned to page size * @iova: start of the IOVA region * @size: size of the IOVA region + * @asid: Address space ID of the IOVA region * @reserved: for future use, needs to be initialized to zero * * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM @@ -234,7 +247,8 @@ struct vduse_iova_umem { __u64 uaddr; __u64 iova; __u64 size; - __u64 reserved[3]; + __u32 asid; + __u32 reserved[5]; }; /* Register userspace memory for IOVA regions */ @@ -248,6 +262,7 @@ struct vduse_iova_umem { * @start: start of the IOVA region * @last: last of the IOVA region * @capability: capability of the IOVA region + * @asid: Address space ID of the IOVA region, only if device API version >= 1 * @reserved: for future use, needs to be initialized to zero * * Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of @@ -258,7 +273,8 @@ struct vduse_iova_info { __u64 last; #define VDUSE_IOVA_CAP_UMEM (1 << 0) __u64 capability; - __u64 reserved[3]; + __u32 asid; /* Only if device API version >= 1 */ + __u32 reserved[5]; }; /* @@ -267,6 +283,28 @@ struct vduse_iova_info { */ #define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info) +/** + * struct vduse_iotlb_entry_v2 - entry of IOTLB to describe one IOVA region + * + * @v1: the original vduse_iotlb_entry + * @asid: address space ID of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_GET_FD2 ioctl to find an overlapped IOVA region. + */ +struct vduse_iotlb_entry_v2 { + struct vduse_iotlb_entry v1; + __u32 asid; + __u32 reserved[12]; +}; + +/* + * Same as VDUSE_IOTLB_GET_FD but with vduse_iotlb_entry_v2 argument that + * support extra fields. + */ +#define VDUSE_IOTLB_GET_FD2 _IOWR(VDUSE_BASE, 0x1b, struct vduse_iotlb_entry_v2) + + /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /** @@ -275,11 +313,14 @@ struct vduse_iova_info { * @VDUSE_SET_STATUS: set the device status * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl + * @VDUSE_SET_VQ_GROUP_ASID: Notify userspace to update the address space of a + * virtqueue group. */ enum vduse_req_type { VDUSE_GET_VQ_STATE, VDUSE_SET_STATUS, VDUSE_UPDATE_IOTLB, + VDUSE_SET_VQ_GROUP_ASID, }; /** @@ -314,6 +355,18 @@ struct vduse_iova_range { __u64 last; }; +/** + * struct vduse_iova_range_v2 - IOVA range [start, last] if API_VERSION >= 1 + * @start: start of the IOVA range + * @last: last of the IOVA range + * @asid: address space ID of the IOVA range + */ +struct vduse_iova_range_v2 { + __u64 start; + __u64 last; + __u32 asid; +}; + /** * struct vduse_dev_request - control request * @type: request type @@ -322,6 +375,8 @@ struct vduse_iova_range { * @vq_state: virtqueue state, only index field is available * @s: device status * @iova: IOVA range for updating + * @iova_v2: IOVA range for updating if API_VERSION >= 1 + * @vq_group_asid: ASID of a virtqueue group * @padding: padding * * Structure used by read(2) on /dev/vduse/$NAME. @@ -334,6 +389,11 @@ struct vduse_dev_request { struct vduse_vq_state vq_state; struct vduse_dev_status s; struct vduse_iova_range iova; + /* Following members but padding exist only if vduse api + * version >= 1 + */ + struct vduse_iova_range_v2 iova_v2; + struct vduse_vq_group_asid vq_group_asid; __u32 padding[32]; }; }; -- cgit v1.2.3 From 5ca243f6e3c30b979a54a96b96df355dda2b4d0f Mon Sep 17 00:00:00 2001 From: Deepak Gupta Date: Sun, 25 Jan 2026 21:09:54 -0700 Subject: prctl: add arch-agnostic prctl()s for indirect branch tracking Three architectures (x86, aarch64, riscv) have support for indirect branch tracking feature in a very similar fashion. On a very high level, indirect branch tracking is a CPU feature where CPU tracks branches which use a memory operand to transfer control. As part of this tracking, during an indirect branch, the CPU expects a landing pad instruction on the target PC, and if not found, the CPU raises some fault (architecture-dependent). x86 landing pad instr - 'ENDBRANCH' arch64 landing pad instr - 'BTI' riscv landing instr - 'lpad' Given that three major architectures have support for indirect branch tracking, this patch creates architecture-agnostic 'prctls' to allow userspace to control this feature. They are: - PR_GET_INDIR_BR_LP_STATUS: Get the current configured status for indirect branch tracking. - PR_SET_INDIR_BR_LP_STATUS: Set the configuration for indirect branch tracking. The following status options are allowed: - PR_INDIR_BR_LP_ENABLE: Enables indirect branch tracking on user thread. - PR_INDIR_BR_LP_DISABLE: Disables indirect branch tracking on user thread. - PR_LOCK_INDIR_BR_LP_STATUS: Locks configured status for indirect branch tracking for user thread. Reviewed-by: Mark Brown Reviewed-by: Zong Li Signed-off-by: Deepak Gupta Tested-by: Andreas Korb # QEMU, custom CVA6 Tested-by: Valentin Haudiquet Link: https://patch.msgid.link/20251112-v5_user_cfi_series-v23-13-b55691eacf4f@rivosinc.com [pjw@kernel.org: cleaned up patch description, code comments] Signed-off-by: Paul Walmsley --- include/uapi/linux/prctl.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 51c4e8c82b1e..f57098fb0ba8 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -386,4 +386,31 @@ struct prctl_mm_map { # define PR_FUTEX_HASH_SET_SLOTS 1 # define PR_FUTEX_HASH_GET_SLOTS 2 +/* + * Get the current indirect branch tracking configuration for the current + * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + */ +#define PR_GET_INDIR_BR_LP_STATUS 79 + +/* + * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will + * enable cpu feature for user thread, to track all indirect branches and ensure + * they land on arch defined landing pad instruction. + * x86 - If enabled, an indirect branch must land on an ENDBRANCH instruction. + * arch64 - If enabled, an indirect branch must land on a BTI instruction. + * riscv - If enabled, an indirect branch must land on an lpad instruction. + * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect + * branches will no more be tracked by cpu to land on arch defined landing pad + * instruction. + */ +#define PR_SET_INDIR_BR_LP_STATUS 80 +# define PR_INDIR_BR_LP_ENABLE (1UL << 0) + +/* + * Prevent further changes to the specified indirect branch tracking + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_INDIR_BR_LP_STATUS 81 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 2af7c9cf021c5dabe880b68e5cc22c618060d954 Mon Sep 17 00:00:00 2001 From: Deepak Gupta Date: Sun, 25 Jan 2026 21:09:55 -0700 Subject: riscv/ptrace: expose riscv CFI status and state via ptrace and in core files Expose a new register type NT_RISCV_USER_CFI for risc-v CFI status and state. Intentionally, both landing pad and shadow stack status and state are rolled into the CFI state. Creating two different NT_RISCV_USER_XXX would not be useful and would waste a note type. Enabling, disabling and locking the CFI feature is not allowed via ptrace set interface. However, setting 'elp' state or setting shadow stack pointer are allowed via the ptrace set interface. It is expected that 'gdb' might need to fixup 'elp' state or 'shadow stack' pointer. Signed-off-by: Deepak Gupta Tested-by: Andreas Korb # QEMU, custom CVA6 Tested-by: Valentin Haudiquet Link: https://patch.msgid.link/20251112-v5_user_cfi_series-v23-19-b55691eacf4f@rivosinc.com [pjw@kernel.org: updated to apply; cleaned patch description and comments; addressed checkpatch issues] Signed-off-by: Paul Walmsley --- include/uapi/linux/elf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 819ded2d39de..ee30dcd80901 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -545,6 +545,8 @@ typedef struct elf64_shdr { #define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ #define NN_RISCV_TAGGED_ADDR_CTRL "LINUX" #define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */ +#define NN_RISCV_USER_CFI "LINUX" +#define NT_RISCV_USER_CFI 0x903 /* RISC-V shadow stack state */ #define NN_LOONGARCH_CPUCFG "LINUX" #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ #define NN_LOONGARCH_CSR "LINUX" -- cgit v1.2.3 From 0e6b7eae1fded85f94a357d6132f07d64c614cfa Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Mon, 26 Jan 2026 12:56:57 +0100 Subject: fs: add FS_XFLAG_VERITY for fs-verity files fs-verity introduced inode flag for inodes with enabled fs-verity on them. This patch adds FS_XFLAG_VERITY file attribute which can be retrieved with FS_IOC_FSGETXATTR ioctl() and file_getattr() syscall. This flag is read-only and can not be set with corresponding set ioctl() and file_setattr(). The FS_IOC_SETFLAGS requires file to be opened for writing which is not allowed for verity files. The FS_IOC_FSSETXATTR and file_setattr() clears this flag from the user input. As this is now common flag for both flag interfaces (flags/xflags) add it to overlapping flags list to exclude it from overwrite. Signed-off-by: Andrey Albershteyn Link: https://patch.msgid.link/20260126115658.27656-2-aalbersh@kernel.org Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/uapi/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 66ca526cf786..70b2b661f42c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -253,6 +253,7 @@ struct file_attr { #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +#define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is -- cgit v1.2.3 From 8cf82bb558517503a81f8e3c49914c0836360aa6 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Sat, 24 Jan 2026 23:50:11 +0900 Subject: misc: pci_endpoint_test: Add BAR subrange mapping test case Add a new PCITEST_BAR_SUBRANGE ioctl to exercise EPC BAR subrange mapping end-to-end. The test programs a simple 2-subrange layout on the endpoint (via pci-epf-test) and verifies that: - the endpoint-provided per-subrange signature bytes are observed at the expected PCIe BAR offsets, and - writes to each subrange are routed to the correct backing region (i.e. the submap order is applied rather than accidentally working due to an identity mapping). Return -EOPNOTSUPP when the endpoint does not advertise subrange mapping, -ENODATA when the BAR is disabled, and -EBUSY when the BAR is reserved for the test register space. Signed-off-by: Koichiro Den Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20260124145012.2794108-8-den@valinux.co.jp --- include/uapi/linux/pcitest.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index d6023a45a9d0..710f8842223f 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -22,6 +22,7 @@ #define PCITEST_GET_IRQTYPE _IO('P', 0x9) #define PCITEST_BARS _IO('P', 0xa) #define PCITEST_DOORBELL _IO('P', 0xb) +#define PCITEST_BAR_SUBRANGE _IO('P', 0xc) #define PCITEST_CLEAR_IRQ _IO('P', 0x10) #define PCITEST_IRQ_TYPE_UNDEFINED -1 -- cgit v1.2.3 From bc443c253fcdd2636e2a29fde3f749d39d479cf0 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 26 Jan 2026 17:22:51 +0100 Subject: dpll: expose fractional frequency offset in ppt Currently, the dpll subsystem exports the fractional frequency offset (FFO) in parts per million (ppm). This granularity is insufficient for high-precision synchronization scenarios which often require parts per trillion (ppt) resolution. Add a new netlink attribute DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET_PPT to expose the FFO in ppt. Update the dpll netlink core to expect the driver-provided FFO value to be in ppt. To maintain backward compatibility with existing userspace tools, populate the legacy DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET attribute by dividing the new ppt value by 1,000,000. Update the zl3073x and mlx5 drivers to provide the FFO value in ppt: - zl3073x: adjust the fixed-point calculation to produce ppt (10^12) instead of ppm (10^6). - mlx5: scale the existing ppm value by 1,000,000. Signed-off-by: Ivan Vecera Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20260126162253.27890-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index b7ff9c44f9aa..de0005f28e5c 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -253,6 +253,7 @@ enum dpll_a_pin { DPLL_A_PIN_ESYNC_PULSE, DPLL_A_PIN_REFERENCE_SYNC, DPLL_A_PIN_PHASE_ADJUST_GRAN, + DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET_PPT, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) -- cgit v1.2.3 From 8443e2087e7002fa25984faad6bbf5f63b280645 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 30 Jan 2026 00:19:51 +0800 Subject: ublk: add UBLK_F_NO_AUTO_PART_SCAN feature flag Add a new feature flag UBLK_F_NO_AUTO_PART_SCAN to allow users to suppress automatic partition scanning when starting a ublk device. This is useful for some cases in which user don't want to scan partitions. Users still can manually trigger partition scanning later when appropriate using standard tools (e.g., partprobe, blockdev --rereadpt). Reported-by: Yoav Cohen Link: https://lore.kernel.org/linux-block/DM4PR12MB63280C5637917C071C2F0D65A9A8A@DM4PR12MB6328.namprd12.prod.outlook.com/ Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 743d31491387..a88876756805 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -367,6 +367,9 @@ */ #define UBLK_F_SAFE_STOP_DEV (1ULL << 17) +/* Disable automatic partition scanning when device is started */ +#define UBLK_F_NO_AUTO_PART_SCAN (1ULL << 18) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 503efe850c7463a1e59df133b84461ef53c0361f Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Mon, 19 Jan 2026 10:02:41 +0800 Subject: delayacct: add timestamp of delay max Problem ======= Commit 658eb5ab916d ("delayacct: add delay max to record delay peak") introduced the delay max for getdelays, which records abnormal latency peaks and helps us understand the magnitude of such delays. However, the peak latency value alone is insufficient for effective root cause analysis. Without the precise timestamp of when the peak occurred, we still lack the critical context needed to correlate it with other system events. Solution ======== To address this, we need to additionally record a precise timestamp when the maximum latency occurs. By correlating this timestamp with system logs and monitoring metrics, we can identify processes with abnormal resource usage at the same moment, which can help us to pinpoint root causes. Use Case ======== bash-4.4# ./getdelays -d -t 227 print delayacct stats ON TGID 227 CPU count real total virtual total delay total delay average delay max delay min delay max timestamp 46 188000000 192348334 4098012 0.089ms 0.429260ms 0.051205ms 2026-01-15T15:06:58 IO count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A SWAP count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A RECLAIM count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A THRAS HING count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A COMPACT count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A WPCOPY count delay total delay average delay max delay min delay max timestamp 182 19413338 0.107ms 0.547353ms 0.022462ms 2026-01-15T15:05:24 IRQ count delay total delay average delay max delay min delay max timestamp 0 0 0.000ms 0.000000ms 0.000000ms N/A Link: https://lkml.kernel.org/r/20260119100241520gWubW8-5QfhSf9gjqcc_E@zte.com.cn Signed-off-by: Wang Yaxin Cc: Fan Yu Cc: Jonathan Corbet Cc: xu xin Cc: Yang Yang Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 5929030d4e8b..1b31e8e14d2f 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -18,6 +18,16 @@ #define _LINUX_TASKSTATS_H #include +#ifdef __KERNEL__ +#include +#else +#ifndef _LINUX_TIME64_H +struct timespec64 { + __s64 tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; +#endif +#endif /* Format for per-task data returned to userland when * - a task exits @@ -34,7 +44,7 @@ */ -#define TASKSTATS_VERSION 16 +#define TASKSTATS_VERSION 17 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -230,6 +240,16 @@ struct taskstats { __u64 irq_delay_max; __u64 irq_delay_min; + + /*v17: delay max timestamp record*/ + struct timespec64 cpu_delay_max_ts; + struct timespec64 blkio_delay_max_ts; + struct timespec64 swapin_delay_max_ts; + struct timespec64 freepages_delay_max_ts; + struct timespec64 thrashing_delay_max_ts; + struct timespec64 compact_delay_max_ts; + struct timespec64 wpcopy_delay_max_ts; + struct timespec64 irq_delay_max_ts; }; -- cgit v1.2.3 From 072e6f7f416f5d17be71000b31fb108651ad360d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 30 Jan 2026 16:21:06 +0100 Subject: wifi: cfg80211: add initial UHR support Add initial support for making UHR connections (or suppressing that), adding UHR capable stations on the AP side, encoding and decoding UHR MCSes (except rate calculation for the new MCSes 17, 19, 20 and 23) as well as regulatory support. Link: https://patch.msgid.link/20260130164259.54cc12fbb307.I26126bebd83c7ab17e99827489f946ceabb3521f@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 706a98686068..b63f71850906 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2977,6 +2977,13 @@ enum nl80211_commands { * @NL80211_ATTR_EPP_PEER: A flag attribute to indicate if the peer is an EPP * STA. Used with %NL80211_CMD_NEW_STA and %NL80211_CMD_ADD_LINK_STA * + * @NL80211_ATTR_UHR_CAPABILITY: UHR Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION). Can be set + * only if HE/EHT are also available. + * @NL80211_ATTR_DISABLE_UHR: Force UHR capable interfaces to disable + * this feature during association. This is a flag attribute. + * Currently only supported in mac80211 drivers. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3547,6 +3554,9 @@ enum nl80211_attrs { NL80211_ATTR_EPP_PEER, + NL80211_ATTR_UHR_CAPABILITY, + NL80211_ATTR_DISABLE_UHR, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3899,6 +3909,12 @@ enum nl80211_eht_ru_alloc { * @NL80211_RATE_INFO_4_MHZ_WIDTH: 4 MHz S1G rate * @NL80211_RATE_INFO_8_MHZ_WIDTH: 8 MHz S1G rate * @NL80211_RATE_INFO_16_MHZ_WIDTH: 16 MHz S1G rate + * @NL80211_RATE_INFO_UHR_MCS: UHR MCS index (u8, 0-15, 17, 19, 20, 23) + * Note that the other EHT attributes (such as @NL80211_RATE_INFO_EHT_NSS) + * are used in conjunction with this where applicable + * @NL80211_RATE_INFO_UHR_ELR: UHR ELR flag, which restricts NSS to 1, + * MCS to 0 or 1, and GI to %NL80211_RATE_INFO_EHT_GI_1_6. + * @NL80211_RATE_INFO_UHR_IM: UHR Interference Mitigation flag * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -3932,6 +3948,9 @@ enum nl80211_rate_info { NL80211_RATE_INFO_4_MHZ_WIDTH, NL80211_RATE_INFO_8_MHZ_WIDTH, NL80211_RATE_INFO_16_MHZ_WIDTH, + NL80211_RATE_INFO_UHR_MCS, + NL80211_RATE_INFO_UHR_ELR, + NL80211_RATE_INFO_UHR_IM, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, @@ -4254,6 +4273,10 @@ enum nl80211_mpath_info { * capabilities element * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE: EHT PPE thresholds information as * defined in EHT capabilities element + * @NL80211_BAND_IFTYPE_ATTR_UHR_CAP_MAC: UHR MAC capabilities as in UHR + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_UHR_CAP_PHY: UHR PHY capabilities as in UHR + * capabilities element * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined */ @@ -4271,6 +4294,8 @@ enum nl80211_band_iftype_attr { NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE, + NL80211_BAND_IFTYPE_ATTR_UHR_CAP_MAC, + NL80211_BAND_IFTYPE_ATTR_UHR_CAP_PHY, /* keep last */ __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, @@ -4453,6 +4478,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_S1G_NO_PRIMARY: Channel is not permitted for use * as a primary channel. Does not prevent the channel from existing * as a non-primary subchannel. Only applicable to S1G channels. + * @NL80211_FREQUENCY_ATTR_NO_UHR: UHR operation is not allowed on this channel + * in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4502,6 +4529,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_8MHZ, NL80211_FREQUENCY_ATTR_NO_16MHZ, NL80211_FREQUENCY_ATTR_S1G_NO_PRIMARY, + NL80211_FREQUENCY_ATTR_NO_UHR, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4715,6 +4743,7 @@ enum nl80211_sched_scan_match_attr { * despite NO_IR configuration. * @NL80211_RRF_ALLOW_20MHZ_ACTIVITY: Allow activity in 20 MHz bandwidth, * despite NO_IR configuration. + * @NL80211_RRF_NO_UHR: UHR operation not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1 << 0, @@ -4741,6 +4770,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1 << 23, NL80211_RRF_ALLOW_6GHZ_VLP_AP = 1 << 24, NL80211_RRF_ALLOW_20MHZ_ACTIVITY = 1 << 25, + NL80211_RRF_NO_UHR = 1 << 26, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR -- cgit v1.2.3 From 13edc7d4e0aa4abb5d50a062b61b9bffb01b0327 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 2 Feb 2026 19:04:10 +0530 Subject: RDMA/bnxt_re: Report packet pacing capabilities when querying device Enable the support to report packet pacing capabilities from kernel to user space. Packet pacing allows to limit the rate to any number between the maximum and minimum. The capabilities are exposed to user space through query_device. The following capabilities are reported: 1. The maximum and minimum rate limit in kbps. 2. Bitmap showing which QP types support rate limit. Signed-off-by: Damodharam Ammepalli Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20260202133413.3182578-3-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Anantha Prabhu Signed-off-by: Leon Romanovsky --- include/uapi/rdma/bnxt_re-abi.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index faa9d62b3b30..f24edf1c75eb 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -56,6 +56,7 @@ enum { BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL, BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL, BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED = 0x40, + BNXT_RE_UCNTX_CMASK_QP_RATE_LIMIT_ENABLED = 0x80ULL, }; enum bnxt_re_wqe_mode { @@ -215,4 +216,19 @@ enum bnxt_re_toggle_mem_methods { BNXT_RE_METHOD_GET_TOGGLE_MEM = (1U << UVERBS_ID_NS_SHIFT), BNXT_RE_METHOD_RELEASE_TOGGLE_MEM, }; + +struct bnxt_re_packet_pacing_caps { + __u32 qp_rate_limit_min; + __u32 qp_rate_limit_max; /* In kbps */ + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RC + */ + __u32 supported_qpts; + __u32 reserved; +}; + +struct bnxt_re_query_device_ex_resp { + struct bnxt_re_packet_pacing_caps packet_pacing_caps; +}; #endif /* __BNXT_RE_UVERBS_ABI_H__*/ -- cgit v1.2.3 From 3495064b6d65a669b409cfe1241db4f3c540251a Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Fri, 30 Jan 2026 17:36:00 +0000 Subject: ptp: vmclock: add vm generation counter Similar to live migration, loading a VM from some saved state (aka snapshot) is also an event that calls for clock adjustments in the guest. However, guests might want to take more actions as a response to such events, e.g. as discarding UUIDs, resetting network connections, reseeding entropy pools, etc. These are actions that guests don't typically take during live migration, so add a new field in the vmclock_abi called vm_generation_counter which informs the guest about such events. Hypervisor advertises support for vm_generation_counter through the VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT flag. Users need to check the presence of this bit in vmclock_abi flags field before using this flag. Signed-off-by: Babis Chalios Signed-off-by: David Woodhouse Reviewed-by: David Woodhouse Tested-by: Takahiro Itazur Link: https://patch.msgid.link/20260130173704.12575-2-itazur@amazon.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/vmclock-abi.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h index 2d99b29ac44a..937fe00e4f33 100644 --- a/include/uapi/linux/vmclock-abi.h +++ b/include/uapi/linux/vmclock-abi.h @@ -115,6 +115,12 @@ struct vmclock_abi { * bit again after the update, using the about-to-be-valid fields. */ #define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + /* + * If the VM_GEN_COUNTER_PRESENT flag is set, the hypervisor will + * bump the vm_generation_counter field every time the guest is + * loaded from some save state (restored from a snapshot). + */ +#define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) __u8 pad[2]; __u8 clock_status; @@ -177,6 +183,15 @@ struct vmclock_abi { __le64 time_frac_sec; /* Units of 1/2^64 of a second */ __le64 time_esterror_nanosec; __le64 time_maxerror_nanosec; + + /* + * This field changes to another non-repeating value when the guest + * has been loaded from a snapshot. In addition to handling a + * disruption in time (which will also be signalled through the + * disruption_marker field), a guest may wish to discard UUIDs, + * reset network connections, reseed entropy, etc. + */ + __le64 vm_generation_counter; }; #endif /* __VMCLOCK_ABI_H__ */ -- cgit v1.2.3 From 3b1526ddb25452385b52f2588b655f524a57070b Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Fri, 30 Jan 2026 17:36:01 +0000 Subject: ptp: vmclock: support device notifications Add optional support for device notifications in VMClock. When supported, the hypervisor will send a device notification every time it updates the seq_count to a new even value. Moreover, add support for poll() in VMClock as a means to propagate this notification to user space. poll() will return a POLLIN event to listeners every time seq_count changes to a value different than the one last seen (since open() or last read()/pread()). This means that when poll() returns a POLLIN event, listeners need to use read() to observe what has changed and update the reader's view of seq_count. In other words, after a poll() returned, all subsequent calls to poll() will immediately return with a POLLIN event until the listener calls read(). The device advertises support for the notification mechanism by setting flag VMCLOCK_FLAG_NOTIFICATION_PRESENT in vmclock_abi flags field. If the flag is not present the driver won't setup the ACPI notification handler and poll() will always immediately return POLLHUP. Signed-off-by: Babis Chalios Signed-off-by: David Woodhouse Signed-off-by: Takahiro Itazuri Reviewed-by: David Woodhouse Tested-by: Takahiro Itazuri Link: https://patch.msgid.link/20260130173704.12575-3-itazur@amazon.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/vmclock-abi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h index 937fe00e4f33..d320623b0118 100644 --- a/include/uapi/linux/vmclock-abi.h +++ b/include/uapi/linux/vmclock-abi.h @@ -121,6 +121,11 @@ struct vmclock_abi { * loaded from some save state (restored from a snapshot). */ #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) + /* + * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send + * a notification every time it updates seq_count to a new even number. + */ +#define VMCLOCK_FLAG_NOTIFICATION_PRESENT (1 << 9) __u8 pad[2]; __u8 clock_status; -- cgit v1.2.3 From ef6a31d035a1000071dc4846aebd02ad081db9e4 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Wed, 7 Jan 2026 14:09:01 +0000 Subject: btrfs: add definitions and constants for remap-tree Add an incompat flag for the new remap-tree feature, and the constants and definitions needed to support it. Reviewed-by: Boris Burkov Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + include/uapi/linux/btrfs_tree.h | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index e8fd92789423..9165154a274d 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -336,6 +336,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) #define BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE (1ULL << 14) #define BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA (1ULL << 16) +#define BTRFS_FEATURE_INCOMPAT_REMAP_TREE (1ULL << 17) struct btrfs_ioctl_feature_flags { __u64 compat_flags; diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index fc29d273845d..f011d34cb699 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -76,6 +76,9 @@ /* Tracks RAID stripes in block groups. */ #define BTRFS_RAID_STRIPE_TREE_OBJECTID 12ULL +/* Holds details of remapped addresses after relocation. */ +#define BTRFS_REMAP_TREE_OBJECTID 13ULL + /* device stats in the device tree */ #define BTRFS_DEV_STATS_OBJECTID 0ULL @@ -282,6 +285,10 @@ #define BTRFS_RAID_STRIPE_KEY 230 +#define BTRFS_IDENTITY_REMAP_KEY 234 +#define BTRFS_REMAP_KEY 235 +#define BTRFS_REMAP_BACKREF_KEY 236 + /* * Records the overall state of the qgroups. * There's only one instance of this key present, @@ -1161,6 +1168,7 @@ struct btrfs_dev_replace_item { #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) #define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9) #define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10) +#define BTRFS_BLOCK_GROUP_REMAPPED (1ULL << 11) #define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ BTRFS_SPACE_INFO_GLOBAL_RSV) @@ -1323,4 +1331,13 @@ struct btrfs_verity_descriptor_item { __u8 encryption; } __attribute__ ((__packed__)); +/* + * For a range identified by a BTRFS_REMAP_KEY item in the remap tree, gives + * the address that the start of the range will get remapped to. This + * structure is also shared by BTRFS_REMAP_BACKREF_KEY. + */ +struct btrfs_remap_item { + __le64 address; +} __attribute__ ((__packed__)); + #endif /* _BTRFS_CTREE_H_ */ -- cgit v1.2.3 From 0b4d29fa98ca1a49c4498353253f857573871ba0 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Wed, 7 Jan 2026 14:09:02 +0000 Subject: btrfs: add METADATA_REMAP chunk type Add a new METADATA_REMAP chunk type, which is a metadata chunk that holds the remap tree. This is needed for bootstrapping purposes: the remap tree can't itself be remapped, and must be relocated the existing way, by COWing every leaf. The remap tree can't go in the SYSTEM chunk as space there is limited, because a copy of the chunk item gets placed in the superblock. The changes in fs/btrfs/volumes.h are because we're adding a new block group type bit after the profile bits, and so can no longer rely on the const_ilog2 trick. The sizing to 32MB per chunk, matching the SYSTEM chunk, is an estimate here, we can adjust it later if it proves to be too big or too small. This works out to be ~500,000 remap items, which for a 4KB block size covers ~2GB of remapped data in the worst case and ~500TB in the best case. Reviewed-by: Boris Burkov Signed-off-by: Mark Harmstone Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index f011d34cb699..76578426671c 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -1169,12 +1169,14 @@ struct btrfs_dev_replace_item { #define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9) #define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10) #define BTRFS_BLOCK_GROUP_REMAPPED (1ULL << 11) +#define BTRFS_BLOCK_GROUP_METADATA_REMAP (1ULL << 12) #define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ BTRFS_SPACE_INFO_GLOBAL_RSV) #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ BTRFS_BLOCK_GROUP_SYSTEM | \ - BTRFS_BLOCK_GROUP_METADATA) + BTRFS_BLOCK_GROUP_METADATA | \ + BTRFS_BLOCK_GROUP_METADATA_REMAP) #define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ BTRFS_BLOCK_GROUP_RAID1 | \ -- cgit v1.2.3 From 7977011460cffc6f5a0cd830584c832c4aa07076 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Wed, 7 Jan 2026 14:09:07 +0000 Subject: btrfs: add extended version of struct block_group_item Add a struct btrfs_block_group_item_v2, which is used in the block group tree if the remap-tree incompat flag is set. This adds two new fields to the block group item: `remap_bytes` and `identity_remap_count`. `remap_bytes` records the amount of data that's physically within this block group, but nominally in another, remapped block group. This is necessary because this data will need to be moved first if this block group is itself relocated. If `remap_bytes` > 0, this is an indicator to the relocation thread that it will need to search the remap-tree for backrefs. A block group must also have `remap_bytes` == 0 before it can be dropped. `identity_remap_count` records how many identity remap items are located in the remap tree for this block group. When relocation is begun for this block group, this is set to the number of holes in the free-space tree for this range. As identity remaps are converted into actual remaps by the relocation process, this number is decreased. Once it reaches 0, either because of relocation or because extents have been deleted, the block group has been fully remapped and its chunk's device extents are removed. Reviewed-by: Boris Burkov Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 76578426671c..86820a9644e8 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -1229,6 +1229,14 @@ struct btrfs_block_group_item { __le64 flags; } __attribute__ ((__packed__)); +struct btrfs_block_group_item_v2 { + __le64 used; + __le64 chunk_objectid; + __le64 flags; + __le64 remap_bytes; + __le32 identity_remap_count; +} __attribute__ ((__packed__)); + struct btrfs_free_space_info { __le32 extent_count; __le32 flags; -- cgit v1.2.3 From 8620da16fb6be1fd9906374fa1c763a10c6918df Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Wed, 7 Jan 2026 14:09:08 +0000 Subject: btrfs: allow mounting filesystems with remap-tree incompat flag If we encounter a filesystem with the remap-tree incompat flag set, validate its compatibility with the other flags, and load the remap tree using the values that have been added to the superblock. The remap-tree feature depends on the free-space-tree, but no-holes and block-group-tree have been made dependencies to reduce the testing matrix. Similarly I'm not aware of any reason why mixed-bg and zoned would be incompatible with remap-tree, but this is blocked for the time being until it can be fully tested. Reviewed-by: Boris Burkov Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 86820a9644e8..f7843e6bb978 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -721,9 +721,12 @@ struct btrfs_super_block { __u8 metadata_uuid[BTRFS_FSID_SIZE]; __u64 nr_global_roots; + __le64 remap_root; + __le64 remap_root_generation; + __u8 remap_root_level; /* Future expansion */ - __le64 reserved[27]; + __u8 reserved[199]; __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; -- cgit v1.2.3 From 4fa4ac5e584841c0f9b01c2f7dd0c2e3caa8bca0 Mon Sep 17 00:00:00 2001 From: Chia-Yu Chang Date: Sat, 31 Jan 2026 23:25:13 +0100 Subject: tcp: accecn: add tcpi_ecn_mode and tcpi_option2 in tcp_info Add 2-bit tcpi_ecn_mode feild within tcp_info to indicate which ECN mode is negotiated: ECN_MODE_DISABLED, ECN_MODE_RFC3168, ECN_MODE_ACCECN, or ECN_MODE_PENDING. This is done by utilizing available bits from tcpi_accecn_opt_seen (reduced from 16 bits to 2 bits) and tcpi_accecn_fail_mode (reduced from 16 bits to 4 bits). Also, an extra 24-bit tcpi_options2 field is identified to represent newer options and connection features, as all 8 bits of tcpi_options field have been used. Signed-off-by: Chia-Yu Chang Co-developed-by: Neal Cardwell Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260131222515.8485-14-chia-yu.chang@nokia-bell-labs.com Signed-off-by: Paolo Abeni --- include/uapi/linux/tcp.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index dce3113787a7..03772dd4d399 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -226,6 +226,24 @@ enum tcp_ca_state { #define TCPF_CA_Loss (1< Date: Wed, 4 Feb 2026 10:18:26 +0200 Subject: uapi: sound: sof: tokens: Add missing token for KCPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align with the firmware and add the missing token for pipeline kcps. Signed-off-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Link: https://patch.msgid.link/20260204081833.16630-4-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/uapi/sound/sof/tokens.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index 5fa8ab5088e0..a68381a263eb 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -56,6 +56,7 @@ #define SOF_TKN_SCHED_LP_MODE 207 #define SOF_TKN_SCHED_MEM_USAGE 208 #define SOF_TKN_SCHED_USE_CHAIN_DMA 209 +#define SOF_TKN_SCHED_KCPS 210 /* volume */ #define SOF_TKN_VOLUME_RAMP_STEP_TYPE 250 -- cgit v1.2.3 From 15a55ec2f8b956d6aa0dd948c907e13db7978c6e Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Wed, 4 Feb 2026 10:18:28 +0200 Subject: ASoC: SOF: ipc4-topology: Add new tokens for pipeline direction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parse the pipeline direction from topology. The direction_valid token is required for backward-compatibility with older topologies that may not have the direction set for pipelines. This will be used when setting up pipelines to check if a pipeline is in the same direction as the requested params and skip those in the opposite direction like in the case of echo reference capture pipelines during playback. Signed-off-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Link: https://patch.msgid.link/20260204081833.16630-6-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/uapi/sound/sof/tokens.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index a68381a263eb..f4a7baadb44d 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -57,6 +57,8 @@ #define SOF_TKN_SCHED_MEM_USAGE 208 #define SOF_TKN_SCHED_USE_CHAIN_DMA 209 #define SOF_TKN_SCHED_KCPS 210 +#define SOF_TKN_SCHED_DIRECTION 211 +#define SOF_TKN_SCHED_DIRECTION_VALID 212 /* volume */ #define SOF_TKN_VOLUME_RAMP_STEP_TYPE 250 -- cgit v1.2.3 From 0ee4ddc1647b8b3b9e7a94d798a1774a764428c1 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Wed, 4 Feb 2026 16:02:57 +0100 Subject: KVM: s390: Storage key manipulation IOCTL Add a new IOCTL to allow userspace to manipulate storage keys directly. This will make it easier to write selftests related to storage keys. Acked-by: Heiko Carstens Signed-off-by: Claudio Imbrenda --- include/uapi/linux/kvm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dddb781b0507..ab3d3d96e75f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -974,6 +974,7 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD_FLAGS 244 #define KVM_CAP_ARM_SEA_TO_USER 245 #define KVM_CAP_S390_USER_OPEREXEC 246 +#define KVM_CAP_S390_KEYOP 247 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1219,6 +1220,15 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; +#define KVM_S390_KEYOP_ISKE 0x01 +#define KVM_S390_KEYOP_RRBE 0x02 +#define KVM_S390_KEYOP_SSKE 0x03 +struct kvm_s390_keyop { + __u64 guest_addr; + __u8 key; + __u8 operation; +}; + /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. @@ -1238,6 +1248,7 @@ struct kvm_vfio_spapr_tce { #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) +#define KVM_S390_KEYOP _IOWR(KVMIO, 0x53, struct kvm_s390_keyop) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) -- cgit v1.2.3 From f7ab71f178d56447e5efb55b65436feb68662f8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 6 Feb 2026 10:17:30 +0100 Subject: KVM: s390: Add explicit padding to struct kvm_s390_keyop The newly added structure causes a warning about implied padding: ./usr/include/linux/kvm.h:1247:1: error: padding struct size to alignment boundary with 6 bytes [-Werror=padded] The padding can lead to leaking kernel data and ABI incompatibilies when used on x86. Neither of these is a problem in this specific patch, but it's best to avoid it and use explicit padding fields in general. Fixes: 0ee4ddc1647b ("KVM: s390: Storage key manipulation IOCTL") Signed-off-by: Arnd Bergmann Reviewed-by: Claudio Imbrenda Signed-off-by: Claudio Imbrenda --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ab3d3d96e75f..d4250ab662fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1227,6 +1227,7 @@ struct kvm_s390_keyop { __u64 guest_addr; __u8 key; __u8 operation; + __u8 pad[6]; }; /* -- cgit v1.2.3 From ed82f35b926b2e505c14b7006473614b8f58b4f4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 Jan 2026 10:18:31 -0700 Subject: io_uring: allow registration of per-task restrictions Currently io_uring supports restricting operations on a per-ring basis. To use those, the ring must be setup in a disabled state by setting IORING_SETUP_R_DISABLED. Then restrictions can be set for the ring, and the ring can then be enabled. This commit adds support for IORING_REGISTER_RESTRICTIONS with ring_fd == -1, like the other "blind" register opcodes which work on the task rather than a specific ring. This allows registration of the same kind of restrictions as can been done on a specific ring, but with the task itself. Once done, any ring created will inherit these restrictions. If a restriction filter is registered with a task, then it's inherited on fork for its children. Children may only further restrict operations, not extend them. Inheriting restrictions include both the classic IORING_REGISTER_RESTRICTIONS based restrictions, as well as the BPF filters that have been registered with the task via IORING_REGISTER_BPF_FILTER. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 94669b77fee8..aeeffcf27fee 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -808,6 +808,13 @@ struct io_uring_restriction { __u32 resv2[3]; }; +struct io_uring_task_restriction { + __u16 flags; + __u16 nr_res; + __u32 resv[3]; + __DECLARE_FLEX_ARRAY(struct io_uring_restriction, restrictions); +}; + struct io_uring_clock_register { __u32 clockid; __u32 __resv[3]; -- cgit v1.2.3 From 42fc7e6543f6d17d2cf9ed3e5021f103a3d11182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 27 Nov 2025 12:51:34 +0100 Subject: landlock: Multithreading support for landlock_restrict_self() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the LANDLOCK_RESTRICT_SELF_TSYNC flag. With this flag, a given Landlock ruleset is applied to all threads of the calling process, instead of only the current one. Without this flag, multithreaded userspace programs currently resort to using the nptl(7)/libpsx hack for multithreaded policy enforcement, which is also used by libcap and for setuid(2). Using this userspace-based scheme, the threads of a process enforce the same Landlock policy, but the resulting Landlock domains are still separate. The domains being separate causes multiple problems: * When using Landlock's "scoped" access rights, the domain identity is used to determine whether an operation is permitted. As a result, when using LANLDOCK_SCOPE_SIGNAL, signaling between sibling threads stops working. This is a problem for programming languages and frameworks which are inherently multithreaded (e.g. Go). * In audit logging, the domains of separate threads in a process will get logged with different domain IDs, even when they are based on the same ruleset FD, which might confuse users. Cc: Andrew G. Morgan Cc: John Johansen Cc: Paul Moore Suggested-by: Jann Horn Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20251127115136.3064948-2-gnoack@google.com [mic: Fix restrict_self_flags test, clean up Makefile, allign comments, reduce local variable scope, add missing includes] Closes: https://github.com/landlock-lsm/linux/issues/2 Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 75fd7f5e6cc3..d5081ab4e5ef 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -117,11 +117,24 @@ struct landlock_ruleset_attr { * future nested domains, not the one being created. It can also be used * with a @ruleset_fd value of -1 to mute subdomain logs without creating a * domain. + * + * The following flag supports policy enforcement in multithreaded processes: + * + * %LANDLOCK_RESTRICT_SELF_TSYNC + * Applies the new Landlock configuration atomically to all threads of the + * current process, including the Landlock domain and logging + * configuration. This overrides the Landlock configuration of sibling + * threads, irrespective of previously established Landlock domains and + * logging configurations on these threads. + * + * If the calling thread is running with no_new_privs, this operation + * enables no_new_privs on the sibling threads as well. */ /* clang-format off */ #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) #define LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON (1U << 1) #define LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF (1U << 2) +#define LANDLOCK_RESTRICT_SELF_TSYNC (1U << 3) /* clang-format on */ /** -- cgit v1.2.3 From bbb6f53e905ca119f99ccab8496f8921d9db9c50 Mon Sep 17 00:00:00 2001 From: Matthieu Buffet Date: Fri, 12 Dec 2025 17:36:57 +0100 Subject: landlock: Minor reword of docs for TCP access rights MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move ABI requirement next to each access right to prepare adding more access rights; - Mention the possibility to remove the random component of a socket's ephemeral port choice within the netns-wide ephemeral port range, since it allows choosing the "random" ephemeral port. Signed-off-by: Matthieu Buffet Link: https://lore.kernel.org/r/20251212163704.142301-2-matthieu@buffet.re Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index d5081ab4e5ef..f88fa1f68b77 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -195,11 +195,13 @@ struct landlock_net_port_attr { * It should be noted that port 0 passed to :manpage:`bind(2)` will bind * to an available port from the ephemeral port range. This can be * configured with the ``/proc/sys/net/ipv4/ip_local_port_range`` sysctl - * (also used for IPv6). + * (also used for IPv6), and within that range, on a per-socket basis + * with ``setsockopt(IP_LOCAL_PORT_RANGE)``. * - * A Landlock rule with port 0 and the ``LANDLOCK_ACCESS_NET_BIND_TCP`` + * A Landlock rule with port 0 and the %LANDLOCK_ACCESS_NET_BIND_TCP * right means that requesting to bind on port 0 is allowed and it will - * automatically translate to binding on the related port range. + * automatically translate to binding on a kernel-assigned ephemeral + * port. */ __u64 port; }; @@ -342,13 +344,12 @@ struct landlock_net_port_attr { * These flags enable to restrict a sandboxed process to a set of network * actions. * - * This is supported since Landlock ABI version 4. - * * The following access rights apply to TCP port numbers: * - * - %LANDLOCK_ACCESS_NET_BIND_TCP: Bind a TCP socket to a local port. - * - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect an active TCP socket to - * a remote port. + * - %LANDLOCK_ACCESS_NET_BIND_TCP: Bind TCP sockets to the given local + * port. Support added in Landlock ABI version 4. + * - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect TCP sockets to the given + * remote port. Support added in Landlock ABI version 4. */ /* clang-format off */ #define LANDLOCK_ACCESS_NET_BIND_TCP (1ULL << 0) -- cgit v1.2.3 From 136f1e168f4941021565f8c10ff4bb81b1f13f2c Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 5 Feb 2026 18:34:23 +0100 Subject: mptcp: fix kdoc warnings The following warnings were visible: $ ./scripts/kernel-doc -Wall -none \ net/mptcp/ include/net/mptcp.h include/uapi/linux/mptcp*.h \ include/trace/events/mptcp.h Warning: net/mptcp/token.c:108 No description found for return value of 'mptcp_token_new_request' Warning: net/mptcp/token.c:151 No description found for return value of 'mptcp_token_new_connect' Warning: net/mptcp/token.c:246 No description found for return value of 'mptcp_token_get_sock' Warning: net/mptcp/token.c:298 No description found for return value of 'mptcp_token_iter_next' Warning: net/mptcp/protocol.c:4431 No description found for return value of 'mptcp_splice_read' Warning: include/uapi/linux/mptcp_pm.h:13 missing initial short description on line: * enum mptcp_event_type Address all of them: either by using the 'Return:' keyword, or by adding a missing initial short description. The MPTCP CI will soon report issues with kdoc to avoid introducing new issues and being flagged by the Netdev CI. Reviewed-by: Geliang Tang Reviewed-by: Randy Dunlap Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260205-net-mptcp-misc-fixes-6-19-rc8-v2-3-c2720ce75c34@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp_pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index c97d060ee90b..fe9863d75350 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -11,7 +11,7 @@ #define MPTCP_PM_VER 1 /** - * enum mptcp_event_type + * enum mptcp_event_type - Netlink MPTCP event types * @MPTCP_EVENT_UNSPEC: unused event * @MPTCP_EVENT_CREATED: A new MPTCP connection has been created. It is the * good time to allocate memory and send ADD_ADDR if needed. Depending on the -- cgit v1.2.3 From 90079798f1d748e97c74e23736491543577b8aee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 Feb 2026 10:59:00 +0100 Subject: delayacct: fix uapi timespec64 definition The custom definition of 'struct timespec64' is incompatible with both the kernel's internal definition and the glibc type, at least on big-endian targets that have the tv_nsec field in a different place, and the definition clashes with any userspace that also defines a timespec64 structure. Running the header check with -Wpadding enabled produces this output that warns about the incorrect padding: usr/include/linux/taskstats.h:25:1: error: padding struct size to alignment boundary with 4 bytes [-Werror=padded] Remove the hack and instead use the regular __kernel_timespec type that is meant to be used in uapi definitions. Link: https://lkml.kernel.org/r/20260202095906.1344100-1-arnd@kernel.org Fixes: 29b63f6eff0e ("delayacct: add timestamp of delay max") Signed-off-by: Arnd Bergmann Cc: Fan Yu Cc: Jonathan Corbet Cc: xu xin Cc: Yang Yang Cc: Balbir Singh Cc: Jiang Kun Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 1b31e8e14d2f..3ae25f3ce067 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -18,16 +18,7 @@ #define _LINUX_TASKSTATS_H #include -#ifdef __KERNEL__ -#include -#else -#ifndef _LINUX_TIME64_H -struct timespec64 { - __s64 tv_sec; /* seconds */ - long tv_nsec; /* nanoseconds */ -}; -#endif -#endif +#include /* Format for per-task data returned to userland when * - a task exits @@ -242,14 +233,14 @@ struct taskstats { __u64 irq_delay_min; /*v17: delay max timestamp record*/ - struct timespec64 cpu_delay_max_ts; - struct timespec64 blkio_delay_max_ts; - struct timespec64 swapin_delay_max_ts; - struct timespec64 freepages_delay_max_ts; - struct timespec64 thrashing_delay_max_ts; - struct timespec64 compact_delay_max_ts; - struct timespec64 wpcopy_delay_max_ts; - struct timespec64 irq_delay_max_ts; + struct __kernel_timespec cpu_delay_max_ts; + struct __kernel_timespec blkio_delay_max_ts; + struct __kernel_timespec swapin_delay_max_ts; + struct __kernel_timespec freepages_delay_max_ts; + struct __kernel_timespec thrashing_delay_max_ts; + struct __kernel_timespec compact_delay_max_ts; + struct __kernel_timespec wpcopy_delay_max_ts; + struct __kernel_timespec irq_delay_max_ts; }; -- cgit v1.2.3 From 0ac6f4056c4a257f4b230b910e3e6fee6c6fc9b9 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 1 Feb 2026 16:34:05 +0200 Subject: RDMA/uverbs: Add DMABUF object type and operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose DMABUF functionality to userspace through the uverbs interface, enabling InfiniBand/RDMA devices to export PCI based memory regions (e.g. device memory) as DMABUF file descriptors. This allows zero-copy sharing of RDMA memory with other subsystems that support the dma-buf framework. A new UVERBS_OBJECT_DMABUF object type and allocation method were introduced. During allocation, uverbs invokes the driver to supply the rdma_user_mmap_entry associated with the given page offset (pgoff). Based on the returned rdma_user_mmap_entry, uverbs requests the driver to provide the corresponding physical-memory details as well as the driver’s PCI provider information. Using this information, dma_buf_export() is called; if it succeeds, uobj->object is set to the underlying file pointer returned by the dma-buf framework. The file descriptor number follows the standard uverbs allocation flow, but the file pointer comes from the dma-buf subsystem, including its own fops and private data. When an mmap entry is removed, uverbs iterates over its associated DMABUFs, marks them as revoked, and calls dma_buf_move_notify() so that their importers are notified. The same procedure applies during the disassociate flow; final cleanup occurs when the application closes the file. Signed-off-by: Yishai Hadas Signed-off-by: Edward Srouji Link: https://patch.msgid.link/20260201-dmabuf-export-v3-2-da238b614fe3@nvidia.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/ib_user_ioctl_cmds.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 35da4026f452..72041c1b0ea5 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -56,6 +56,7 @@ enum uverbs_default_objects { UVERBS_OBJECT_COUNTERS, UVERBS_OBJECT_ASYNC_EVENT, UVERBS_OBJECT_DMAH, + UVERBS_OBJECT_DMABUF, }; enum { @@ -263,6 +264,15 @@ enum uverbs_methods_dmah { UVERBS_METHOD_DMAH_FREE, }; +enum uverbs_attrs_alloc_dmabuf_cmd_attr_ids { + UVERBS_ATTR_ALLOC_DMABUF_HANDLE, + UVERBS_ATTR_ALLOC_DMABUF_PGOFF, +}; + +enum uverbs_methods_dmabuf { + UVERBS_METHOD_DMABUF_ALLOC, +}; + enum uverbs_attrs_reg_dm_mr_cmd_attr_ids { UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_ATTR_REG_DM_MR_OFFSET, -- cgit v1.2.3 From ebcff9dacaf2c1418f8bc927388186d7d3674603 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 Feb 2026 23:48:07 +0100 Subject: vduse: avoid adding implicit padding The vduse_iova_range_v2 and vduse_iotlb_entry_v2 structures are both defined in a way that adds implicit padding and is incompatible between i386 and x86_64 userspace because of the different structure alignment requirements. Building the header with -Wpadded shows these new warnings: vduse.h:305:1: error: padding struct size to alignment boundary with 4 bytes [-Werror=padded] vduse.h:374:1: error: padding struct size to alignment boundary with 4 bytes [-Werror=padded] Change the amount of padding in these two structures to align them to 64 bit words and avoid those problems. Since the v1 vduse_iotlb_entry already has an inconsistent size, do not attempt to reuse the structure but rather list the members indiviudally, with a fixed amount of padding. Fixes: 079212f6877e ("vduse: add vq group asid support") Signed-off-by: Arnd Bergmann Signed-off-by: Michael S. Tsirkin Message-Id: <20260202224835.559538-1-arnd@kernel.org> --- include/uapi/linux/vduse.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 68b4287f9fac..361eea511c21 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -293,9 +293,13 @@ struct vduse_iova_info { * Structure used by VDUSE_IOTLB_GET_FD2 ioctl to find an overlapped IOVA region. */ struct vduse_iotlb_entry_v2 { - struct vduse_iotlb_entry v1; + __u64 offset; + __u64 start; + __u64 last; + __u8 perm; + __u8 padding[7]; __u32 asid; - __u32 reserved[12]; + __u32 reserved[11]; }; /* @@ -365,6 +369,7 @@ struct vduse_iova_range_v2 { __u64 start; __u64 last; __u32 asid; + __u32 padding; }; /** -- cgit v1.2.3 From 57d00816c6a9c152f01b65bb7b3662f4d03ccd09 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Feb 2026 16:53:08 -0500 Subject: drm/amdgpu: set family for GC 11.5.4 Set the family for GC 11.5.4 Fixes: 47ae1f938d12 ("drm/amdgpu: add support for GC IP version 11.5.4") Cc: Tim Huang Cc: Pratik Vishwakarma Cc: Roman Li Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1d34daa0ebcd..ebbd861ef0bc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1667,6 +1667,7 @@ struct drm_amdgpu_info_uq_metadata { #define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_11_5_4 154 /* GC 11.5.4 */ #define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ #if defined(__cplusplus) -- cgit v1.2.3 From c29214677a9fc1a3a4ee65e189afeb5fd10d676f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 15 Feb 2026 21:34:28 +0000 Subject: io_uring/query: return support for custom rx page size Add an ability to query if the zcrx rx page size setting is available. Note, even when the API is supported by io_uring, the registration can still get rejected for various reasons, e.g. when the NIC or the driver doesn't support it, when the particular specified size is unsupported, when the memory area doesn't satisfy all requirements, etc. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ include/uapi/linux/io_uring/query.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index da5156954731..c462bdf3c42c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1090,6 +1090,14 @@ enum zcrx_reg_flags { ZCRX_REG_IMPORT = 1, }; +enum zcrx_features { + /* + * The user can ask for the desired rx page size by passing the + * value in struct io_uring_zcrx_ifq_reg::rx_buf_len. + */ + ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0, +}; + /* * Argument for IORING_REGISTER_ZCRX_IFQ */ diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index 2456e6c5ebb5..0b6248175e26 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -50,7 +50,8 @@ struct io_uring_query_zcrx { __u64 area_flags; /* The number of supported ZCRX_CTRL_* opcodes */ __u32 nr_ctrl_opcodes; - __u32 __resv1; + /* Bitmask of ZCRX_FEATURE_* indicating which features are available */ + __u32 features; /* The refill ring header size */ __u32 rq_hdr_size; /* The alignment for the header */ -- cgit v1.2.3 From 6b34f8edf8b807b7f87901623aa52dfa1b29ef93 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 15 Feb 2026 21:38:09 +0000 Subject: io_uring/query: add query.h copyright notice Add a copyright notice to io_uring's query uapi header. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/query.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index 0b6248175e26..95500759cc13 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ /* * Header file for the io_uring query interface. + * + * Copyright (C) 2026 Pavel Begunkov + * Copyright (C) Meta Platforms, Inc. */ #ifndef LINUX_IO_URING_QUERY_H #define LINUX_IO_URING_QUERY_H -- cgit v1.2.3 From be3573124e630736d2d39650b12f5ef220b47ac1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 10 Feb 2026 10:00:44 -0700 Subject: io_uring/bpf_filter: pass in expected filter payload size It's quite possible that opcodes that have payloads attached to them, like IORING_OP_OPENAT/OPENAT2 or IORING_OP_SOCKET, that these paylods can change over time. For example, on the openat/openat2 side, the struct open_how argument is extensible, and could be extended in the future to allow further arguments to be passed in. Allow registration of a cBPF filter to give the size of the filter as seen by userspace. If that filter is for an opcode that takes extra payload data, allow it if the application payload expectation is the same size than the kernels. If that is the case, the kernel supports filtering on the payload that the application expects. If the size differs, the behavior depends on the IO_URING_BPF_FILTER_SZ_STRICT flag: 1) If IO_URING_BPF_FILTER_SZ_STRICT is set and the size expectation differs, fail the attempt to load the filter. 2) If IO_URING_BPF_FILTER_SZ_STRICT isn't set, allow the filter if the userspace pdu size is smaller than what the kernel offers. 3) Regardless if IO_URING_BPF_FILTER_SZ_STRICT, fail loading the filter if the userspace pdu size is bigger than what the kernel supports. An attempt to load a filter due to sizing will error with -EMSGSIZE. For that error, the registration struct will have filter->pdu_size populated with the pdu size that the kernel uses. Reported-by: Christian Brauner Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/bpf_filter.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h index 220351b81bc0..1b461d792a7b 100644 --- a/include/uapi/linux/io_uring/bpf_filter.h +++ b/include/uapi/linux/io_uring/bpf_filter.h @@ -35,13 +35,19 @@ enum { * If set, any currently unset opcode will have a deny filter attached */ IO_URING_BPF_FILTER_DENY_REST = 1, + /* + * If set, if kernel and application don't agree on pdu_size for + * the given opcode, fail the registration of the filter. + */ + IO_URING_BPF_FILTER_SZ_STRICT = 2, }; struct io_uring_bpf_filter { __u32 opcode; /* io_uring opcode to filter */ __u32 flags; __u32 filter_len; /* number of BPF instructions */ - __u32 resv; + __u8 pdu_size; /* expected pdu size for opcode */ + __u8 resv[3]; __u64 filter_ptr; /* pointer to BPF filter */ __u64 resv2[5]; }; -- cgit v1.2.3 From 4edd4ba71ce0df015303dba75ea9d20d1a217546 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sat, 14 Feb 2026 15:54:06 +0100 Subject: include: uapi: netfilter_bridge.h: Cover for musl libc Musl defines its own struct ethhdr and thus defines __UAPI_DEF_ETHHDR to zero. To avoid struct redefinition errors, user space is therefore supposed to include netinet/if_ether.h before (or instead of) linux/if_ether.h. To relieve them from this burden, include the libc header here if not building for kernel space. Reported-by: Alyssa Ross Suggested-by: Florian Westphal Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter_bridge.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h index f6e8d1e05c97..758de72b2764 100644 --- a/include/uapi/linux/netfilter_bridge.h +++ b/include/uapi/linux/netfilter_bridge.h @@ -5,6 +5,10 @@ /* bridge-specific defines for netfilter. */ +#ifndef __KERNEL__ +#include /* for __UAPI_DEF_ETHHDR if defined */ +#endif + #include #include #include -- cgit v1.2.3 From a284dbc96a47891a7a595a1c81b1e2da4d309cf6 Mon Sep 17 00:00:00 2001 From: Muminul Islam Date: Wed, 18 Feb 2026 14:47:59 +0000 Subject: mshv: Add nested virtualization creation flag Introduce HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE to indicate support for nested virtualization during partition creation. This enables clearer configuration and capability checks for nested virtualization scenarios. Signed-off-by: Stanislav Kinsburskii Signed-off-by: Muminul Islam Signed-off-by: Wei Liu --- include/uapi/linux/mshv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index dee3ece28ce5..7ef5dd67a232 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -27,6 +27,7 @@ enum { MSHV_PT_BIT_X2APIC, MSHV_PT_BIT_GPA_SUPER_PAGES, MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES, + MSHV_PT_BIT_NESTED_VIRTUALIZATION, MSHV_PT_BIT_COUNT, }; -- cgit v1.2.3 From 8927a108a7662eb83eb667bc0c5a0633397122b1 Mon Sep 17 00:00:00 2001 From: Anatol Belski Date: Wed, 18 Feb 2026 14:48:02 +0000 Subject: mshv: Add SMT_ENABLED_GUEST partition creation flag Add support for HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST to allow userspace VMMs to enable SMT for guest partitions. Expose this via new MSHV_PT_BIT_SMT_ENABLED_GUEST flag in the UAPI. Without this flag, the hypervisor schedules guest VPs incorrectly, causing SMT unusable. Signed-off-by: Anatol Belski Signed-off-by: Wei Liu --- include/uapi/linux/mshv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h index 7ef5dd67a232..e0645a34b55b 100644 --- a/include/uapi/linux/mshv.h +++ b/include/uapi/linux/mshv.h @@ -28,6 +28,7 @@ enum { MSHV_PT_BIT_GPA_SUPER_PAGES, MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES, MSHV_PT_BIT_NESTED_VIRTUALIZATION, + MSHV_PT_BIT_SMT_ENABLED_GUEST, MSHV_PT_BIT_COUNT, }; -- cgit v1.2.3