From 9c75b16cabc69adbbfdc9d219df87c9173f0da0a Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 19 Aug 2024 10:02:22 +0200 Subject: drm/panfrost: Add SYSTEM_TIMESTAMP and SYSTEM_TIMESTAMP_FREQUENCY parameters Expose system timestamp and frequency supported by the GPU. Mali uses an external timer as GPU system time. On ARM, this is wired to the generic arch timer so we wire cntfrq_el0 as device frequency. This new uAPI will be used in Mesa to implement timestamp queries and VK_KHR_calibrated_timestamps. v2: - Rewrote to use GPU timestamp register - Add missing include for arch_timer_get_cntfrq - Rework commit message v3: - Move panfrost_cycle_counter_get and panfrost_cycle_counter_put to panfrost_ioctl_query_timestamp - Handle possible overflow in panfrost_timestamp_read Signed-off-by: Mary Guillemard Tested-by: Heiko Stuebner Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240819080224.24914-2-mary.guillemard@collabora.com --- include/uapi/drm/panfrost_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 9f231d40a146..52b050e2b660 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -172,6 +172,8 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_NR_CORE_GROUPS, DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, DRM_PANFROST_PARAM_AFBC_FEATURES, + DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, + DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, }; struct drm_panfrost_get_param { -- cgit v1.2.3 From 3a8d97611b564b5b25f68c90b543056fc9ae0bec Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 19 Aug 2024 10:02:23 +0200 Subject: drm/panfrost: Add cycle counter job requirement Extend the uAPI with a new job requirement flag for cycle counters. This requirement is used by userland to indicate that a job requires cycle counters or system timestamp to be propagated. (for use with write value timestamp jobs) We cannot enable cycle counters unconditionally as this would result in an increase of GPU power consumption. As a result, they should be left off unless required by the application. If a job requires cycle counters or system timestamps propagation, we must enable cycle counting before issuing a job and disable it right after the job completes. Since this extends the uAPI and because userland needs a way to advertise features like VK_KHR_shader_clock conditionally, we bumps the driver minor version. v2: - Rework commit message - Squash uAPI changes and implementation in this commit - Simplify changes based on Steven Price comments v3: - Add Steven Price r-b - Fix a codestyle issue Signed-off-by: Mary Guillemard Reviewed-by: Steven Price Tested-by: Heiko Stuebner Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240819080224.24914-3-mary.guillemard@collabora.com --- include/uapi/drm/panfrost_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 52b050e2b660..568724be6628 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -40,6 +40,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_PERFCNT_DUMP DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump) #define PANFROST_JD_REQ_FS (1 << 0) +#define PANFROST_JD_REQ_CYCLE_COUNT (1 << 1) /** * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D * engine. -- cgit v1.2.3 From a778028cc575deeb5224cc798de6e03d37331bca Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Fri, 30 Aug 2024 10:03:50 +0200 Subject: drm/panthor: Add DEV_QUERY_TIMESTAMP_INFO dev query Expose timestamp information supported by the GPU with a new device query. Mali uses an external timer as GPU system time. On ARM, this is wired to the generic arch timer so we wire cntfrq_el0 as device frequency. This new uAPI will be used in Mesa to implement timestamp queries and VK_KHR_calibrated_timestamps. Since this extends the uAPI and because userland needs a way to advertise those features conditionally, this also bumps the driver minor version. v2: - Rewrote to use GPU timestamp register - Added timestamp_offset to drm_panthor_timestamp_info - Add missing include for arch_timer_get_cntfrq - Rework commit message v3: - Add panthor_gpu_read_64bit_counter - Change panthor_gpu_read_timestamp to use panthor_gpu_read_64bit_counter v4: - Fix multiple typos in uAPI documentation - Mention behavior when the timestamp frequency is unknown - Use u64 instead of unsigned long long for panthor_gpu_read_timestamp - Apply r-b from Mihail Signed-off-by: Mary Guillemard Reviewed-by: Mihail Atanassov Reviewed-by: Boris Brezillon Signed-off-by: Boris Brezillon Link: https://patchwork.freedesktop.org/patch/msgid/20240830080349.24736-2-mary.guillemard@collabora.com --- include/uapi/drm/panthor_drm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 926b1deb1116..95e3a984c69a 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -260,6 +260,9 @@ enum drm_panthor_dev_query_type { /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */ DRM_PANTHOR_DEV_QUERY_CSIF_INFO, + + /** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. */ + DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO, }; /** @@ -377,6 +380,25 @@ struct drm_panthor_csif_info { __u32 pad; }; +/** + * struct drm_panthor_timestamp_info - Timestamp information + * + * Structure grouping all queryable information relating to the GPU timestamp. + */ +struct drm_panthor_timestamp_info { + /** + * @timestamp_frequency: The frequency of the timestamp timer or 0 if + * unknown. + */ + __u64 timestamp_frequency; + + /** @current_timestamp: The current timestamp. */ + __u64 current_timestamp; + + /** @timestamp_offset: The offset of the timestamp timer. */ + __u64 timestamp_offset; +}; + /** * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY */ -- cgit v1.2.3 From 9f8e1c93a0d459463819d8bd222196b2655c279f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:15 -0300 Subject: drm/v3d: Expose Super Pages capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new V3D parameter to expose the support of Super Pages to userspace. The userspace might want to know this information to apply optimizations that are specific to kernels with Super Pages enabled. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-12-mcanal@igalia.com --- include/uapi/drm/v3d_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 87fc5bb0a61e..2376c73abca1 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -290,6 +290,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, DRM_V3D_PARAM_MAX_PERF_COUNTERS, + DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES, }; struct drm_v3d_get_param { -- cgit v1.2.3 From f73716fd4550d588a811f11c370e90c303f0829b Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 9 Sep 2024 08:48:20 +0200 Subject: drm/panthor: Add PANTHOR_GROUP_PRIORITY_REALTIME group priority This adds a new value to drm_panthor_group_priority exposing the realtime priority to userspace. This is required to implement NV_context_priority_realtime in Mesa. v2: - Add Steven Price r-b v3: - Add Boris Brezillon r-b Signed-off-by: Mary Guillemard Reviewed-by: Steven Price Reviewed-by: Boris Brezillon Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240909064820.34982-3-mary.guillemard@collabora.com --- include/uapi/drm/panthor_drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 1fd8473548ac..011a555e4674 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -720,6 +720,13 @@ enum drm_panthor_group_priority { * Requires CAP_SYS_NICE or DRM_MASTER. */ PANTHOR_GROUP_PRIORITY_HIGH, + + /** + * @PANTHOR_GROUP_PRIORITY_REALTIME: Realtime priority group. + * + * Requires CAP_SYS_NICE or DRM_MASTER. + */ + PANTHOR_GROUP_PRIORITY_REALTIME, }; /** -- cgit v1.2.3 From f70000ef23527f6d928d1175c66c5fafa968814b Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 9 Sep 2024 08:48:21 +0200 Subject: drm/panthor: Add DEV_QUERY_GROUP_PRIORITIES_INFO dev query Expose allowed group priorities with a new device query. This new uAPI will be used in Mesa to properly report what priorities a user can use for EGL_IMG_context_priority. Since this extends the uAPI and because userland needs a way to advertise priorities accordingly, this also bumps the driver minor version. v2: - Remove drm_panthor_group_allow_priority_flags definition - Document that allowed_mask is a bitmask of drm_panthor_group_priority v3: - Use BIT macro in panthor_query_group_priorities_info - Add r-b from Steven Price and Boris Brezillon Signed-off-by: Mary Guillemard Reviewed-by: Steven Price Reviewed-by: Boris Brezillon Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240909064820.34982-4-mary.guillemard@collabora.com --- include/uapi/drm/panthor_drm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 011a555e4674..87c9cb555dd1 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -263,6 +263,11 @@ enum drm_panthor_dev_query_type { /** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. */ DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO, + + /** + * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group priorities information. + */ + DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, }; /** @@ -399,6 +404,23 @@ struct drm_panthor_timestamp_info { __u64 timestamp_offset; }; +/** + * struct drm_panthor_group_priorities_info - Group priorities information + * + * Structure grouping all queryable information relating to the allowed group priorities. + */ +struct drm_panthor_group_priorities_info { + /** + * @allowed_mask: Bitmask of the allowed group priorities. + * + * Each bit represents a variant of the enum drm_panthor_group_priority. + */ + __u8 allowed_mask; + + /** @pad: Padding fields, MBZ. */ + __u8 pad[3]; +}; + /** * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY */ -- cgit v1.2.3 From e4ca0e59c39442546866f3dd514a3a5956577daf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Sep 2024 20:59:04 +0300 Subject: types: Complement the aligned types with signed 64-bit one Some user may want to use aligned signed 64-bit type. Provide it for them. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20240903180218.3640501-2-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 6375a0684052..48b933938877 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -53,6 +53,7 @@ typedef __u32 __bitwise __wsum; * No conversions are necessary between 32-bit user-space and a 64-bit kernel. */ #define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_s64 __s64 __attribute__((aligned(8))) #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) -- cgit v1.2.3 From f69767a1ada3ac74be2e1ac0795a05e1d1384eff Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 2 Oct 2024 11:59:50 -0500 Subject: PCI: Add TLP Processing Hints (TPH) support Add support for PCIe TLP Processing Hints (TPH) support (see PCIe r6.2, sec 6.17). Add TPH register definitions in pci_regs.h, including the TPH Requester capability register, TPH Requester control register, TPH Completer capability, and the ST fields of MSI-X entry. Introduce pcie_enable_tph() and pcie_disable_tph(), enabling drivers to toggle TPH support and configure specific ST mode as needed. Also add a new kernel parameter, "pci=notph", allowing users to disable TPH support across the entire system. Link: https://lore.kernel.org/r/20241002165954.128085-2-wei.huang2@amd.com Co-developed-by: Jing Liu Co-developed-by: Paul Luse Co-developed-by: Eric Van Tassell Signed-off-by: Jing Liu Signed-off-by: Paul Luse Signed-off-by: Eric Van Tassell Signed-off-by: Wei Huang Signed-off-by: Bjorn Helgaas Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Jonathan Cameron Reviewed-by: Lukas Wunner --- include/uapi/linux/pci_regs.h | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 12323b3334a9..155dea741615 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -340,7 +340,8 @@ #define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */ #define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */ #define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */ -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 /* Mask Bit */ +#define PCI_MSIX_ENTRY_CTRL_ST 0xffff0000 /* Steering Tag */ /* CompactPCI Hotswap Register */ @@ -659,6 +660,7 @@ #define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* 64b AtomicOp completion */ #define PCI_EXP_DEVCAP2_ATOMIC_COMP128 0x00000200 /* 128b AtomicOp completion */ #define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */ +#define PCI_EXP_DEVCAP2_TPH_COMP_MASK 0x00003000 /* TPH completer support */ #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ @@ -1023,15 +1025,34 @@ #define PCI_DPA_CAP_SUBSTATE_MASK 0x1F /* # substates - 1 */ #define PCI_DPA_BASE_SIZEOF 16 /* size with 0 substates */ +/* TPH Completer Support */ +#define PCI_EXP_DEVCAP2_TPH_COMP_NONE 0x0 /* None */ +#define PCI_EXP_DEVCAP2_TPH_COMP_TPH_ONLY 0x1 /* TPH only */ +#define PCI_EXP_DEVCAP2_TPH_COMP_EXT_TPH 0x3 /* TPH and Extended TPH */ + /* TPH Requester */ #define PCI_TPH_CAP 4 /* capability register */ -#define PCI_TPH_CAP_LOC_MASK 0x600 /* location mask */ -#define PCI_TPH_LOC_NONE 0x000 /* no location */ -#define PCI_TPH_LOC_CAP 0x200 /* in capability */ -#define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ -#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */ -#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */ -#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */ +#define PCI_TPH_CAP_ST_NS 0x00000001 /* No ST Mode Supported */ +#define PCI_TPH_CAP_ST_IV 0x00000002 /* Interrupt Vector Mode Supported */ +#define PCI_TPH_CAP_ST_DS 0x00000004 /* Device Specific Mode Supported */ +#define PCI_TPH_CAP_EXT_TPH 0x00000100 /* Ext TPH Requester Supported */ +#define PCI_TPH_CAP_LOC_MASK 0x00000600 /* ST Table Location */ +#define PCI_TPH_LOC_NONE 0x00000000 /* Not present */ +#define PCI_TPH_LOC_CAP 0x00000200 /* In capability */ +#define PCI_TPH_LOC_MSIX 0x00000400 /* In MSI-X */ +#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST Table Size */ +#define PCI_TPH_CAP_ST_SHIFT 16 /* ST Table Size shift */ +#define PCI_TPH_BASE_SIZEOF 0xc /* Size with no ST table */ + +#define PCI_TPH_CTRL 8 /* control register */ +#define PCI_TPH_CTRL_MODE_SEL_MASK 0x00000007 /* ST Mode Select */ +#define PCI_TPH_ST_NS_MODE 0x0 /* No ST Mode */ +#define PCI_TPH_ST_IV_MODE 0x1 /* Interrupt Vector Mode */ +#define PCI_TPH_ST_DS_MODE 0x2 /* Device Specific Mode */ +#define PCI_TPH_CTRL_REQ_EN_MASK 0x00000300 /* TPH Requester Enable */ +#define PCI_TPH_REQ_DISABLE 0x0 /* No TPH requests allowed */ +#define PCI_TPH_REQ_TPH_ONLY 0x1 /* TPH only requests allowed */ +#define PCI_TPH_REQ_EXT_TPH 0x3 /* Extended TPH requests allowed */ /* Downstream Port Containment */ #define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */ -- cgit v1.2.3 From 7788d320ba5ecbfa88d0be8c32ef8f018f2f020f Mon Sep 17 00:00:00 2001 From: Antonino Maniscalco Date: Thu, 3 Oct 2024 18:12:59 +0200 Subject: drm/msm/a6xx: Add a flag to allow preemption to submitqueue_create Some userspace changes are necessary so add a flag for userspace to advertise support for preemption when creating the submitqueue. When this flag is not set preemption will not be allowed in the middle of the submitted IBs therefore mantaining compatibility with older userspace. The flag is rejected if preemption is not supported on the target, this allows userspace to know whether preemption is supported. Tested-by: Rob Clark Tested-by: Neil Armstrong # on SM8650-QRD Tested-by: Neil Armstrong # on SM8550-QRD Tested-by: Neil Armstrong # on SM8450-HDK Signed-off-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/618028/ Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2377147b6af0..b916aab80dde 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -347,7 +347,10 @@ struct drm_msm_gem_madvise { * backwards compatibility as a "default" submitqueue */ -#define MSM_SUBMITQUEUE_FLAGS (0) +#define MSM_SUBMITQUEUE_ALLOW_PREEMPT 0x00000001 +#define MSM_SUBMITQUEUE_FLAGS ( \ + MSM_SUBMITQUEUE_ALLOW_PREEMPT | \ + 0) /* * The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1, -- cgit v1.2.3 From 4f647a780f3606acbd2116248d51eadb4d865615 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:10 -0700 Subject: bpf: __bpf_fastcall for bpf_get_smp_processor_id in uapi Since [1] kernel supports __bpf_fastcall attribute for helper function bpf_get_smp_processor_id(). Update uapi definition for this helper in order to have this attribute in the generated bpf_helper_defs.h [1] commit 91b7fbf3936f ("bpf, x86, riscv, arm: no_caller_saved_registers for bpf_get_smp_processor_id()") Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c6cd7c7aeeee..8ab4d8184b9d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1970,6 +1970,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description -- cgit v1.2.3 From 91e102e79740ae43ded050ccac71aa3371db4f33 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:43 +0100 Subject: prctl: arch-agnostic prctl for shadow stack Three architectures (x86, aarch64, riscv) have announced support for shadow stacks with fairly similar functionality. While x86 is using arch_prctl() to control the functionality neither arm64 nor riscv uses that interface so this patch adds arch-agnostic prctl() support to get and set status of shadow stacks and lock the current configuation to prevent further changes, with support for turning on and off individual subfeatures so applications can limit their exposure to features that they do not need. The features are: - PR_SHADOW_STACK_ENABLE: Tracking and enforcement of shadow stacks, including allocation of a shadow stack if one is not already allocated. - PR_SHADOW_STACK_WRITE: Writes to specific addresses in the shadow stack. - PR_SHADOW_STACK_PUSH: Push additional values onto the shadow stack. These features are expected to be inherited by new threads and cleared on exec(), unknown features should be rejected for enable but accepted for locking (in order to allow for future proofing). This is based on a patch originally written by Deepak Gupta but modified fairly heavily, support for indirect landing pads is removed, additional modes added and the locking interface reworked. The set status prctl() is also reworked to just set flags, if setting/reading the shadow stack pointer is required this could be a separate prctl. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Acked-by: Yury Khrustalev Signed-off-by: Mark Brown Reviewed-by: Deepak Gupta Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-4-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/uapi/linux/prctl.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 35791791a879..557a3d2ac1d4 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -328,4 +328,26 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 3630e82ab6bd2642f0fc03b574783ccf2fb0c955 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:58:44 +0100 Subject: mman: Add map_shadow_stack() flags In preparation for adding arm64 GCS support make the map_shadow_stack() SHADOW_STACK_SET_TOKEN flag generic and add _SET_MARKER. The existing flag indicates that a token usable for stack switch should be added to the top of the newly mapped GCS region while the new flag indicates that a top of stack marker suitable for use by unwinders should be added above that. For arm64 the top of stack marker is all bits 0. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Acked-by: Yury Khrustalev Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-5-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/uapi/asm-generic/mman.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 57e8195d0b53..5e3d61ddbd8c 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -19,4 +19,8 @@ #define MCL_FUTURE 2 /* lock all future mappings */ #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */ + + #endif /* __ASM_GENERIC_MMAN_H */ -- cgit v1.2.3 From 7ec3b57cb29f8371bf12a725b6e8f75831a03f27 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Oct 2024 23:59:06 +0100 Subject: arm64/ptrace: Expose GCS via ptrace and core files Provide a new register type NT_ARM_GCS reporting the current GCS mode and pointer for EL0. Due to the interactions with allocation and deallocation of Guarded Control Stacks we do not permit any changes to the GCS mode via ptrace, only GCSPR_EL0 may be changed. Reviewed-by: Thiago Jung Bauermann Reviewed-by: Catalin Marinas Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20241001-arm64-gcs-v13-27-222b78d87eee@kernel.org Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b9935988da5c..9adc218fb6df 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -443,6 +443,7 @@ typedef struct elf64_shdr { #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ #define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ #define NT_ARM_POE 0x40f /* ARM POE registers */ +#define NT_ARM_GCS 0x410 /* ARM GCS state */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From 4aecca4c76808f3736056d18ff510df80424bc9f Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 1 Oct 2024 05:57:14 -0700 Subject: net_tstamp: add SCM_TS_OPT_ID to provide OPT_ID in control message SOF_TIMESTAMPING_OPT_ID socket option flag gives a way to correlate TX timestamps and packets sent via socket. Unfortunately, there is no way to reliably predict socket timestamp ID value in case of error returned by sendmsg. For UDP sockets it's impossible because of lockless nature of UDP transmit, several threads may send packets in parallel. In case of RAW sockets MSG_MORE option makes things complicated. More details are in the conversation [1]. This patch adds new control message type to give user-space software an opportunity to control the mapping between packets and values by providing ID with each sendmsg for UDP sockets. The documentation is also added in this patch. [1] https://lore.kernel.org/netdev/CALCETrU0jB+kg0mhV6A8mrHfTE1D1pr1SD_B9Eaa9aDPfgHdtA@mail.gmail.com/ Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20241001125716.2832769-2-vadfed@meta.com Signed-off-by: Jakub Kicinski --- include/uapi/asm-generic/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 3b4e3e815602..deacfd6dd197 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -141,6 +141,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 80 +#define SCM_TS_OPT_ID 81 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From f858cc9eed5b05cbe38d7ffd2787c21e3718eb7d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Oct 2024 12:12:18 +0000 Subject: net: add IFLA_MAX_PACING_OFFLOAD_HORIZON device attribute Some network devices have the ability to offload EDT (Earliest Departure Time) which is the model used for TCP pacing and FQ packet scheduler. Some of them implement the timing wheel mechanism described in https://saeed.github.io/files/carousel-sigcomm17.pdf with an associated 'timing wheel horizon'. This patch adds dev->max_pacing_offload_horizon expressing this timing wheel horizon in nsec units. This is a read-only attribute. Unless a driver sets it, dev->max_pacing_offload_horizon is zero. v2: addressed Jakub feedback ( https://lore.kernel.org/netdev/20240930152304.472767-2-edumazet@google.com/T/#mf6294d714c41cc459962154cc2580ce3c9693663 ) v3: added yaml doc (also per Jakub feedback) Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241003121219.2396589-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6dc258993b17..506ba9c80e83 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -377,6 +377,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, IFLA_DPLL_PIN, + IFLA_MAX_PACING_OFFLOAD_HORIZON, __IFLA_MAX }; -- cgit v1.2.3 From f26080d47007df2ee90e65b7d390207ff3a588af Mon Sep 17 00:00:00 2001 From: Jeffrey Ji Date: Thu, 3 Oct 2024 12:12:19 +0000 Subject: net_sched: sch_fq: add the ability to offload pacing Some network devices have the ability to offload EDT (Earliest Departure Time) which is the model used for TCP pacing and FQ packet scheduler. Some of them implement the timing wheel mechanism described in https://saeed.github.io/files/carousel-sigcomm17.pdf with an associated 'timing wheel horizon'. This patchs adds to FQ packet scheduler TCA_FQ_OFFLOAD_HORIZON attribute. Its value is capped by the device max_pacing_offload_horizon, added in the prior patch. It allows FQ to let packets within pacing offload horizon to be delivered to the device, which will handle the needed delay without host involvement. Signed-off-by: Jeffrey Ji Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241003121219.2396589-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index a3cd0c2dc995..25a9a47001cd 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -836,6 +836,8 @@ enum { TCA_FQ_WEIGHTS, /* Weights for each band */ + TCA_FQ_OFFLOAD_HORIZON, /* dequeue paced packets within this horizon immediately (us units) */ + __TCA_FQ_MAX }; -- cgit v1.2.3 From 65c4c93caaf1a9fca2855942e338530967162d25 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:12 +0200 Subject: crypto: sig - Introduce sig_alg backend Commit 6cb8815f41a9 ("crypto: sig - Add interface for sign/verify") began a transition of asymmetric sign/verify operations from crypto_akcipher to a new crypto_sig frontend. Internally, the crypto_sig frontend still uses akcipher_alg as backend, however: "The link between sig and akcipher is meant to be temporary. The plan is to create a new low-level API for sig and then migrate the signature code over to that from akcipher." https://lore.kernel.org/r/ZrG6w9wsb-iiLZIF@gondor.apana.org.au/ "having a separate alg for sig is definitely where we want to be since there is very little that the two types actually share." https://lore.kernel.org/r/ZrHlpz4qnre0zWJO@gondor.apana.org.au/ Take the next step of that migration and augment the crypto_sig frontend with a sig_alg backend to which all algorithms can be moved. During the migration, there will briefly be signature algorithms that are still based on crypto_akcipher, whilst others are already based on crypto_sig. Allow for that by building a fork into crypto_sig_*() API calls (i.e. crypto_sig_maxsize() and friends) such that one of the two backends is selected based on the transform's cra_type. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- include/uapi/linux/cryptouser.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 20a6c0fc149e..db05e0419972 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -64,6 +64,7 @@ enum crypto_attr_type_t { CRYPTOCFGA_STAT_AKCIPHER, /* No longer supported, do not use. */ CRYPTOCFGA_STAT_KPP, /* No longer supported, do not use. */ CRYPTOCFGA_STAT_ACOMP, /* No longer supported, do not use. */ + CRYPTOCFGA_REPORT_SIG, /* struct crypto_report_sig */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -207,6 +208,10 @@ struct crypto_report_acomp { char type[CRYPTO_MAX_NAME]; }; +struct crypto_report_sig { + char type[CRYPTO_MAX_NAME]; +}; + #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ sizeof(struct crypto_report_blkcipher)) -- cgit v1.2.3 From 5b553e06b3215fa97d222ebddc2bc964f1824c5b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Sep 2024 16:30:19 +0200 Subject: crypto: virtio - Drop sign/verify operations The virtio crypto driver exposes akcipher sign/verify operations in a user space ABI. This blocks removal of sign/verify from akcipher_alg. Herbert opines: "I would say that this is something that we can break. Breaking it is no different to running virtio on a host that does not support these algorithms. After all, a software implementation must always be present. I deliberately left akcipher out of crypto_user because the API is still in flux. We should not let virtio constrain ourselves." https://lore.kernel.org/all/ZtqoNAgcnXnrYhZZ@gondor.apana.org.au/ "I would remove virtio akcipher support in its entirety. This API was never meant to be exposed outside of the kernel." https://lore.kernel.org/all/Ztqql_gqgZiMW8zz@gondor.apana.org.au/ Drop sign/verify support from virtio crypto. There's no strong reason to also remove encrypt/decrypt support, so keep it. A key selling point of virtio crypto is to allow guest access to crypto accelerators on the host. So far the only akcipher algorithm supported by virtio crypto is RSA. Dropping sign/verify merely means that the PKCS#1 padding is now always generated or verified inside the guest, but the actual signature generation/verification (which is an RSA decrypt/encrypt operation) may still use an accelerator on the host. Generating or verifying the PKCS#1 padding is cheap, so a hardware accelerator won't be of much help there. Which begs the question whether virtio crypto support for sign/verify makes sense at all. It would make sense for the sign operation if the host has a security chip to store asymmetric private keys. But the kernel doesn't even have an asymmetric_key_subtype yet for hardware-based private keys. There's at least one rudimentary driver for such chips (atmel-ecc.c for ATECC508A), but it doesn't implement the sign operation. The kernel would first have to grow support for a hardware asymmetric_key_subtype and at least one driver implementing the sign operation before exposure to guests via virtio makes sense. Signed-off-by: Lukas Wunner Signed-off-by: Herbert Xu --- include/uapi/linux/virtio_crypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 71a54a6849ca..2fccb64c9d6b 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -329,6 +329,7 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) #define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) + /* akcipher sign/verify opcodes are deprecated */ #define VIRTIO_CRYPTO_AKCIPHER_SIGN \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) #define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ -- cgit v1.2.3 From 4436df478860bb5da1864df2cd20f281a210f139 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 7 Jun 2024 18:19:12 +0200 Subject: batman-adv: Add flex array to struct batadv_tvlv_tt_data The "struct batadv_tvlv_tt_data" uses a dynamically sized set of trailing elements. Specifically, it uses an array of structures of type "batadv_tvlv_tt_vlan_data". So, use the preferred way in the kernel declaring a flexible array [1]. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the attribute used is specifically __counted_by_be since variable "num_vlan" is of type __be16. The following change to the "batadv_tt_tvlv_ogm_handler_v1" function: - tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); - tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan); + tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data + + flex_size); is intended to prevent the compiler from generating an "out-of-bounds" notification due to the __counted_by attribute. The compiler can do a pointer calculation using the vlan_data flexible array memory, or in other words, this may be calculated as an array offset, since it is the same as: &tt_data->vlan_data[num_vlan] Therefore, we go past the end of the array. In other "multiple trailing flexible array" situations, this has been solved by addressing from the base pointer, since the compiler either knows the full allocation size or it knows nothing about it (this case, since it came from a "void *" function argument). The order in which the structure batadv_tvlv_tt_data and the structure batadv_tvlv_tt_vlan_data are defined must be swap to avoid an incomplete type error. Also, avoid the open-coded arithmetic in memory allocator functions [2] using the "struct_size" macro and use the "flex_array_size" helper to clarify some calculations, when possible. Moreover, the new structure member also allow us to avoid the open-coded arithmetic on pointers in some situations. Take advantage of this. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/next/process/deprecated.html#zero-length-and-one-element-arrays [1] Link: https://www.kernel.org/doc/html/next/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- include/uapi/linux/batadv_packet.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 6e25753015df..439132a819ea 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -9,6 +9,7 @@ #include #include +#include #include /** @@ -592,19 +593,6 @@ struct batadv_tvlv_gateway_data { __be32 bandwidth_up; }; -/** - * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container - * @flags: translation table flags (see batadv_tt_data_flags) - * @ttvn: translation table version number - * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by - * one batadv_tvlv_tt_vlan_data object per announced vlan - */ -struct batadv_tvlv_tt_data { - __u8 flags; - __u8 ttvn; - __be16 num_vlan; -}; - /** * struct batadv_tvlv_tt_vlan_data - vlan specific tt data propagated through * the tt tvlv container @@ -618,6 +606,21 @@ struct batadv_tvlv_tt_vlan_data { __u16 reserved; }; +/** + * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container + * @flags: translation table flags (see batadv_tt_data_flags) + * @ttvn: translation table version number + * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by + * one batadv_tvlv_tt_vlan_data object per announced vlan + * @vlan_data: array of batadv_tvlv_tt_vlan_data objects + */ +struct batadv_tvlv_tt_data { + __u8 flags; + __u8 ttvn; + __be16 num_vlan; + struct batadv_tvlv_tt_vlan_data vlan_data[] __counted_by_be(num_vlan); +}; + /** * struct batadv_tvlv_tt_change - translation table diff data * @flags: status indicators concerning the non-mesh client (see -- cgit v1.2.3 From dad6c45cbd40b57db95c9d46e01ff6d302e86042 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 16 Feb 2024 11:41:16 -0500 Subject: drm/amdkfd: Output migrate end event if migrate failed If page migration failed, also output migrate end event to match with migrate start event, with failure error_code added to the end of the migrate message macro. This will not break uAPI because application uses old message macro sscanf drop and ignore the error_code. Output GPU page fault restore end event if migration failed. Signed-off-by: Philip Yang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 717307d6b5b7..fa9f9846b88e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -609,6 +609,7 @@ struct kfd_ioctl_smi_events_args { * migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update * rw: 'W' for write page fault, 'R' for read page fault * rescheduled: 'R' if the queue restore failed and rescheduled to try again + * error_code: migrate failure error code, 0 if no error */ #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ "%x %s\n", (reset_seq_num), (reset_cause) @@ -630,9 +631,9 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\ (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger) -#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\ - "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\ - (from), (to), (migrate_trigger) +#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger, error_code) \ + "%lld -%d @%lx(%lx) %x->%x %d %d\n", (ns), (pid), (start), (size),\ + (from), (to), (migrate_trigger), (error_code) #define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\ "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger) -- cgit v1.2.3 From a3ab2d45b9887ee609cd3bea39f668236935774c Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 30 Jul 2024 15:33:23 -0400 Subject: drm/amdkfd: SMI report dropped event count Add new SMI event to report the dropped event count. When the event kfifo is full, drop count is not zero, or no enough space left to store the event message, increase drop count. After reading event out from kfifo, if event was dropped, drop_count is not zero, generate a dropped event record and reset drop count to zero. Signed-off-by: Philip Yang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index fa9f9846b88e..7afd66d45313 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -530,6 +530,7 @@ enum kfd_smi_event { KFD_SMI_EVENT_QUEUE_EVICTION = 9, KFD_SMI_EVENT_QUEUE_RESTORE = 10, KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, + KFD_SMI_EVENT_DROPPED_EVENT = 12, /* * max event number, as a flag bit to get events from all processes, @@ -610,6 +611,7 @@ struct kfd_ioctl_smi_events_args { * rw: 'W' for write page fault, 'R' for read page fault * rescheduled: 'R' if the queue restore failed and rescheduled to try again * error_code: migrate failure error code, 0 if no error + * drop_count: how many events dropped when fifo is full */ #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ "%x %s\n", (reset_seq_num), (reset_cause) @@ -645,6 +647,10 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ (node), (unmap_trigger) +#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\ + "%lld -%d %d\n", (ns), (pid), (drop_count) + + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.2.3 From 83134ef4609388f6b9ca31a384f531155196c2a7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Oct 2024 12:13:31 +0200 Subject: netkit: Add option for scrubbing skb meta data Jordan reported that when running Cilium with netkit in per-endpoint-routes mode, network policy misclassifies traffic. In this direct routing mode of Cilium which is used in case of GKE/EKS/AKS, the Pod's BPF program to enforce policy sits on the netkit primary device's egress side. The issue here is that in case of netkit's netkit_prep_forward(), it will clear meta data such as skb->mark and skb->priority before executing the BPF program. Thus, identity data stored in there from earlier BPF programs (e.g. from tcx ingress on the physical device) gets cleared instead of being made available for the primary's program to process. While for traffic egressing the Pod via the peer device this might be desired, this is different for the primary one where compared to tcx egress on the host veth this information would be available. To address this, add a new parameter for the device orchestration to allow control of skb->mark and skb->priority scrubbing, to make the two accessible from BPF (and eventually leave it up to the program to scrub). By default, the current behavior is retained. For netkit peer this also enables the use case where applications could cooperate/signal intent to the BPF program. Note that struct netkit has a 4 byte hole between policy and bundle which is used here, in other words, struct netkit's first cacheline content used in fast-path does not get moved around. Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device") Reported-by: Jordan Rife Signed-off-by: Daniel Borkmann Cc: Nikolay Aleksandrov Link: https://github.com/cilium/cilium/issues/34042 Acked-by: Jakub Kicinski Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20241004101335.117711-1-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- include/uapi/linux/if_link.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6dc258993b17..2acc7687e017 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1292,6 +1292,19 @@ enum netkit_mode { NETKIT_L3, }; +/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to + * the BPF program if attached. This also means the latter can + * consume the two fields if they were populated earlier. + * + * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before + * invoking the attached BPF program when the peer device resides + * in a different network namespace. This is the default behavior. + */ +enum netkit_scrub { + NETKIT_SCRUB_NONE, + NETKIT_SCRUB_DEFAULT, +}; + enum { IFLA_NETKIT_UNSPEC, IFLA_NETKIT_PEER_INFO, @@ -1299,6 +1312,8 @@ enum { IFLA_NETKIT_POLICY, IFLA_NETKIT_PEER_POLICY, IFLA_NETKIT_MODE, + IFLA_NETKIT_SCRUB, + IFLA_NETKIT_PEER_SCRUB, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) -- cgit v1.2.3 From 56c594d8df64e726e803652ee9f4ab08659d4574 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Thu, 3 Oct 2024 14:43:09 +0200 Subject: drm: add DRM_SET_CLIENT_NAME ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Giving the opportunity to userspace to associate a free-form name with a drm_file struct is helpful for tracking and debugging. This is similar to the existing DMA_BUF_SET_NAME ioctl. Access to client_name is protected by a mutex, and the 'clients' debugfs file has been updated to print it. Userspace MR to use this ioctl: https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1428 If the string passed by userspace contains chars that would mess up output when it's going to be printed (in dmesg, fdinfo, etc), -EINVAL is returned. A 0-length string is a valid use, and clears the existing name. Reviewed-by: Tvrtko Ursulin Reviewed-by: Dmitry Osipenko Signed-off-by: Pierre-Eric Pelloux-Prayer Link: https://patchwork.freedesktop.org/patch/msgid/20241003124506.470931-2-pierre-eric.pelloux-prayer@amd.com Reviewed-by: Christian König Signed-off-by: Christian König --- include/uapi/drm/drm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 16122819edfe..7fba37b94401 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -1024,6 +1024,13 @@ struct drm_crtc_queue_sequence { __u64 user_data; /* user data passed to event */ }; +#define DRM_CLIENT_NAME_MAX_LEN 64 +struct drm_set_client_name { + __u64 name_len; + __u64 name; +}; + + #if defined(__cplusplus) } #endif @@ -1288,6 +1295,16 @@ extern "C" { */ #define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) +/** + * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file + * + * Having a name allows for easier tracking and debugging. + * The length of the name (without null ending char) must be + * <= DRM_CLIENT_NAME_MAX_LEN. + * The call will fail if the name contains whitespaces or non-printable chars. + */ +#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. -- cgit v1.2.3 From 9ab440a9d0426cf7842240891cc457155db1a97e Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 7 Oct 2024 08:41:44 -0700 Subject: drm/xe/ptl: L3bank mask is not available on the media GT On PTL platforms with media version 30.00, the fuse registers for reporting L3 bank availability to the GT just read out as ~0 and do not provide proper values. Xe does not use the L3 bank mask for anything internally; it only passes the mask through to userspace via the GT topology query. Since we don't have any way to get the real L3 bank mask, we don't want to pass garbage to userspace. Passing a zeroed mask or a copy of the primary GT's L3 bank mask would also be inaccurate and likely to cause confusion for userspace. The best approach is to simply not include L3 in the list of masks returned by the topology query in cases where we aren't able to provide a meaningful value. This won't change the behavior for any existing platforms (where we can always obtain L3 masks successfully for all GTs), it will only prevent us from mis-reporting bad information on upcoming platform(s). There's a good chance this will become a formal workaround in the future, but for now we don't have a lineage number so "no_media_l3" is used in place of a lineage as the OOB workaround descriptor. v2: - Re-calculate query size to properly match data returned. (Gustavo) - Update kerneldoc to clarify that the L3bank mask may not be included in the query results if the hardware doesn't make it available. (Gustavo) Cc: Matt Atwood Cc: Gustavo Sousa Signed-off-by: Shekhar Chauhan Co-developed-by: Matt Roper Signed-off-by: Matt Roper Reviewed-by: Jonathan Cavitt Reviewed-by: Gustavo Sousa Acked-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20241007154143.2021124-2-matthew.d.roper@intel.com --- include/uapi/drm/xe_drm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b6fbe4988f2e..c4182e95a619 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -512,7 +512,9 @@ struct drm_xe_query_gt_list { * containing the following in mask: * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` * means 32 DSS are available for compute. - * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks. This type + * may be omitted if the driver is unable to query the mask from the + * hardware. * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: -- cgit v1.2.3 From a8f2cdd27d114ed6c3354a0e39502e6d56215804 Mon Sep 17 00:00:00 2001 From: Dmitry Perchanov Date: Mon, 26 Aug 2024 16:04:23 +0300 Subject: media: v4l: Add luma 16-bit interlaced pixel format The formats added by this patch are: V4L2_PIX_FMT_Y16I Interlaced lumina format primary use in RealSense Depth cameras with stereo stream for left and right image sensors. Signed-off-by: Dmitry Perchanov Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/568efbd75290e286b8ad9e7347b5f43745121020.camel@intel.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 27239cb64065..21a8aa575ea3 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -798,6 +798,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_S5C_UYVY_JPG v4l2_fourcc('S', '5', 'C', 'I') /* S5C73M3 interleaved UYVY/JPEG */ #define V4L2_PIX_FMT_Y8I v4l2_fourcc('Y', '8', 'I', ' ') /* Greyscale 8-bit L/R interleaved */ #define V4L2_PIX_FMT_Y12I v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */ +#define V4L2_PIX_FMT_Y16I v4l2_fourcc('Y', '1', '6', 'I') /* Greyscale 16-bit L/R interleaved */ #define V4L2_PIX_FMT_Z16 v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */ #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ #define V4L2_PIX_FMT_MM21 v4l2_fourcc('M', 'M', '2', '1') /* Mediatek 8-bit block mode, two non-contiguous planes */ -- cgit v1.2.3 From 20503272422693d793b84f88bf23fe4e955d3a33 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 6 Oct 2024 08:17:58 +0100 Subject: ptp: Add support for the AMZNC10C 'vmclock' device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vmclock device addresses the problem of live migration with precision clocks. The tolerances of a hardware counter (e.g. TSC) are typically around ±50PPM. A guest will use NTP/PTP/PPS to discipline that counter against an external source of 'real' time, and track the precise frequency of the counter as it changes with environmental conditions. When a guest is live migrated, anything it knows about the frequency of the underlying counter becomes invalid. It may move from a host where the counter running at -50PPM of its nominal frequency, to a host where it runs at +50PPM. There will also be a step change in the value of the counter, as the correctness of its absolute value at migration is limited by the accuracy of the source and destination host's time synchronization. In its simplest form, the device merely advertises a 'disruption_marker' which indicates that the guest should throw away any NTP synchronization it thinks it has, and start again. Because the shared memory region can be exposed all the way to userspace through the /dev/vmclock0 node, applications can still use time from a fast vDSO 'system call', and check the disruption marker to be sure that their timestamp is indeed truthful. The structure also allows for the precise time, as known by the host, to be exposed directly to guests so that they don't have to wait for NTP to resync from scratch. The PTP driver consumes this information if present. Like the KVM PTP clock, this PTP driver can convert TSC-based cross timestamps into KVM clock values. Unlike the KVM PTP clock, it does so only when such is actually helpful. The values and fields are based on the nascent virtio-rtc specification, and the intent is that a version (hopefully precisely this version) of this structure will be included as an optional part of that spec. In the meantime, this driver supports the simple ACPI form of the device which is being shipped in certain commercial hypervisors (and submitted for inclusion in QEMU). Signed-off-by: David Woodhouse Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/uapi/linux/vmclock-abi.h | 182 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 include/uapi/linux/vmclock-abi.h (limited to 'include/uapi') diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h new file mode 100644 index 000000000000..2d99b29ac44a --- /dev/null +++ b/include/uapi/linux/vmclock-abi.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ + +/* + * This structure provides a vDSO-style clock to VM guests, exposing the + * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch + * counter, etc.) and real time. It is designed to address the problem of + * live migration, which other clock enlightenments do not. + * + * When a guest is live migrated, this affects the clock in two ways. + * + * First, even between identical hosts the actual frequency of the underlying + * counter will change within the tolerances of its specification (typically + * ±50PPM, or 4 seconds a day). This frequency also varies over time on the + * same host, but can be tracked by NTP as it generally varies slowly. With + * live migration there is a step change in the frequency, with no warning. + * + * Second, there may be a step change in the value of the counter itself, as + * its accuracy is limited by the precision of the NTP synchronization on the + * source and destination hosts. + * + * So any calibration (NTP, PTP, etc.) which the guest has done on the source + * host before migration is invalid, and needs to be redone on the new host. + * + * In its most basic mode, this structure provides only an indication to the + * guest that live migration has occurred. This allows the guest to know that + * its clock is invalid and take remedial action. For applications that need + * reliable accurate timestamps (e.g. distributed databases), the structure + * can be mapped all the way to userspace. This allows the application to see + * directly for itself that the clock is disrupted and take appropriate + * action, even when using a vDSO-style method to get the time instead of a + * system call. + * + * In its more advanced mode. this structure can also be used to expose the + * precise relationship of the CPU counter to real time, as calibrated by the + * host. This means that userspace applications can have accurate time + * immediately after live migration, rather than having to pause operations + * and wait for NTP to recover. This mode does, of course, rely on the + * counter being reliable and consistent across CPUs. + * + * Note that this must be true UTC, never with smeared leap seconds. If a + * guest wishes to construct a smeared clock, it can do so. Presenting a + * smeared clock through this interface would be problematic because it + * actually messes with the apparent counter *period*. A linear smearing + * of 1 ms per second would effectively tweak the counter period by 1000PPM + * at the start/end of the smearing period, while a sinusoidal smear would + * basically be impossible to represent. + * + * This structure is offered with the intent that it be adopted into the + * nascent virtio-rtc standard, as a virtio-rtc that does not address the live + * migration problem seems a little less than fit for purpose. For that + * reason, certain fields use precisely the same numeric definitions as in + * the virtio-rtc proposal. The structure can also be exposed through an ACPI + * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for + * the fact that it uses a real _CRS to convey the address of the structure + * (which should be a full page, to allow for mapping directly to userspace). + */ + +#ifndef __VMCLOCK_ABI_H__ +#define __VMCLOCK_ABI_H__ + +#include + +struct vmclock_abi { + /* CONSTANT FIELDS */ + __le32 magic; +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ + __le32 size; /* Size of region containing this structure */ + __le16 version; /* 1 */ + __u8 counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ +#define VMCLOCK_COUNTER_ARM_VCNT 0 +#define VMCLOCK_COUNTER_X86_TSC 1 +#define VMCLOCK_COUNTER_INVALID 0xff + __u8 time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ +#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ + + /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ + __le32 seq_count; /* Low bit means an update is in progress */ + /* + * This field changes to another non-repeating value when the CPU + * counter is disrupted, for example on live migration. This lets + * the guest know that it should discard any calibration it has + * performed of the counter against external sources (NTP/PTP/etc.). + */ + __le64 disruption_marker; + __le64 flags; + /* Indicates that the tai_offset_sec field is valid */ +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) + /* + * Optionally used to notify guests of pending maintenance events. + * A guest which provides latency-sensitive services may wish to + * remove itself from service if an event is coming up. Two flags + * indicate the approximate imminence of the event. + */ +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) + /* + * If the MONOTONIC flag is set then (other than leap seconds) it is + * guaranteed that the time calculated according this structure at + * any given moment shall never appear to be later than the time + * calculated via the structure at any *later* moment. + * + * In particular, a timestamp based on a counter reading taken + * immediately after setting the low bit of seq_count (and the + * associated memory barrier), using the previously-valid time and + * period fields, shall never be later than a timestamp based on + * a counter reading taken immediately before *clearing* the low + * bit again after the update, using the about-to-be-valid fields. + */ +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + + __u8 pad[2]; + __u8 clock_status; +#define VMCLOCK_STATUS_UNKNOWN 0 +#define VMCLOCK_STATUS_INITIALIZING 1 +#define VMCLOCK_STATUS_SYNCHRONIZED 2 +#define VMCLOCK_STATUS_FREERUNNING 3 +#define VMCLOCK_STATUS_UNRELIABLE 4 + + /* + * The time exposed through this device is never smeared. This field + * corresponds to the 'subtype' field in virtio-rtc, which indicates + * the smearing method. However in this case it provides a *hint* to + * the guest operating system, such that *if* the guest OS wants to + * provide its users with an alternative clock which does not follow + * UTC, it may do so in a fashion consistent with the other systems + * in the nearby environment. + */ + __u8 leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ +#define VMCLOCK_SMEARING_STRICT 0 +#define VMCLOCK_SMEARING_NOON_LINEAR 1 +#define VMCLOCK_SMEARING_UTC_SLS 2 + __le16 tai_offset_sec; /* Actually two's complement signed */ + __u8 leap_indicator; + /* + * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined + * in the current draft of virtio-rtc, but since smearing cannot be + * used with the shared memory device, some values are not used. + * + * The _POST_POS and _POST_NEG values allow the guest to perform + * its own smearing during the day or so after a leap second when + * such smearing may need to continue being applied for a leap + * second which is now theoretically "historical". + */ +#define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ +#define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ +#define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ +#define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ +#define VMCLOCK_LEAP_POST_POS 0x04 +#define VMCLOCK_LEAP_POST_NEG 0x05 + + /* Bit shift for counter_period_frac_sec and its error rate */ + __u8 counter_period_shift; + /* + * Paired values of counter and UTC at a given point in time. + */ + __le64 counter_value; + /* + * Counter period, and error margin of same. The unit of these + * fields is 1/2^(64 + counter_period_shift) of a second. + */ + __le64 counter_period_frac_sec; + __le64 counter_period_esterror_rate_frac_sec; + __le64 counter_period_maxerror_rate_frac_sec; + + /* + * Time according to time_type field above. + */ + __le64 time_sec; /* Seconds since time_type epoch */ + __le64 time_frac_sec; /* Units of 1/2^64 of a second */ + __le64 time_esterror_nanosec; + __le64 time_maxerror_nanosec; +}; + +#endif /* __VMCLOCK_ABI_H__ */ -- cgit v1.2.3 From 80c549cd1ab0241a7af262690a0ff9991fc74ec5 Mon Sep 17 00:00:00 2001 From: Alexander Zubkov Date: Tue, 8 Oct 2024 18:27:57 +0200 Subject: Fix misspelling of "accept*" in net Several files have "accept*" misspelled as "accpet*" in the comments. Fix all such occurrences. Signed-off-by: Alexander Zubkov Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241008162756.22618-2-green@qrator.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 1a0fe8b151fb..d85d671deed3 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -31,7 +31,7 @@ struct udphdr { #define UDP_CORK 1 /* Never send partially complete segments */ #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ -#define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ +#define UDP_NO_CHECK6_RX 102 /* Disable accepting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ #define UDP_GRO 104 /* This socket can receive UDP GRO packets */ -- cgit v1.2.3 From 04e65df94b3112a1b319b6deb5bab83fd740bc7d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:48 +0200 Subject: netlink: spec: add shaper YAML spec Define the user-space visible interface to query, configure and delete network shapers via yaml definition. Add dummy implementations for the relevant NL callbacks. set() and delete() operations touch a single shaper creating/updating or deleting it. The group() operation creates a shaper's group, nesting multiple input shapers under the specified output shaper. Reviewed-by: Jiri Pirko Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/7a33a1ff370bdbcd0cd3f909575c912cd56f41da.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_shaper.h | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 include/uapi/linux/net_shaper.h (limited to 'include/uapi') diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h new file mode 100644 index 000000000000..9e3fa63618ee --- /dev/null +++ b/include/uapi/linux/net_shaper.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/net_shaper.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_NET_SHAPER_H +#define _UAPI_LINUX_NET_SHAPER_H + +#define NET_SHAPER_FAMILY_NAME "net-shaper" +#define NET_SHAPER_FAMILY_VERSION 1 + +/** + * enum net_shaper_scope - Defines the shaper @id interpretation. + * @NET_SHAPER_SCOPE_UNSPEC: The scope is not specified. + * @NET_SHAPER_SCOPE_NETDEV: The main shaper for the given network device. + * @NET_SHAPER_SCOPE_QUEUE: The shaper is attached to the given device queue, + * the @id represents the queue number. + * @NET_SHAPER_SCOPE_NODE: The shaper allows grouping of queues or other node + * shapers; can be nested in either @netdev shapers or other @node shapers, + * allowing placement in any location of the scheduling tree, except leaves + * and root. + */ +enum net_shaper_scope { + NET_SHAPER_SCOPE_UNSPEC, + NET_SHAPER_SCOPE_NETDEV, + NET_SHAPER_SCOPE_QUEUE, + NET_SHAPER_SCOPE_NODE, + + /* private: */ + __NET_SHAPER_SCOPE_MAX, + NET_SHAPER_SCOPE_MAX = (__NET_SHAPER_SCOPE_MAX - 1) +}; + +/** + * enum net_shaper_metric - Different metric supported by the shaper. + * @NET_SHAPER_METRIC_BPS: Shaper operates on a bits per second basis. + * @NET_SHAPER_METRIC_PPS: Shaper operates on a packets per second basis. + */ +enum net_shaper_metric { + NET_SHAPER_METRIC_BPS, + NET_SHAPER_METRIC_PPS, +}; + +enum { + NET_SHAPER_A_HANDLE = 1, + NET_SHAPER_A_METRIC, + NET_SHAPER_A_BW_MIN, + NET_SHAPER_A_BW_MAX, + NET_SHAPER_A_BURST, + NET_SHAPER_A_PRIORITY, + NET_SHAPER_A_WEIGHT, + NET_SHAPER_A_IFINDEX, + NET_SHAPER_A_PARENT, + NET_SHAPER_A_LEAVES, + + __NET_SHAPER_A_MAX, + NET_SHAPER_A_MAX = (__NET_SHAPER_A_MAX - 1) +}; + +enum { + NET_SHAPER_A_HANDLE_SCOPE = 1, + NET_SHAPER_A_HANDLE_ID, + + __NET_SHAPER_A_HANDLE_MAX, + NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1) +}; + +enum { + NET_SHAPER_CMD_GET = 1, + NET_SHAPER_CMD_SET, + NET_SHAPER_CMD_DELETE, + NET_SHAPER_CMD_GROUP, + + __NET_SHAPER_CMD_MAX, + NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1) +}; + +#endif /* _UAPI_LINUX_NET_SHAPER_H */ -- cgit v1.2.3 From 14bba9285aedefb99647d716b0f61bf32081e387 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 Oct 2024 10:09:54 +0200 Subject: netlink: spec: add shaper introspection support Allow the user-space to fine-grain query the shaping features supported by the NIC on each domain. Reviewed-by: Jiri Pirko Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/3ddd10e450e3fe7d4b944c0d0b886d4483529ee6.1728460186.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_shaper.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/net_shaper.h b/include/uapi/linux/net_shaper.h index 9e3fa63618ee..d8834b59f7d7 100644 --- a/include/uapi/linux/net_shaper.h +++ b/include/uapi/linux/net_shaper.h @@ -65,11 +65,28 @@ enum { NET_SHAPER_A_HANDLE_MAX = (__NET_SHAPER_A_HANDLE_MAX - 1) }; +enum { + NET_SHAPER_A_CAPS_IFINDEX = 1, + NET_SHAPER_A_CAPS_SCOPE, + NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS, + NET_SHAPER_A_CAPS_SUPPORT_METRIC_PPS, + NET_SHAPER_A_CAPS_SUPPORT_NESTING, + NET_SHAPER_A_CAPS_SUPPORT_BW_MIN, + NET_SHAPER_A_CAPS_SUPPORT_BW_MAX, + NET_SHAPER_A_CAPS_SUPPORT_BURST, + NET_SHAPER_A_CAPS_SUPPORT_PRIORITY, + NET_SHAPER_A_CAPS_SUPPORT_WEIGHT, + + __NET_SHAPER_A_CAPS_MAX, + NET_SHAPER_A_CAPS_MAX = (__NET_SHAPER_A_CAPS_MAX - 1) +}; + enum { NET_SHAPER_CMD_GET = 1, NET_SHAPER_CMD_SET, NET_SHAPER_CMD_DELETE, NET_SHAPER_CMD_GROUP, + NET_SHAPER_CMD_CAP_GET, __NET_SHAPER_CMD_MAX, NET_SHAPER_CMD_MAX = (__NET_SHAPER_CMD_MAX - 1) -- cgit v1.2.3 From 5bd48a3a14df4b3ee1be0757efcc0f40d4f57b35 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 10 Oct 2024 04:56:52 +0100 Subject: bpf: fix argument type in bpf_loop documentation The `index` argument to bpf_loop() is threaded as an u64. This lead in a subtle verifier denial where clang cloned the argument in another register[1]. [1] https://github.com/systemd/systemd/pull/34650#issuecomment-2401092895 Signed-off-by: Matteo Croce Link: https://lore.kernel.org/r/20241010035652.17830-1-technoboy85@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8ab4d8184b9d..874af0186fe8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5371,7 +5371,7 @@ union bpf_attr { * Currently, the **flags** must be 0. Currently, nr_loops is * limited to 1 << 23 (~8 million) loops. * - * long (\*callback_fn)(u32 index, void \*ctx); + * long (\*callback_fn)(u64 index, void \*ctx); * * where **index** is the current index in the loop. The index * is zero-indexed. -- cgit v1.2.3 From c6ca31981b545ad3081007b6aa88b6aab1b0cece Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Thu, 10 Oct 2024 12:33:01 -0700 Subject: bpf: Update bpf_override_return() comment The documentation says CONFIG_FUNCTION_ERROR_INJECTION is supported only on x86. This was presumably true at the time of writing, but it's now supported on many other architectures too. Drop this statement, since it's not correct anymore and it fits better in other documentation anyway. Signed-off-by: Martin Kelly Link: https://lore.kernel.org/r/20241010193301.995909-1-martin.kelly@crowdstrike.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 874af0186fe8..627c4195f04f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3103,10 +3103,6 @@ union bpf_attr { * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration * option, and in this case it only works on functions tagged with * **ALLOW_ERROR_INJECTION** in the kernel code. - * - * Also, the helper is only available for the architectures having - * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, - * x86 architecture is the only one to support this feature. * Return * 0 * -- cgit v1.2.3 From 445936f9e258eca624c8239056bd8cd6e853b3fd Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 23 Sep 2024 11:59:57 +0200 Subject: thermal: core: Add user thresholds support The user thresholds mechanism is a way to have the userspace to tell the thermal framework to send a notification when a temperature limit is crossed. There is no id, no hysteresis, just the temperature and the direction of the limit crossing. That means we can be notified when a threshold is crossed the way up only, or the way down only or both ways. That allows to create hysteresis values if it is needed. A threshold can be added, deleted or flushed. The latter means all thresholds belonging to a thermal zone will be deleted. When a threshold is added: - if the same threshold (temperature and direction) exists, an error is returned - if a threshold is specified with the same temperature but a different direction, the specified direction is added - if there is no threshold with the same temperature then it is created When a threshold is deleted: - if the same threshold (temperature and direction) exists, it is deleted - if a threshold is specified with the same temperature but a different direction, the specified direction is removed - if there is no threshold with the same temperature, then an error is returned When the threshold are flushed: - All thresholds related to a thermal zone are deleted When a threshold is crossed: - the userspace does not need to know which threshold(s) have been crossed, it will be notified with the current temperature and the previous temperature - if multiple thresholds have been crossed between two updates only one notification will be send to the userspace, it is pointless to send a notification per thresholds crossed as the userspace can handle that easily when it has the temperature delta information Signed-off-by: Daniel Lezcano Link: https://patch.msgid.link/20240923100005.2532430-2-daniel.lezcano@linaro.org [ rjw: Subject edit, use BIT(0) and BIT(1) in symbol definitions ] Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index fc78bf3aead7..2e6f60a36173 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -3,6 +3,8 @@ #define _UAPI_LINUX_THERMAL_H #define THERMAL_NAME_LENGTH 20 +#define THERMAL_THRESHOLD_WAY_UP BIT(0) +#define THERMAL_THRESHOLD_WAY_DOWN BIT(1) enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, -- cgit v1.2.3 From e38501cee5364aeb3bd265b484a8e47baa6634aa Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Mon, 30 Sep 2024 21:53:03 +0200 Subject: accel/ivpu: Stop using hardcoded DRIVER_DATE Hardcoded driver date is useless, so use kernel version as a driver date to make identifying .ko file easier. Also allow to pass DRIVER_DATE on build time to allow versioning the driver in case it is built out of the tree. Reviewed-by: Karol Wachowski Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-13-jacek.lawrynowicz@linux.intel.com Signed-off-by: Jacek Lawrynowicz --- include/uapi/drm/ivpu_accel.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 084fb529e1e9..234664d35250 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -12,9 +12,6 @@ extern "C" { #endif -#define DRM_IVPU_DRIVER_MAJOR 1 -#define DRM_IVPU_DRIVER_MINOR 0 - #define DRM_IVPU_GET_PARAM 0x00 #define DRM_IVPU_SET_PARAM 0x01 #define DRM_IVPU_BO_CREATE 0x02 -- cgit v1.2.3 From 06f5531958dd5decaabb21c6fa1da3dcaf8dfc24 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 26 Aug 2024 17:24:09 +0000 Subject: media: videodev2: Add flag to unconditionally enumerate pixel formats When the index is ORed with V4L2_FMTDESC_FLAG_ENUM_ALL the driver clears the flag and enumerate all the possible formats, ignoring any limitations from the current configuration. Drivers which do not support this flag yet always return an EINVAL. Signed-off-by: Benjamin Gaignard Signed-off-by: Hans Verkuil [hverkuil: improved doc when the new flag is not supported by the driver] --- include/uapi/linux/videodev2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 21a8aa575ea3..ded023edac70 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -907,6 +907,9 @@ struct v4l2_fmtdesc { #define V4L2_FMT_FLAG_CSC_QUANTIZATION 0x0100 #define V4L2_FMT_FLAG_META_LINE_BASED 0x0200 +/* Format description flag, to be ORed with the index */ +#define V4L2_FMTDESC_FLAG_ENUM_ALL 0x80000000 + /* Frame Size and frame rate enumeration */ /* * F R A M E S I Z E E N U M E R A T I O N -- cgit v1.2.3 From 516010460011ae74ac3b7383cf90ed27e2711cd6 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:57 +0000 Subject: netdev-genl: Dump napi_defer_hard_irqs Support dumping defer_hard_irqs for a NAPI ID. Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241011184527.16393-3-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7c308f04e7a0..13dc0b027e86 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -122,6 +122,7 @@ enum { NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, + NETDEV_A_NAPI_DEFER_HARD_IRQS, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 0137891e74576f77a7901718dc0ce08ca074ae74 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:44:59 +0000 Subject: netdev-genl: Dump gro_flush_timeout Support dumping gro_flush_timeout for a NAPI ID. Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-5-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 13dc0b027e86..cacd33359c76 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -123,6 +123,7 @@ enum { NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, NETDEV_A_NAPI_DEFER_HARD_IRQS, + NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From 1287c1ae0fc227e5acef11a539eb4e75646e31c7 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 11 Oct 2024 18:45:01 +0000 Subject: netdev-genl: Support setting per-NAPI config values Add support to set per-NAPI defer_hard_irqs and gro_flush_timeout. Signed-off-by: Joe Damato Reviewed-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241011184527.16393-7-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index cacd33359c76..e3ebb49f60d2 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -201,6 +201,7 @@ enum { NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, + NETDEV_CMD_NAPI_SET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 6390834c6f9b2c5e33f52f34579efa0d0df073db Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Oct 2024 13:31:10 +0300 Subject: media: uapi: Add meta formats for PiSP FE config and stats Add two meta formats for PiSP FE: V4L2_META_FMT_RPI_FE_CFG and V4L2_META_FMT_RPI_FE_STATS. The former is used to provide configuration for the FE and the latter is used to read the statistics from the FE. Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index ded023edac70..e7c4dce39007 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -860,6 +860,8 @@ struct v4l2_pix_format { /* Vendor specific - used for RaspberryPi PiSP */ #define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */ +#define V4L2_META_FMT_RPI_FE_CFG v4l2_fourcc('R', 'P', 'F', 'C') /* PiSP FE configuration */ +#define V4L2_META_FMT_RPI_FE_STATS v4l2_fourcc('R', 'P', 'F', 'S') /* PiSP FE stats */ #ifdef __KERNEL__ /* -- cgit v1.2.3 From 6edb685abb2af445773876a326292b989dcb3c9f Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 3 Oct 2024 13:31:12 +0300 Subject: media: raspberrypi: Add support for RP1-CFE Add support for Raspberry Pi CFE. The CFE is a hardware block that contains: - MIPI D-PHY - MIPI CSI-2 receiver - Front End ISP (FE) The driver has been upported from the Raspberry Pi kernel commit 88a681df9623 ("ARM: dts: bcm2712-rpi: Add i2c_pins labels"). Co-developed-by: Naushir Patuck Signed-off-by: Naushir Patuck Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- .../uapi/linux/media/raspberrypi/pisp_fe_config.h | 273 +++++++++++++++++++++ .../linux/media/raspberrypi/pisp_fe_statistics.h | 64 +++++ 2 files changed, 337 insertions(+) create mode 100644 include/uapi/linux/media/raspberrypi/pisp_fe_config.h create mode 100644 include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h (limited to 'include/uapi') diff --git a/include/uapi/linux/media/raspberrypi/pisp_fe_config.h b/include/uapi/linux/media/raspberrypi/pisp_fe_config.h new file mode 100644 index 000000000000..77237460a3b5 --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_fe_config.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * RP1 PiSP Front End Driver Configuration structures + * + * Copyright (C) 2021 - Raspberry Pi Ltd. + * + */ +#ifndef _UAPI_PISP_FE_CONFIG_ +#define _UAPI_PISP_FE_CONFIG_ + +#include + +#include "pisp_common.h" +#include "pisp_fe_statistics.h" + +#define PISP_FE_NUM_OUTPUTS 2 + +enum pisp_fe_enable { + PISP_FE_ENABLE_INPUT = 0x000001, + PISP_FE_ENABLE_DECOMPRESS = 0x000002, + PISP_FE_ENABLE_DECOMPAND = 0x000004, + PISP_FE_ENABLE_BLA = 0x000008, + PISP_FE_ENABLE_DPC = 0x000010, + PISP_FE_ENABLE_STATS_CROP = 0x000020, + PISP_FE_ENABLE_DECIMATE = 0x000040, + PISP_FE_ENABLE_BLC = 0x000080, + PISP_FE_ENABLE_CDAF_STATS = 0x000100, + PISP_FE_ENABLE_AWB_STATS = 0x000200, + PISP_FE_ENABLE_RGBY = 0x000400, + PISP_FE_ENABLE_LSC = 0x000800, + PISP_FE_ENABLE_AGC_STATS = 0x001000, + PISP_FE_ENABLE_CROP0 = 0x010000, + PISP_FE_ENABLE_DOWNSCALE0 = 0x020000, + PISP_FE_ENABLE_COMPRESS0 = 0x040000, + PISP_FE_ENABLE_OUTPUT0 = 0x080000, + PISP_FE_ENABLE_CROP1 = 0x100000, + PISP_FE_ENABLE_DOWNSCALE1 = 0x200000, + PISP_FE_ENABLE_COMPRESS1 = 0x400000, + PISP_FE_ENABLE_OUTPUT1 = 0x800000 +}; + +#define PISP_FE_ENABLE_CROP(i) (PISP_FE_ENABLE_CROP0 << (4 * (i))) +#define PISP_FE_ENABLE_DOWNSCALE(i) (PISP_FE_ENABLE_DOWNSCALE0 << (4 * (i))) +#define PISP_FE_ENABLE_COMPRESS(i) (PISP_FE_ENABLE_COMPRESS0 << (4 * (i))) +#define PISP_FE_ENABLE_OUTPUT(i) (PISP_FE_ENABLE_OUTPUT0 << (4 * (i))) + +/* + * We use the enable flags to show when blocks are "dirty", but we need some + * extra ones too. + */ +enum pisp_fe_dirty { + PISP_FE_DIRTY_GLOBAL = 0x0001, + PISP_FE_DIRTY_FLOATING = 0x0002, + PISP_FE_DIRTY_OUTPUT_AXI = 0x0004 +}; + +struct pisp_fe_global_config { + __u32 enables; + __u8 bayer_order; + __u8 pad[3]; +} __attribute__((packed)); + +struct pisp_fe_input_axi_config { + /* burst length minus one, in the range 0..15; OR'd with flags */ + __u8 maxlen_flags; + /* { prot[2:0], cache[3:0] } fields */ + __u8 cache_prot; + /* QoS (only 4 LS bits are used) */ + __u16 qos; +} __attribute__((packed)); + +struct pisp_fe_output_axi_config { + /* burst length minus one, in the range 0..15; OR'd with flags */ + __u8 maxlen_flags; + /* { prot[2:0], cache[3:0] } fields */ + __u8 cache_prot; + /* QoS (4 bitfields of 4 bits each for different panic levels) */ + __u16 qos; + /* For Panic mode: Output FIFO panic threshold */ + __u16 thresh; + /* For Panic mode: Output FIFO statistics throttle threshold */ + __u16 throttle; +} __attribute__((packed)); + +struct pisp_fe_input_config { + __u8 streaming; + __u8 pad[3]; + struct pisp_image_format_config format; + struct pisp_fe_input_axi_config axi; + /* Extra cycles delay before issuing each burst request */ + __u8 holdoff; + __u8 pad2[3]; +} __attribute__((packed)); + +struct pisp_fe_output_config { + struct pisp_image_format_config format; + __u16 ilines; + __u8 pad[2]; +} __attribute__((packed)); + +struct pisp_fe_input_buffer_config { + __u32 addr_lo; + __u32 addr_hi; + __u16 frame_id; + __u16 pad; +} __attribute__((packed)); + +#define PISP_FE_DECOMPAND_LUT_SIZE 65 + +struct pisp_fe_decompand_config { + __u16 lut[PISP_FE_DECOMPAND_LUT_SIZE]; + __u16 pad; +} __attribute__((packed)); + +struct pisp_fe_dpc_config { + __u8 coeff_level; + __u8 coeff_range; + __u8 coeff_range2; +#define PISP_FE_DPC_FLAG_FOLDBACK 1 +#define PISP_FE_DPC_FLAG_VFLAG 2 + __u8 flags; +} __attribute__((packed)); + +#define PISP_FE_LSC_LUT_SIZE 16 + +struct pisp_fe_lsc_config { + __u8 shift; + __u8 pad0; + __u16 scale; + __u16 centre_x; + __u16 centre_y; + __u16 lut[PISP_FE_LSC_LUT_SIZE]; +} __attribute__((packed)); + +struct pisp_fe_rgby_config { + __u16 gain_r; + __u16 gain_g; + __u16 gain_b; + __u8 maxflag; + __u8 pad; +} __attribute__((packed)); + +struct pisp_fe_agc_stats_config { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + /* each weight only 4 bits */ + __u8 weights[PISP_AGC_STATS_NUM_ZONES / 2]; + __u16 row_offset_x; + __u16 row_offset_y; + __u16 row_size_x; + __u16 row_size_y; + __u8 row_shift; + __u8 float_shift; + __u8 pad1[2]; +} __attribute__((packed)); + +struct pisp_fe_awb_stats_config { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + __u8 shift; + __u8 pad[3]; + __u16 r_lo; + __u16 r_hi; + __u16 g_lo; + __u16 g_hi; + __u16 b_lo; + __u16 b_hi; +} __attribute__((packed)); + +struct pisp_fe_floating_stats_region { + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; +} __attribute__((packed)); + +struct pisp_fe_floating_stats_config { + struct pisp_fe_floating_stats_region + regions[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_FE_CDAF_NUM_WEIGHTS 8 + +struct pisp_fe_cdaf_stats_config { + __u16 noise_constant; + __u16 noise_slope; + __u16 offset_x; + __u16 offset_y; + __u16 size_x; + __u16 size_y; + __u16 skip_x; + __u16 skip_y; + __u32 mode; +} __attribute__((packed)); + +struct pisp_fe_stats_buffer_config { + __u32 addr_lo; + __u32 addr_hi; +} __attribute__((packed)); + +struct pisp_fe_crop_config { + __u16 offset_x; + __u16 offset_y; + __u16 width; + __u16 height; +} __attribute__((packed)); + +enum pisp_fe_downscale_flags { + /* downscale the four Bayer components independently... */ + DOWNSCALE_BAYER = 1, + /* ...without trying to preserve their spatial relationship */ + DOWNSCALE_BIN = 2, +}; + +struct pisp_fe_downscale_config { + __u8 xin; + __u8 xout; + __u8 yin; + __u8 yout; + __u8 flags; /* enum pisp_fe_downscale_flags */ + __u8 pad[3]; + __u16 output_width; + __u16 output_height; +} __attribute__((packed)); + +struct pisp_fe_output_buffer_config { + __u32 addr_lo; + __u32 addr_hi; +} __attribute__((packed)); + +/* Each of the two output channels/branches: */ +struct pisp_fe_output_branch_config { + struct pisp_fe_crop_config crop; + struct pisp_fe_downscale_config downscale; + struct pisp_compress_config compress; + struct pisp_fe_output_config output; + __u32 pad; +} __attribute__((packed)); + +/* And finally one to rule them all: */ +struct pisp_fe_config { + /* I/O configuration: */ + struct pisp_fe_stats_buffer_config stats_buffer; + struct pisp_fe_output_buffer_config output_buffer[PISP_FE_NUM_OUTPUTS]; + struct pisp_fe_input_buffer_config input_buffer; + /* processing configuration: */ + struct pisp_fe_global_config global; + struct pisp_fe_input_config input; + struct pisp_decompress_config decompress; + struct pisp_fe_decompand_config decompand; + struct pisp_bla_config bla; + struct pisp_fe_dpc_config dpc; + struct pisp_fe_crop_config stats_crop; + __u32 spare1; /* placeholder for future decimate configuration */ + struct pisp_bla_config blc; + struct pisp_fe_rgby_config rgby; + struct pisp_fe_lsc_config lsc; + struct pisp_fe_agc_stats_config agc_stats; + struct pisp_fe_awb_stats_config awb_stats; + struct pisp_fe_cdaf_stats_config cdaf_stats; + struct pisp_fe_floating_stats_config floating_stats; + struct pisp_fe_output_axi_config output_axi; + struct pisp_fe_output_branch_config ch[PISP_FE_NUM_OUTPUTS]; + /* non-register fields: */ + __u32 dirty_flags; /* these use pisp_fe_enable */ + __u32 dirty_flags_extra; /* these use pisp_fe_dirty */ +} __attribute__((packed)); + +#endif /* _UAPI_PISP_FE_CONFIG_ */ diff --git a/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h b/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h new file mode 100644 index 000000000000..a7d42985aee8 --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_fe_statistics.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * RP1 PiSP Front End statistics definitions + * + * Copyright (C) 2021 - Raspberry Pi Ltd. + * + */ +#ifndef _UAPI_PISP_FE_STATISTICS_H_ +#define _UAPI_PISP_FE_STATISTICS_H_ + +#include + +#define PISP_FLOATING_STATS_NUM_ZONES 4 +#define PISP_AGC_STATS_NUM_BINS 1024 +#define PISP_AGC_STATS_SIZE 16 +#define PISP_AGC_STATS_NUM_ZONES (PISP_AGC_STATS_SIZE * PISP_AGC_STATS_SIZE) +#define PISP_AGC_STATS_NUM_ROW_SUMS 512 + +struct pisp_agc_statistics_zone { + __u64 Y_sum; + __u32 counted; + __u32 pad; +} __attribute__((packed)); + +struct pisp_agc_statistics { + __u32 row_sums[PISP_AGC_STATS_NUM_ROW_SUMS]; + /* + * 32-bits per bin means an image (just less than) 16384x16384 pixels + * in size can weight every pixel from 0 to 15. + */ + __u32 histogram[PISP_AGC_STATS_NUM_BINS]; + struct pisp_agc_statistics_zone floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_AWB_STATS_SIZE 32 +#define PISP_AWB_STATS_NUM_ZONES (PISP_AWB_STATS_SIZE * PISP_AWB_STATS_SIZE) + +struct pisp_awb_statistics_zone { + __u32 R_sum; + __u32 G_sum; + __u32 B_sum; + __u32 counted; +} __attribute__((packed)); + +struct pisp_awb_statistics { + struct pisp_awb_statistics_zone zones[PISP_AWB_STATS_NUM_ZONES]; + struct pisp_awb_statistics_zone floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +#define PISP_CDAF_STATS_SIZE 8 +#define PISP_CDAF_STATS_NUM_FOMS (PISP_CDAF_STATS_SIZE * PISP_CDAF_STATS_SIZE) + +struct pisp_cdaf_statistics { + __u64 foms[PISP_CDAF_STATS_NUM_FOMS]; + __u64 floating[PISP_FLOATING_STATS_NUM_ZONES]; +} __attribute__((packed)); + +struct pisp_statistics { + struct pisp_awb_statistics awb; + struct pisp_agc_statistics agc; + struct pisp_cdaf_statistics cdaf; +} __attribute__((packed)); + +#endif /* _UAPI_PISP_FE_STATISTICS_H_ */ -- cgit v1.2.3 From 7b5a58952fc3b51905c2963647485565df1e5e26 Mon Sep 17 00:00:00 2001 From: Akash Kumar Date: Fri, 27 Sep 2024 20:51:38 +0530 Subject: usb: gadget: uvc: configfs: Add frame-based frame format support Add support for frame-based frame format, which can be used to support multiple formats like H264 or H265, in addition to MJPEG and YUV frames. The frame-based format is set to H264 by default, but it can be updated to other formats by modifying the GUID through the guid configfs attribute. Different structures are used for all three formats, as H264 has a different structure compared to MJPEG and uncompressed formats. These structures will be passed to the frame make function based on the active format, using a common frame structure with additional parameters needed only for frame-based formats. These parameters are handled at runtime in the UVC driver. Signed-off-by: Akash Kumar Link: https://lore.kernel.org/r/20240927152138.31416-1-quic_akakum@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/video.h | 58 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h index 2ff0e8a3a683..526b5155e23c 100644 --- a/include/uapi/linux/usb/video.h +++ b/include/uapi/linux/usb/video.h @@ -597,5 +597,63 @@ struct UVC_FRAME_MJPEG(n) { \ __le32 dwFrameInterval[n]; \ } __attribute__ ((packed)) +/* Frame Based Payload - 3.1.1. Frame Based Video Format Descriptor */ +struct uvc_format_framebased { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + __u8 bFormatIndex; + __u8 bNumFrameDescriptors; + __u8 guidFormat[16]; + __u8 bBitsPerPixel; + __u8 bDefaultFrameIndex; + __u8 bAspectRatioX; + __u8 bAspectRatioY; + __u8 bmInterfaceFlags; + __u8 bCopyProtect; + __u8 bVariableSize; +} __attribute__((__packed__)); + +#define UVC_DT_FORMAT_FRAMEBASED_SIZE 28 + +/* Frame Based Payload - 3.1.2. Frame Based Video Frame Descriptor */ +struct uvc_frame_framebased { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + __u8 bFrameIndex; + __u8 bmCapabilities; + __u16 wWidth; + __u16 wHeight; + __u32 dwMinBitRate; + __u32 dwMaxBitRate; + __u32 dwDefaultFrameInterval; + __u8 bFrameIntervalType; + __u32 dwBytesPerLine; + __u32 dwFrameInterval[]; +} __attribute__((__packed__)); + +#define UVC_DT_FRAME_FRAMEBASED_SIZE(n) (26+4*(n)) + +#define UVC_FRAME_FRAMEBASED(n) \ + uvc_frame_framebased_##n + +#define DECLARE_UVC_FRAME_FRAMEBASED(n) \ +struct UVC_FRAME_FRAMEBASED(n) { \ + __u8 bLength; \ + __u8 bDescriptorType; \ + __u8 bDescriptorSubType; \ + __u8 bFrameIndex; \ + __u8 bmCapabilities; \ + __u16 wWidth; \ + __u16 wHeight; \ + __u32 dwMinBitRate; \ + __u32 dwMaxBitRate; \ + __u32 dwDefaultFrameInterval; \ + __u8 bFrameIntervalType; \ + __u32 dwBytesPerLine; \ + __u32 dwFrameInterval[n]; \ +} __attribute__ ((packed)) + #endif /* __LINUX_USB_VIDEO_H */ -- cgit v1.2.3 From 522249f05c5551aec9ec0ba9b6438f1ec19c138d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 3 Oct 2024 16:29:22 +0200 Subject: fanotify: allow reporting errors on failure to open fd When working in "fd mode", fanotify_read() needs to open an fd from a dentry to report event->fd to userspace. Opening an fd from dentry can fail for several reasons. For example, when tasks are gone and we try to open their /proc files or we try to open a WRONLY file like in sysfs or when trying to open a file that was deleted on the remote network server. Add a new flag FAN_REPORT_FD_ERROR for fanotify_init(). For a group with FAN_REPORT_FD_ERROR, we will send the event with the error instead of the open fd, otherwise userspace may not get the error at all. For an overflow event, we report -EBADF to avoid confusing FAN_NOFD with -EPERM. Similarly for pidfd open errors we report either -ESRCH or the open error instead of FAN_NOPIDFD and FAN_EPIDFD. In any case, userspace will not know which file failed to open, so add a debug print for further investigation. Reported-by: Krishna Vivek Vitta Link: https://lore.kernel.org/linux-fsdevel/SI2P153MB07182F3424619EDDD1F393EED46D2@SI2P153MB0718.APCP153.PROD.OUTLOOK.COM/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241003142922.111539-1-amir73il@gmail.com --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index a37de58ca571..34f221d3a1b9 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -60,6 +60,7 @@ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) -- cgit v1.2.3 From fb6f20ecb121cef4d7946f834a6ee867c4e21b4a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 17 Oct 2024 12:28:23 +0200 Subject: reiserfs: The last commit Deprecation period of reiserfs ends with the end of this year so it is time to remove it from the kernel. Acked-by: Darrick J. Wong Acked-by: Christian Brauner Signed-off-by: Jan Kara --- include/uapi/linux/reiserfs_fs.h | 27 --------------------------- include/uapi/linux/reiserfs_xattr.h | 25 ------------------------- 2 files changed, 52 deletions(-) delete mode 100644 include/uapi/linux/reiserfs_fs.h delete mode 100644 include/uapi/linux/reiserfs_xattr.h (limited to 'include/uapi') diff --git a/include/uapi/linux/reiserfs_fs.h b/include/uapi/linux/reiserfs_fs.h deleted file mode 100644 index 5bb921409f2b..000000000000 --- a/include/uapi/linux/reiserfs_fs.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details - */ -#ifndef _LINUX_REISER_FS_H -#define _LINUX_REISER_FS_H - -#include -#include - -/* - * include/linux/reiser_fs.h - * - * Reiser File System constants and structures - * - */ - -/* ioctl's command */ -#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) -/* define following flags to be the same as in ext2, so that chattr(1), - lsattr(1) will work with us. */ -#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS -#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION -#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION - -#endif /* _LINUX_REISER_FS_H */ diff --git a/include/uapi/linux/reiserfs_xattr.h b/include/uapi/linux/reiserfs_xattr.h deleted file mode 100644 index 503ad018ce5b..000000000000 --- a/include/uapi/linux/reiserfs_xattr.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - File: linux/reiserfs_xattr.h -*/ - -#ifndef _LINUX_REISERFS_XATTR_H -#define _LINUX_REISERFS_XATTR_H - -#include - -/* Magic value in header */ -#define REISERFS_XATTR_MAGIC 0x52465841 /* "RFXA" */ - -struct reiserfs_xattr_header { - __le32 h_magic; /* magic number for identification */ - __le32 h_hash; /* hash of the value */ -}; - -struct reiserfs_security_handle { - const char *name; - void *value; - __kernel_size_t length; -}; - -#endif /* _LINUX_REISERFS_XATTR_H */ -- cgit v1.2.3 From 48931f65e9f785b65244550cc8f0c8bf9eab7acd Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Tue, 15 Oct 2024 17:42:42 +0000 Subject: RDMA/efa: Add option to set QP service level on create Using modify QP with AH attributes and IB_QP_AV flag set doesn't make much sense for connectionless QP types like SRD. Add SL parameter to EFA create QP user ABI and pass it to the device. Link: https://patch.msgid.link/r/20241015174242.3490-3-mrgolin@amazon.com Reviewed-by: Firas Jahjah Reviewed-by: Yonatan Nachum Signed-off-by: Michael Margolin Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/efa-abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index d689b8b34189..11b94b0b035b 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -95,7 +95,8 @@ struct efa_ibv_create_qp { __u32 sq_ring_size; /* bytes */ __u32 driver_qp_type; __u16 flags; - __u8 reserved_90[6]; + __u8 sl; + __u8 reserved_98[5]; }; struct efa_ibv_create_qp_resp { -- cgit v1.2.3 From 59eaa01ce7a6cbc5c36b928f52888f99fca6b295 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Mon, 7 Oct 2024 12:24:17 -0600 Subject: ublk: support device recovery without I/O queueing ublk currently supports the following behaviors on ublk server exit: A: outstanding I/Os get errors, subsequently issued I/Os get errors B: outstanding I/Os get errors, subsequently issued I/Os queue C: outstanding I/Os get reissued, subsequently issued I/Os queue and the following behaviors for recovery of preexisting block devices by a future incarnation of the ublk server: 1: ublk devices stopped on ublk server exit (no recovery possible) 2: ublk devices are recoverable using start/end_recovery commands The userspace interface allows selection of combinations of these behaviors using flags specified at device creation time, namely: default behavior: A + 1 UBLK_F_USER_RECOVERY: B + 2 UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_REISSUE: C + 2 The behavior A + 2 is currently unsupported. Add support for this behavior under the new flag combination UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_FAIL_IO. Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20241007182419.3263186-5-ushankar@purestorage.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 12873639ea96..a8bc98bb69fc 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -147,8 +147,18 @@ */ #define UBLK_F_NEED_GET_DATA (1UL << 2) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is met with errors + * - I/O issued while there is no ublk server queues + */ #define UBLK_F_USER_RECOVERY (1UL << 3) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is reissued + * - I/O issued while there is no ublk server queues + */ #define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4) /* @@ -190,10 +200,18 @@ */ #define UBLK_F_ZONED (1ULL << 8) +/* + * - Block devices are recoverable if ublk server exits and restarts + * - Outstanding I/O when ublk server exits is met with errors + * - I/O issued while there is no ublk server is met with errors + */ +#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 #define UBLK_S_DEV_QUIESCED 2 +#define UBLK_S_DEV_FAIL_IO 3 /* shipped via sqe->cmd of io_uring command */ struct ublksrv_ctrl_cmd { -- cgit v1.2.3 From b21d948f4cc73e3296f2365c7afca721dd6893fa Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Thu, 29 Aug 2024 12:56:11 -0500 Subject: block: sed-opal: add ioctl IOC_OPAL_SET_SID_PW After a SED drive is provisioned, there is no way to change the SID password via the ioctl() interface. A new ioctl IOC_OPAL_SET_SID_PW will allow the password to be changed. The valid current password is required. Signed-off-by: Greg Joyce Reviewed-by: Daniel Wagner Link: https://lore.kernel.org/r/20240829175639.6478-2-gjoyce@linux.ibm.com Signed-off-by: Jens Axboe --- include/uapi/linux/sed-opal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index d3994b7716bc..9025dd5a4f0f 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -215,5 +215,6 @@ struct opal_revert_lsp { #define IOC_OPAL_GET_GEOMETRY _IOR('p', 238, struct opal_geometry) #define IOC_OPAL_DISCOVERY _IOW('p', 239, struct opal_discovery) #define IOC_OPAL_REVERT_LSP _IOW('p', 240, struct opal_revert_lsp) +#define IOC_OPAL_SET_SID_PW _IOW('p', 241, struct opal_new_pw) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From 97ddae76ddd20ea35d2059086aacd85b707a09c5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Oct 2024 13:41:13 -0400 Subject: Revert "drm/amdkfd: SMI report dropped event count" This reverts commit a3ab2d45b9887ee609cd3bea39f668236935774c. The userspace side for this code is not ready yet so revert for now. Reviewed-by: Philip Yang Signed-off-by: Alex Deucher Cc: Philip Yang --- include/uapi/linux/kfd_ioctl.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 7afd66d45313..fa9f9846b88e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -530,7 +530,6 @@ enum kfd_smi_event { KFD_SMI_EVENT_QUEUE_EVICTION = 9, KFD_SMI_EVENT_QUEUE_RESTORE = 10, KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, - KFD_SMI_EVENT_DROPPED_EVENT = 12, /* * max event number, as a flag bit to get events from all processes, @@ -611,7 +610,6 @@ struct kfd_ioctl_smi_events_args { * rw: 'W' for write page fault, 'R' for read page fault * rescheduled: 'R' if the queue restore failed and rescheduled to try again * error_code: migrate failure error code, 0 if no error - * drop_count: how many events dropped when fifo is full */ #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ "%x %s\n", (reset_seq_num), (reset_cause) @@ -647,10 +645,6 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ (node), (unmap_trigger) -#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count)\ - "%lld -%d %d\n", (ns), (pid), (drop_count) - - /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.2.3 From 3607798ad9bdef35ad08489a8239390fccaac6b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:42 +0200 Subject: wifi: cfg80211: add option for vif allowed radios This allows users to prevent a vif from affecting radios other than the configured ones. This can be useful in cases where e.g. an AP is running on one radio, and triggering a scan on another radio should not disturb it. Changing the allowed radios list for a vif is supported, but only while it is down. While it is possible to achieve the same by always explicitly specifying a frequency list for scan requests and ensuring that the wrong channel/band is never accidentally set on an unrelated interface, this change makes multi-radio wiphy setups a lot easier to deal with for CLI users. By itself, this patch only enforces the radio mask for scanning requests and remain-on-channel. Follow-up changes build on this to limit configured frequencies. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/eefcb218780f71a1549875d149f1196486762756.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f97f5adc8d51..d31ccee99cc7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2868,6 +2868,9 @@ enum nl80211_commands { * nested item, it contains attributes defined in * &enum nl80211_if_combination_attrs. * + * @NL80211_ATTR_VIF_RADIO_MASK: Bitmask of allowed radios (u32). + * A value of 0 means all radios. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3416,6 +3419,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_RADIOS, NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + NL80211_ATTR_VIF_RADIO_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From ebda716ea4da03326ac4d0a71604d18aa8a2e695 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:45 +0200 Subject: wifi: cfg80211: report per wiphy radio antenna mask With multi-radio devices, each radio typically gets a fixed set of antennas. In order to be able to disable specific antennas for some radios, user space needs to know which antenna mask bits are assigned to which radio. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/e0a26afa2c88eaa188ec96ec6d17ecac4e827641.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d31ccee99cc7..1b8827f920ff 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -8036,6 +8036,8 @@ enum nl80211_ap_settings_flags { * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface * combination for this radio. Attribute may be present multiple times * and contains attributes defined in &enum nl80211_if_combination_attrs. + * @NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK: bitmask (u32) of antennas + * connected to this radio. * * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute @@ -8046,6 +8048,7 @@ enum nl80211_wiphy_radio_attrs { NL80211_WIPHY_RADIO_ATTR_INDEX, NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE, NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK, /* keep last */ __NL80211_WIPHY_RADIO_ATTR_LAST, -- cgit v1.2.3 From a77e527b470cc38754c730bce1483711f643bb60 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 9 Oct 2024 10:25:49 +0200 Subject: wifi: cfg80211: add monitor SKIP_TX flag This can be used to indicate that the user is not interested in receiving locally sent packets on the monitor interface. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/f0c20f832eadd36c71fba9a2a16ba57d78389b6c.1728462320.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1b8827f920ff..6d11437596b9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4703,6 +4703,7 @@ enum nl80211_survey_info { * overrides all other flags. * @NL80211_MNTR_FLAG_ACTIVE: use the configured MAC address * and ACK incoming unicast packets. + * @NL80211_MNTR_FLAG_SKIP_TX: do not pass local tx packets * * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag @@ -4715,6 +4716,7 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_OTHER_BSS, NL80211_MNTR_FLAG_COOK_FRAMES, NL80211_MNTR_FLAG_ACTIVE, + NL80211_MNTR_FLAG_SKIP_TX, /* keep last */ __NL80211_MNTR_FLAG_AFTER_LAST, -- cgit v1.2.3 From c8507a25cebd179db935dd266a33c51bef1b1e80 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 22 Oct 2024 13:03:47 -0700 Subject: drm/xe/oa/uapi: Define and parse OA sync properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have laid the groundwork, introduce OA sync properties in the uapi and parse the input xe_sync array as is done elsewhere in the driver. Also add DRM_XE_OA_CAPS_SYNCS bit in OA capabilities for userspace. v2: Fix and document DRM_XE_SYNC_TYPE_USER_FENCE for OA (Matt B) Add DRM_XE_OA_CAPS_SYNCS bit to OA capabilities (Jose) Acked-by: José Roberto de Souza Reviewed-by: Jonathan Cavitt Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-3-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c4182e95a619..4a8a4a63e99c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1485,6 +1485,7 @@ struct drm_xe_oa_unit { /** @capabilities: OA capabilities bit-mask */ __u64 capabilities; #define DRM_XE_OA_CAPS_BASE (1 << 0) +#define DRM_XE_OA_CAPS_SYNCS (1 << 1) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; @@ -1634,6 +1635,22 @@ enum drm_xe_oa_property_id { * to be disabled for the stream exec queue. */ DRM_XE_OA_PROPERTY_NO_PREEMPT, + + /** + * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array + * specified in @DRM_XE_OA_PROPERTY_SYNCS + */ + DRM_XE_OA_PROPERTY_NUM_SYNCS, + + /** + * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array + * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA + * configuration will wait till input fences signal. Output fences + * will signal after the new OA configuration takes effect. For + * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar + * to the VM bind case. + */ + DRM_XE_OA_PROPERTY_SYNCS, }; /** -- cgit v1.2.3 From cdda1f26e74bac732eca537a69f19f6a37b641be Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 10 Oct 2024 16:52:32 +0100 Subject: pidfd: add ioctl to retrieve pid info A common pattern when using pid fds is having to get information about the process, which currently requires /proc being mounted, resolving the fd to a pid, and then do manual string parsing of /proc/N/status and friends. This needs to be reimplemented over and over in all userspace projects (e.g.: I have reimplemented resolving in systemd, dbus, dbus-daemon, polkit so far), and requires additional care in checking that the fd is still valid after having parsed the data, to avoid races. Having a programmatic API that can be used directly removes all these requirements, including having /proc mounted. As discussed at LPC24, add an ioctl with an extensible struct so that more parameters can be added later if needed. Start with returning pid/tgid/ppid and creds unconditionally, and cgroupid optionally. Signed-off-by: Luca Boccassi Link: https://lore.kernel.org/r/20241010155401.2268522-1-luca.boccassi@gmail.com Signed-off-by: Christian Brauner --- include/uapi/linux/pidfd.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 565fc0629fff..4540f6301b8c 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -16,6 +16,55 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +/* Flags for pidfd_info. */ +#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */ +#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */ +#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ + +#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ + +struct pidfd_info { + /* + * This mask is similar to the request_mask in statx(2). + * + * Userspace indicates what extensions or expensive-to-calculate fields + * they want by setting the corresponding bits in mask. The kernel + * will ignore bits that it does not know about. + * + * When filling the structure, the kernel will only set bits + * corresponding to the fields that were actually filled by the kernel. + * This also includes any future extensions that might be automatically + * filled. If the structure size is too small to contain a field + * (requested or not), to avoid confusion the mask will not + * contain a bit for that field. + * + * As such, userspace MUST verify that mask contains the + * corresponding flags after the ioctl(2) returns to ensure that it is + * using valid data. + */ + __u64 mask; + /* + * The information contained in the following fields might be stale at the + * time it is received, as the target process might have exited as soon as + * the IOCTL was processed, and there is no way to avoid that. However, it + * is guaranteed that if the call was successful, then the information was + * correct and referred to the intended process at the time the work was + * performed. */ + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; + #define PIDFS_IOCTL_MAGIC 0xFF #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) @@ -28,5 +77,6 @@ #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) #endif /* _UAPI_LINUX_PIDFD_H */ -- cgit v1.2.3 From 1773572863c43a14a3e45f0591f28b7dec1ee52a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 22 Oct 2024 17:51:42 +0200 Subject: thermal: netlink: Add the commands and the events for the thresholds The thresholds exist but there is no notification neither action code related to them yet. These changes implement the netlink for the notifications when the thresholds are crossed, added, deleted or flushed as well as the commands which allows to get the list of the thresholds, flush them, add and delete. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/20241022155147.463475-3-daniel.lezcano@linaro.org [ rjw: Use the thermal_zone guard for locking, subject edit ] Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 2e6f60a36173..ba8604bdf206 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -20,7 +20,7 @@ enum thermal_trip_type { /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal" -#define THERMAL_GENL_VERSION 0x01 +#define THERMAL_GENL_VERSION 0x02 #define THERMAL_GENL_SAMPLING_GROUP_NAME "sampling" #define THERMAL_GENL_EVENT_GROUP_NAME "event" @@ -30,6 +30,7 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_TZ, THERMAL_GENL_ATTR_TZ_ID, THERMAL_GENL_ATTR_TZ_TEMP, + THERMAL_GENL_ATTR_TZ_PREV_TEMP, THERMAL_GENL_ATTR_TZ_TRIP, THERMAL_GENL_ATTR_TZ_TRIP_ID, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, @@ -50,6 +51,9 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, + THERMAL_GENL_ATTR_THRESHOLD, + THERMAL_GENL_ATTR_THRESHOLD_TEMP, + THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) @@ -77,6 +81,11 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */ + THERMAL_GENL_EVENT_THRESHOLD_ADD, /* A thresold has been added */ + THERMAL_GENL_EVENT_THRESHOLD_DELETE, /* A thresold has been deleted */ + THERMAL_GENL_EVENT_THRESHOLD_FLUSH, /* All thresolds have been deleted */ + THERMAL_GENL_EVENT_THRESHOLD_UP, /* A thresold has been crossed the way up */ + THERMAL_GENL_EVENT_THRESHOLD_DOWN, /* A thresold has been crossed the way down */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) @@ -84,12 +93,16 @@ enum thermal_genl_event { /* Commands supported by the thermal_genl_family */ enum thermal_genl_cmd { THERMAL_GENL_CMD_UNSPEC, - THERMAL_GENL_CMD_TZ_GET_ID, /* List of thermal zones id */ - THERMAL_GENL_CMD_TZ_GET_TRIP, /* List of thermal trips */ - THERMAL_GENL_CMD_TZ_GET_TEMP, /* Get the thermal zone temperature */ - THERMAL_GENL_CMD_TZ_GET_GOV, /* Get the thermal zone governor */ - THERMAL_GENL_CMD_TZ_GET_MODE, /* Get the thermal zone mode */ - THERMAL_GENL_CMD_CDEV_GET, /* List of cdev id */ + THERMAL_GENL_CMD_TZ_GET_ID, /* List of thermal zones id */ + THERMAL_GENL_CMD_TZ_GET_TRIP, /* List of thermal trips */ + THERMAL_GENL_CMD_TZ_GET_TEMP, /* Get the thermal zone temperature */ + THERMAL_GENL_CMD_TZ_GET_GOV, /* Get the thermal zone governor */ + THERMAL_GENL_CMD_TZ_GET_MODE, /* Get the thermal zone mode */ + THERMAL_GENL_CMD_CDEV_GET, /* List of cdev id */ + THERMAL_GENL_CMD_THRESHOLD_GET, /* List of thresholds */ + THERMAL_GENL_CMD_THRESHOLD_ADD, /* Add a threshold */ + THERMAL_GENL_CMD_THRESHOLD_DELETE, /* Delete a threshold */ + THERMAL_GENL_CMD_THRESHOLD_FLUSH, /* Flush all the thresholds */ __THERMAL_GENL_CMD_MAX, }; #define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1) -- cgit v1.2.3 From 09d6775f503b393d0457c7126aa43208e1724004 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 16 Oct 2024 13:27:45 -0700 Subject: riscv: Add support for userspace pointer masking RISC-V supports pointer masking with a variable number of tag bits (which is called "PMLEN" in the specification) and which is configured at the next higher privilege level. Wire up the PR_SET_TAGGED_ADDR_CTRL and PR_GET_TAGGED_ADDR_CTRL prctls so userspace can request a lower bound on the number of tag bits and determine the actual number of tag bits. As with arm64's PR_TAGGED_ADDR_ENABLE, the pointer masking configuration is thread-scoped, inherited on clone() and fork() and cleared on execve(). Reviewed-by: Charlie Jenkins Tested-by: Charlie Jenkins Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241016202814.4061541-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- include/uapi/linux/prctl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 35791791a879..cefd656ebf43 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.2.3 From 78844482a1c939a972681842f8ee2a8ddb202441 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 16 Oct 2024 13:27:47 -0700 Subject: riscv: Allow ptrace control of the tagged address ABI This allows a tracer to control the ABI of the tracee, as on arm64. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20241016202814.4061541-7-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b9935988da5c..a920cf8934dc 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -450,6 +450,7 @@ typedef struct elf64_shdr { #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ #define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ #define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ +#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */ #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ #define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ -- cgit v1.2.3 From 2f2d46959808e9b039ecb241ff13d50be2d6e231 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Oct 2024 18:15:42 +0100 Subject: firmware/psci: Add definitions for PSCI v1.3 specification The v1.3 PSCI spec (https://developer.arm.com/documentation/den0022) adds the SYSTEM_OFF2 function. Add definitions for it and its hibernation type parameter. Signed-off-by: David Woodhouse Reviewed-by: Miguel Luis Link: https://lore.kernel.org/r/20241019172459.2241939-2-dwmw2@infradead.org Signed-off-by: Oliver Upton --- include/uapi/linux/psci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 42a40ad3fb62..81759ff385e6 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -59,6 +59,7 @@ #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) #define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20) +#define PSCI_1_3_FN_SYSTEM_OFF2 PSCI_0_2_FN(21) #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) #define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) @@ -68,6 +69,7 @@ #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) #define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20) +#define PSCI_1_3_FN64_SYSTEM_OFF2 PSCI_0_2_FN64(21) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff @@ -100,6 +102,9 @@ #define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0 #define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U +/* PSCI v1.3 hibernate type for SYSTEM_OFF2 */ +#define PSCI_1_3_OFF_TYPE_HIBERNATE_OFF BIT(0) + /* PSCI version decoding (independent of PSCI version) */ #define PSCI_VERSION_MAJOR_SHIFT 16 #define PSCI_VERSION_MINOR_MASK \ -- cgit v1.2.3 From f4986a72d6e4be78ec0e4ee0e03531474621183f Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 25 Oct 2024 06:11:57 -0700 Subject: iommufd: Add IOMMU_IOAS_MAP_FILE Define the IOMMU_IOAS_MAP_FILE ioctl interface, which allows a user to register memory by passing a memfd plus offset and length. Implement it using the memfd_pin_folios() kAPI. Link: https://patch.msgid.link/r/1729861919-234514-8-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 72010f71c5e4..41b1a01e9293 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -51,6 +51,7 @@ enum { IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, + IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, }; /** @@ -213,6 +214,30 @@ struct iommu_ioas_map { }; #define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) +/** + * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE) + * @size: sizeof(struct iommu_ioas_map_file) + * @flags: same as for iommu_ioas_map + * @ioas_id: same as for iommu_ioas_map + * @fd: the memfd to map + * @start: byte offset from start of file to map from + * @length: same as for iommu_ioas_map + * @iova: same as for iommu_ioas_map + * + * Set an IOVA mapping from a memfd file. All other arguments and semantics + * match those of IOMMU_IOAS_MAP. + */ +struct iommu_ioas_map_file { + __u32 size; + __u32 flags; + __u32 ioas_id; + __s32 fd; + __aligned_u64 start; + __aligned_u64 length; + __aligned_u64 iova; +}; +#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE) + /** * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY) * @size: sizeof(struct iommu_ioas_copy) -- cgit v1.2.3 From cb67ff6272eceb5fcb2fe3b74f0293fa0706841a Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 22 Oct 2024 12:30:50 -0400 Subject: drm/amdkfd: flag per-queue reset support for gfx9 Flag KFD support for per-queue reset on GFX9 devices. Signed-off-by: Jonathan Kim Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_sysfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index 5e8d28617efa..859b8e91d4d3 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -60,7 +60,8 @@ #define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 #define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED 0x20000000 #define HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED 0x40000000 -#define HSA_CAP_RESERVED 0x800f8000 +#define HSA_CAP_PER_QUEUE_RESET_SUPPORTED 0x80000000 +#define HSA_CAP_RESERVED 0x000f8000 /* debug_prop bits in node properties */ #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f -- cgit v1.2.3 From 6d89ead19946181df1e41d38917fddc951dbd95b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 12 Jul 2024 11:35:23 +0200 Subject: UAPI/ioctl: Improve parameter name of ioctl request definition helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The third parameter to _IOR et al is a type name, not a size. So the parameter being named "size" is irritating. Rename it to "argtype" instead to reduce confusion. There is a very minor chance that this breaks stuff. It only hurts however if there is a variable (or macro) in userspace that is called "argtype" *and* it's used in the parameters of _IOR and friends. IMHO this is negligible because usually definitions making use of these macros are provided by kernel headers (i.e. us) or if they are replicated in userspace code, they are replicated and so supposed to match the kernel definitions (e.g. to make them usable by programs without the need to update the kernel headers used to compile the program). Signed-off-by: Uwe Kleine-König Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/ioctl.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/ioctl.h b/include/uapi/asm-generic/ioctl.h index a84f4db8a250..e3290a5824c9 100644 --- a/include/uapi/asm-generic/ioctl.h +++ b/include/uapi/asm-generic/ioctl.h @@ -82,13 +82,13 @@ * NOTE: _IOW means userland is writing and kernel is reading. _IOR * means userland is reading and kernel is writing. */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,argtype) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOW(type,nr,argtype) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOWR(type,nr,argtype) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(argtype))) +#define _IOR_BAD(type,nr,argtype) _IOC(_IOC_READ,(type),(nr),sizeof(argtype)) +#define _IOW_BAD(type,nr,argtype) _IOC(_IOC_WRITE,(type),(nr),sizeof(argtype)) +#define _IOWR_BAD(type,nr,argtype) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(argtype)) /* used to decode ioctl numbers.. */ #define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -- cgit v1.2.3 From b7a0855eb95f6db8ac8e17596e76f7b94a790fe6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Oct 2024 09:38:01 +0000 Subject: iommu: Add new flag to explictly request PASID capable domain Introduce new flag (IOMMU_HWPT_ALLOC_PASID) to domain_alloc_users() ops. If IOMMU supports PASID it will allocate domain. Otherwise return error. In error path check for -EOPNOTSUPP and try to allocate non-PASID domain so that DMA-API mode work fine for drivers which does not support PASID as well. Also modify __iommu_group_alloc_default_domain() to call iommu_paging_domain_alloc_flags() with appropriate flag when allocating paging domain. Signed-off-by: Jason Gunthorpe Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20241028093810.5901-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommufd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 72010f71c5e4..0c0ed28ee113 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -359,11 +359,19 @@ struct iommu_vfio_ioas { * enforced on device attachment * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is * valid. + * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The + * domain can be attached to any PASID on the device. + * Any domain attached to the non-PASID part of the + * device must also be flaged, otherwise attaching a + * PASID will blocked. + * If IOMMU does not support PASID it will return + * error (-EOPNOTSUPP). */ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, + IOMMU_HWPT_ALLOC_PASID = 1 << 3, }; /** -- cgit v1.2.3 From 1ddf9916ac09313128e40d6581cef889c0b4ce84 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 23 Oct 2024 12:53:42 +0200 Subject: xfrm: Add support for per cpu xfrm state handling. Currently all flows for a certain SA must be processed by the same cpu to avoid packet reordering and lock contention of the xfrm state lock. To get rid of this limitation, the IETF standardized per cpu SAs in RFC 9611. This patch implements the xfrm part of it. We add the cpu as a lookup key for xfrm states and a config option to generate acquire messages for each cpu. With that, we can have on each cpu a SA with identical traffic selector so that flows can be processed in parallel on all cpus. Signed-off-by: Steffen Klassert Tested-by: Antony Antony Tested-by: Tobias Brunner --- include/uapi/linux/xfrm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index f28701500714..d73a97e3030a 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -322,6 +322,7 @@ enum xfrm_attr_type_t { XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ XFRMA_SA_DIR, /* __u8 */ XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ + XFRMA_SA_PCPU, /* __u32 */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ @@ -437,6 +438,7 @@ struct xfrm_userpolicy_info { #define XFRM_POLICY_LOCALOK 1 /* Allow user to override global policy */ /* Automatically expand selector to include matching ICMP payloads. */ #define XFRM_POLICY_ICMP 2 +#define XFRM_POLICY_CPU_ACQUIRE 4 __u8 share; }; -- cgit v1.2.3 From a377132154ab8404dafcc52e8bc0c73050a954c2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 24 Sep 2024 05:57:31 -0600 Subject: io_uring/msg_ring: add support for sending a sync message Normally MSG_RING requires both a source and a destination ring. But some users don't always have a ring avilable to send a message from, yet they still need to notify a target ring. Add support for using io_uring_register(2) without having a source ring, using a file descriptor of -1 for that. Internally those are called blind registration opcodes. Implement IORING_REGISTER_SEND_MSG_RING as a blind opcode, which simply takes an sqe that the application can put on the stack and use the normal liburing helpers to initialize it. Then the app can call: io_uring_register(-1, IORING_REGISTER_SEND_MSG_RING, &sqe, 1); and get the same behavior in terms of the target, where a CQE is posted with the details given in the sqe. For now this takes a single sqe pointer argument, and hence arg must be set to that, and nr_args must be 1. Could easily be extended to take an array of sqes, but for now let's keep it simple. Link: https://lore.kernel.org/r/20240924115932.116167-3-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1fe79e750470..86cb385fe0b5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -612,6 +612,9 @@ enum io_uring_register_op { /* clone registered buffers from source ring to current ring */ IORING_REGISTER_CLONE_BUFFERS = 30, + /* send MSG_RING without having a ring */ + IORING_REGISTER_SEND_MSG_RING = 31, + /* this goes last */ IORING_REGISTER_LAST, -- cgit v1.2.3 From 79cfe9e59c2a12c3b3faeeefe38d23f3d8030972 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 21 Oct 2024 13:34:10 -0600 Subject: io_uring/register: add IORING_REGISTER_RESIZE_RINGS Once a ring has been created, the size of the CQ and SQ rings are fixed. Usually this isn't a problem on the SQ ring side, as it merely controls the available number of requests that can be submitted in a single system call, and there's rarely a need to change that. For the CQ ring, it's a different story. For most efficient use of io_uring, it's important that the CQ ring never overflows. This means that applications must size it for the worst case scenario, which can be wasteful. Add IORING_REGISTER_RESIZE_RINGS, which allows an application to resize the existing rings. It takes a struct io_uring_params argument, the same one which is used to setup the ring initially, and resizes rings according to the sizes given. Certain properties are always inherited from the original ring setup, like SQE128/CQE32 and other setup options. The implementation only allows flag associated with how the CQ ring is sized and clamped. Existing unconsumed SQE and CQE entries are copied as part of the process. If either the SQ or CQ resized destination ring cannot hold the entries already present in the source rings, then the operation is failed with -EOVERFLOW. Any register op holds ->uring_lock, which prevents new submissions, and the internal mapping holds the completion lock as well across moving CQ ring state. To prevent races between mmap and ring resizing, add a mutex that's solely used to serialize ring resize and mmap. mmap_sem can't be used here, as as fork'ed process may be doing mmaps on the ring as well. The ctx->resize_lock is held across mmap operations, and the resize will grab it before swapping out the already mapped new data. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 86cb385fe0b5..60b9c98595fa 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -615,6 +615,11 @@ enum io_uring_register_op { /* send MSG_RING without having a ring */ IORING_REGISTER_SEND_MSG_RING = 31, + /* 32 reserved for zc rx */ + + /* resize CQ ring */ + IORING_REGISTER_RESIZE_RINGS = 33, + /* this goes last */ IORING_REGISTER_LAST, -- cgit v1.2.3 From aa00f67adc2c0d6439f81b5a81ff181377c47a7e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 22 Oct 2024 13:47:00 -0600 Subject: io_uring: add support for fixed wait regions Generally applications have 1 or a few waits of waiting, yet they pass in a struct io_uring_getevents_arg every time. This needs to get copied and, in turn, the timeout value needs to get copied. Rather than do this for every invocation, allow the application to register a fixed set of wait regions that can simply be indexed when asking the kernel to wait on events. At ring setup time, the application can register a number of these wait regions and initialize region/index 0 upfront: struct io_uring_reg_wait *reg; reg = io_uring_setup_reg_wait(ring, nr_regions, &ret); /* set timeout and mark as set, sigmask/sigmask_sz as needed */ reg->ts.tv_sec = 0; reg->ts.tv_nsec = 100000; reg->flags = IORING_REG_WAIT_TS; where nr_regions >= 1 && nr_regions <= PAGE_SIZE / sizeof(*reg). The above initializes index 0, but 63 other regions can be initialized, if needed. Now, instead of doing: struct __kernel_timespec timeout = { .tv_nsec = 100000, }; io_uring_submit_and_wait_timeout(ring, &cqe, nr, &t, NULL); to wait for events for each submit_and_wait, or just wait, operation, it can just reference the above region at offset 0 and do: io_uring_submit_and_wait_reg(ring, &cqe, nr, 0); to achieve the same goal of waiting 100usec without needing to copy both struct io_uring_getevents_arg (24b) and struct __kernel_timeout (16b) for each invocation. Struct io_uring_reg_wait looks as follows: struct io_uring_reg_wait { struct __kernel_timespec ts; __u32 min_wait_usec; __u32 flags; __u64 sigmask; __u32 sigmask_sz; __u32 pad[3]; __u64 pad2[2]; }; embedding the timeout itself in the region, rather than passing it as a pointer as well. Note that the signal mask is still passed as a pointer, both for compatability reasons, but also because there doesn't seem to be a lot of high frequency waits scenarios that involve setting and resetting the signal mask for each wait. The application is free to modify any region before a wait call, or it can use keep multiple regions with different settings to avoid needing to modify the same one for wait calls. Up to a page size of regions is mapped by default, allowing PAGE_SIZE / 64 available regions for use. The registered region must fit within a page. On a 4kb page size system, that allows for 64 wait regions if a full page is used, as the size of struct io_uring_reg_wait is 64b. The region registered must be aligned to io_uring_reg_wait in size. It's valid to register less than 64 entries. In network performance testing with zero-copy, this reduced the time spent waiting on the TX side from 3.12% to 0.3% and the RX side from 4.4% to 0.3%. Wait regions are fixed for the lifetime of the ring - once registered, they are persistent until the ring is torn down. The regions support minimum wait timeout as well as the regular waits. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 60b9c98595fa..65b7417c1b05 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -518,6 +518,7 @@ struct io_cqring_offsets { #define IORING_ENTER_EXT_ARG (1U << 3) #define IORING_ENTER_REGISTERED_RING (1U << 4) #define IORING_ENTER_ABS_TIMER (1U << 5) +#define IORING_ENTER_EXT_ARG_REG (1U << 6) /* * Passed in for io_uring_setup(2). Copied back with updated info on success @@ -620,6 +621,9 @@ enum io_uring_register_op { /* resize CQ ring */ IORING_REGISTER_RESIZE_RINGS = 33, + /* register fixed io_uring_reg_wait arguments */ + IORING_REGISTER_CQWAIT_REG = 34, + /* this goes last */ IORING_REGISTER_LAST, @@ -803,6 +807,43 @@ enum io_uring_register_restriction_op { IORING_RESTRICTION_LAST }; +enum { + IORING_REG_WAIT_TS = (1U << 0), +}; + +/* + * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of + * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is + * called rather than pass in a wait argument structure separately. + */ +struct io_uring_cqwait_reg_arg { + __u32 flags; + __u32 struct_size; + __u32 nr_entries; + __u32 pad; + __u64 user_addr; + __u64 pad2[3]; +}; + +/* + * Argument for io_uring_enter(2) with + * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument + * is an index into a previously registered fixed wait region described by + * the below structure. + */ +struct io_uring_reg_wait { + struct __kernel_timespec ts; + __u32 min_wait_usec; + __u32 flags; + __u64 sigmask; + __u32 sigmask_sz; + __u32 pad[3]; + __u64 pad2[2]; +}; + +/* + * Argument for io_uring_enter(2) with IORING_GETEVENTS | IORING_ENTER_EXT_ARG + */ struct io_uring_getevents_arg { __u64 sigmask; __u32 sigmask_sz; -- cgit v1.2.3 From a85f31052bce52111b4e9d5a536003481d0421d0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 27 Oct 2024 08:59:10 -0600 Subject: io_uring/nop: add support for testing registered files and buffers Useful for testing performance/efficiency impact of registered files and buffers, vs (particularly) non-registered files. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 65b7417c1b05..024745283783 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -416,6 +416,9 @@ enum io_uring_msg_ring_flags { * IORING_NOP_INJECT_RESULT Inject result from sqe->result */ #define IORING_NOP_INJECT_RESULT (1U << 0) +#define IORING_NOP_FILE (1U << 1) +#define IORING_NOP_FIXED_FILE (1U << 2) +#define IORING_NOP_FIXED_BUFFER (1U << 3) /* * IO completion data structure (Completion Queue Entry) -- cgit v1.2.3 From 94b2a2c0e7cba3f163609dbd94120ee533ad2a07 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Thu, 17 Oct 2024 16:58:09 +0200 Subject: accel/ivpu: Remove copy engine support Copy engine was deprecated by the FW and is no longer supported. Compute engine includes all copy engine functionality and should be used instead. This change does not affect user space as the copy engine was never used outside of a couple of tests. Signed-off-by: Andrzej Kacprowski Reviewed-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20241017145817.121590-4-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 234664d35250..a35b97b097bf 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -258,7 +258,7 @@ struct drm_ivpu_bo_info { /* drm_ivpu_submit engines */ #define DRM_IVPU_ENGINE_COMPUTE 0 -#define DRM_IVPU_ENGINE_COPY 1 +#define DRM_IVPU_ENGINE_COPY 1 /* Deprecated */ /** * struct drm_ivpu_submit - Submit commands to the VPU @@ -289,10 +289,6 @@ struct drm_ivpu_submit { * %DRM_IVPU_ENGINE_COMPUTE: * * Performs Deep Learning Neural Compute Inference Operations - * - * %DRM_IVPU_ENGINE_COPY: - * - * Performs memory copy operations to/from system memory allocated for VPU */ __u32 engine; -- cgit v1.2.3 From 128d333f0dff2fbe41c546581c6f151e9d68cd4c Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 17 Oct 2024 10:31:53 -0700 Subject: f2fs: introduce device aliasing file F2FS should understand how the device aliasing file works and support deleting the file after use. A device aliasing file can be created by mkfs.f2fs tool and it can map the whole device with an extent, not using node blocks. The file space should be pinned and normally used for read-only usages. Signed-off-by: Daeho Jeong Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/uapi/linux/f2fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 955d440be104..f7aaf8d23e20 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -43,6 +43,7 @@ #define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23) #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24) #define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25) +#define F2FS_IOC_GET_DEV_ALIAS_FILE _IOR(F2FS_IOCTL_MAGIC, 26, __u32) /* * should be same as XFS_IOC_GOINGDOWN. -- cgit v1.2.3 From b16e920a1909da6799c43000db730d8fcdcae907 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Oct 2024 18:43:13 -0600 Subject: io_uring/rsrc: allow cloning at an offset Right now buffer cloning is an all-or-nothing kind of thing - either the whole table is cloned from a source to a destination ring, or nothing at all. However, it's not always desired to clone the whole thing. Allow for the application to specify a source and destination offset, and a number of buffers to clone. If the destination offset is non-zero, then allocate sparse nodes upfront. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 024745283783..cc8dbe78c126 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -719,7 +719,10 @@ enum { struct io_uring_clone_buffers { __u32 src_fd; __u32 flags; - __u32 pad[6]; + __u32 src_off; + __u32 dst_off; + __u32 nr; + __u32 pad[3]; }; struct io_uring_buf { -- cgit v1.2.3 From c1329532d5aabecf79788924941afb8a7b7c1024 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Oct 2024 07:50:56 -0600 Subject: io_uring/rsrc: allow cloning with node replacements Currently cloning a buffer table will fail if the destination already has a table. But it should be possible to use it to replace existing elements. Add a IORING_REGISTER_DST_REPLACE cloning flag, which if set, will allow the destination to already having a buffer table. If that is the case, then entries designated by offset + nr buffers will be replaced if they already exist. Note that it's allowed to use IORING_REGISTER_DST_REPLACE and not have an existing table, in which case it'll work just like not having the flag set and an empty table - it'll just assign the newly created table for that case. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index cc8dbe78c126..ce58c4590de6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -713,7 +713,8 @@ struct io_uring_clock_register { }; enum { - IORING_REGISTER_SRC_REGISTERED = 1, + IORING_REGISTER_SRC_REGISTERED = (1U << 0), + IORING_REGISTER_DST_REPLACE = (1U << 1), }; struct io_uring_clone_buffers { -- cgit v1.2.3 From 01ee194d1aba1202f0926d5047a2a4cf84d0e45d Mon Sep 17 00:00:00 2001 From: hexue Date: Fri, 1 Nov 2024 17:19:57 +0800 Subject: io_uring: add support for hybrid IOPOLL A new hybrid poll is implemented on the io_uring layer. Once an IO is issued, it will not poll immediately, but rather block first and re-run before IO complete, then poll to reap IO. While this poll method could be a suboptimal solution when running on a single thread, it offers performance lower than regular polling but higher than IRQ, and CPU utilization is also lower than polling. To use hybrid polling, the ring must be setup with both the IORING_SETUP_IOPOLL and IORING_SETUP_HYBRID)IOPOLL flags set. Hybrid polling has the same restrictions as IOPOLL, in that commands must explicitly support it. Signed-off-by: hexue Link: https://lore.kernel.org/r/20241101091957.564220-2-xue01.he@samsung.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ce58c4590de6..47977a5c65f5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit { */ #define IORING_SETUP_NO_SQARRAY (1U << 16) +/* Use hybrid poll in iopoll process */ +#define IORING_SETUP_HYBRID_IOPOLL (1U << 17) + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, -- cgit v1.2.3 From a1afb959add1fad43cb337448c244ed70bac3109 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 30 Oct 2024 09:11:56 +0100 Subject: dpll: add clock quality level attribute and op In order to allow driver expose quality level of the clock it is running, introduce a new netlink attr with enum to carry it to the userspace. Also, introduce an op the dpll netlink code calls into the driver to obtain the value. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20241030081157.966604-2-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index b0654ade7b7e..2b7ec2da4bcc 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -79,6 +79,29 @@ enum dpll_lock_status_error { DPLL_LOCK_STATUS_ERROR_MAX = (__DPLL_LOCK_STATUS_ERROR_MAX - 1) }; +/** + * enum dpll_clock_quality_level - level of quality of a clock device. This + * mainly applies when the dpll lock-status is DPLL_LOCK_STATUS_HOLDOVER. The + * current list is defined according to the table 11-7 contained in ITU-T + * G.8264/Y.1364 document. One may extend this list freely by other ITU-T + * defined clock qualities, or different ones defined by another + * standardization body (for those, please use different prefix). + */ +enum dpll_clock_quality_level { + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRC = 1, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_SSU_A, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_SSU_B, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EEC1, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRTC, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EPRTC, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EEEC, + DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_EPRC, + + /* private: */ + __DPLL_CLOCK_QUALITY_LEVEL_MAX, + DPLL_CLOCK_QUALITY_LEVEL_MAX = (__DPLL_CLOCK_QUALITY_LEVEL_MAX - 1) +}; + #define DPLL_TEMP_DIVIDER 1000 /** @@ -180,6 +203,7 @@ enum dpll_a { DPLL_A_TEMP, DPLL_A_TYPE, DPLL_A_LOCK_STATUS_ERROR, + DPLL_A_CLOCK_QUALITY_LEVEL, __DPLL_A_MAX, DPLL_A_MAX = (__DPLL_A_MAX - 1) -- cgit v1.2.3 From 43d3487035e9a86fad952de4240a518614240d43 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 29 Oct 2024 15:55:35 -0600 Subject: UAPI: ethtool: Use __struct_group() in struct ethtool_link_settings Use the `__struct_group()` helper to create a new tagged `struct ethtool_link_settings_hdr`. This structure groups together all the members of the flexible `struct ethtool_link_settings` except the flexible array. As a result, the array is effectively separated from the rest of the members without modifying the memory layout of the flexible structure. This new tagged struct will be used to fix problematic declarations of middle-flex-arrays in composite structs[1]. [1] https://git.kernel.org/linus/d88cabfd9abc Signed-off-by: Gustavo A. R. Silva Link: https://patch.msgid.link/9e9fb0bd72e5ba1e916acbb4995b1e358b86a689.1730238285.git.gustavoars@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index c405ed63acfa..fc1f54b065f9 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2511,21 +2511,24 @@ enum ethtool_reset_flags { * autonegotiation; 0 if unknown or not applicable. Read-only. */ struct ethtool_link_settings { - __u32 cmd; - __u32 speed; - __u8 duplex; - __u8 port; - __u8 phy_address; - __u8 autoneg; - __u8 mdio_support; - __u8 eth_tp_mdix; - __u8 eth_tp_mdix_ctrl; - __s8 link_mode_masks_nwords; - __u8 transceiver; - __u8 master_slave_cfg; - __u8 master_slave_state; - __u8 rate_matching; - __u32 reserved[7]; + /* New members MUST be added within the __struct_group() macro below. */ + __struct_group(ethtool_link_settings_hdr, hdr, /* no attrs */, + __u32 cmd; + __u32 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 autoneg; + __u8 mdio_support; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __s8 link_mode_masks_nwords; + __u8 transceiver; + __u8 master_slave_cfg; + __u8 master_slave_state; + __u8 rate_matching; + __u32 reserved[7]; + ); __u32 link_mode_masks[]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; -- cgit v1.2.3 From 9d2fe9cd02ca5f1e70a7eff0262fb3668a27db0c Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 1 Nov 2024 07:46:29 +0000 Subject: iio: Add channel type for attention Add a new channel type representing if the user's attention state to the the system. This usually means if the user is looking at the screen or not. Signed-off-by: Ricardo Ribalda Link: https://patch.msgid.link/20241101-hpd-v3-3-e9c80b7c7164@chromium.org Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index f2e0b2d50e6b..12886d4465e4 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -51,6 +51,7 @@ enum iio_chan_type { IIO_DELTA_VELOCITY, IIO_COLORTEMP, IIO_CHROMATICITY, + IIO_ATTENTION, }; enum iio_modifier { -- cgit v1.2.3 From 8b36f7c3c6618dc97697a6a20a13b29651f68ab6 Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Tue, 3 Sep 2024 14:37:52 +0300 Subject: RDMA/mlx5: Support OOO RX WQE consumption Support QP with out-of-order (OOO) capabilities enabled. This allows WRs on the receiver side of the QP to be consumed OOO, permitting the sender side to transmit messages without guaranteeing arrival order on the receiver side. When enabled, the completion ordering of WRs remains in-order, regardless of the Receive WRs consumption order. RDMA Read and RDMA Atomic operations on the responder side continue to be executed in-order, while the ordering of data placement for RDMA Write and Send operations is not guaranteed. Atomic operations larger than 8 bytes are currently not supported. Therefore, when this feature is enabled, the created QP restricts its atomic support to 8 bytes at most. In addition, when querying the device, a new flag is returned in response to indicate that the Kernel supports OOO QP. Signed-off-by: Edward Srouji Reviewed-by: Yishai Hadas Link: https://patch.msgid.link/06ac609a5f358c8fb0a090d22c61a2f9329d82e6.1725362773.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5-abi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index d4f6a36dffb0..8a6ad6c6841c 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -252,6 +252,7 @@ enum mlx5_ib_query_dev_resp_flags { MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3, + MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP = 1 << 4, }; enum mlx5_ib_tunnel_offloads { @@ -439,6 +440,10 @@ struct mlx5_ib_burst_info { __u16 reserved; }; +enum mlx5_ib_modify_qp_mask { + MLX5_IB_MODIFY_QP_OOO_DP = 1 << 0, +}; + struct mlx5_ib_modify_qp { __u32 comp_mask; struct mlx5_ib_burst_info burst_info; -- cgit v1.2.3 From 7566752e4d7d7fc0186531aa800068a7243f95c1 Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Thu, 31 Oct 2024 11:31:14 +0200 Subject: RDMA/nldev: Add IB device and net device rename events Implement event sending for IB device rename and IB device port associated netdevice rename. In iproute2, rdma monitor displays the IB device name, port and the netdevice name when displaying event info. Since users can modiy these names, we track and notify on renaming events. Note: In order to receive netdevice rename events, drivers must use the ib_device_set_netdev() API when attaching net devices to IB devices. $ rdma monitor $ rmmod mlx5_ib [UNREGISTER] dev 1 rocep8s0f1 [UNREGISTER] dev 0 rocep8s0f0 $ modprobe mlx5_ib [REGISTER] dev 2 mlx5_0 [NETDEV_ATTACH] dev 2 mlx5_0 port 1 netdev 4 eth2 [REGISTER] dev 3 mlx5_1 [NETDEV_ATTACH] dev 3 mlx5_1 port 1 netdev 5 eth3 [RENAME] dev 2 rocep8s0f0 [RENAME] dev 3 rocep8s0f1 $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev [UNREGISTER] dev 2 rocep8s0f0 [REGISTER] dev 4 mlx5_0 [NETDEV_ATTACH] dev 4 mlx5_0 port 30 netdev 4 eth2 [RENAME] dev 4 rdmap8s0f0 $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 2 netdev 7 eth4 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 3 netdev 8 eth5 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 4 netdev 9 eth6 [NETDEV_ATTACH] dev 4 rdmap8s0f0 port 5 netdev 10 eth7 [REGISTER] dev 5 mlx5_0 [NETDEV_ATTACH] dev 5 mlx5_0 port 1 netdev 11 eth8 [REGISTER] dev 6 mlx5_1 [NETDEV_ATTACH] dev 6 mlx5_1 port 1 netdev 12 eth9 [RENAME] dev 5 rocep8s0f0v0 [RENAME] dev 6 rocep8s0f0v1 [REGISTER] dev 7 mlx5_0 [NETDEV_ATTACH] dev 7 mlx5_0 port 1 netdev 13 eth10 [RENAME] dev 7 rocep8s0f0v2 [REGISTER] dev 8 mlx5_0 [NETDEV_ATTACH] dev 8 mlx5_0 port 1 netdev 14 eth11 [RENAME] dev 8 rocep8s0f0v3 $ ip link set eth2 name myeth2 [NETDEV_RENAME] netdev 4 myeth2 $ ip link set eth1 name myeth1 ** no events received, because eth1 is not attached to an IB device ** Signed-off-by: Chiara Meiohas Link: https://patch.msgid.link/093c978ef2766fd3ab4ff8798eeb68f2f11582f6.1730367038.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 39be09c0ffbb..9f9cf20c1cd8 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -638,6 +638,8 @@ enum rdma_nl_notify_event_type { RDMA_UNREGISTER_EVENT, RDMA_NETDEV_ATTACH_EVENT, RDMA_NETDEV_DETACH_EVENT, + RDMA_RENAME_EVENT, + RDMA_NETDEV_RENAME_EVENT, }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 690e50dd69ee48e43e0f7c42396487da1b81be14 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 3 Nov 2024 08:53:14 -0800 Subject: tools: ynl-gen: de-kdocify enums with no doc for entries Sometimes the names of the enum entries are self-explanatory or come from standards. Forcing authors to write trivial kdoc for each of such entries seems unreasonable, but kdoc would complain about undocumented entries. Detect enums which only have documentation for the entire type and no documentation for entries. Render their doc as a plain comment. Link: https://patch.msgid.link/20241103165314.1631237-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/dpll.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 2b7ec2da4bcc..bf97d4b6d51f 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -79,13 +79,13 @@ enum dpll_lock_status_error { DPLL_LOCK_STATUS_ERROR_MAX = (__DPLL_LOCK_STATUS_ERROR_MAX - 1) }; -/** - * enum dpll_clock_quality_level - level of quality of a clock device. This - * mainly applies when the dpll lock-status is DPLL_LOCK_STATUS_HOLDOVER. The - * current list is defined according to the table 11-7 contained in ITU-T - * G.8264/Y.1364 document. One may extend this list freely by other ITU-T - * defined clock qualities, or different ones defined by another - * standardization body (for those, please use different prefix). +/* + * level of quality of a clock device. This mainly applies when the dpll + * lock-status is DPLL_LOCK_STATUS_HOLDOVER. The current list is defined + * according to the table 11-7 contained in ITU-T G.8264/Y.1364 document. One + * may extend this list freely by other ITU-T defined clock qualities, or + * different ones defined by another standardization body (for those, please + * use different prefix). */ enum dpll_clock_quality_level { DPLL_CLOCK_QUALITY_LEVEL_ITU_OPT1_PRC = 1, -- cgit v1.2.3 From 35890f85573c2ebbbf3491dc66f7ee2ad63055af Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:45 -0300 Subject: vfio: Remove VFIO_TYPE1_NESTING_IOMMU This control causes the ARM SMMU drivers to choose a stage 2 implementation for the IO pagetable (vs the stage 1 usual default), however this choice has no significant visible impact to the VFIO user. Further qemu never implemented this and no other userspace user is known. The original description in commit f5c9ecebaf2a ("vfio/iommu_type1: add new VFIO_TYPE1_NESTING_IOMMU IOMMU type") suggested this was to "provide SMMU translation services to the guest operating system" however the rest of the API to set the guest table pointer for the stage 1 and manage invalidation was never completed, or at least never upstreamed, rendering this part useless dead code. Upstream has now settled on iommufd as the uAPI for controlling nested translation. Choosing the stage 2 implementation should be done by through the IOMMU_HWPT_ALLOC_NEST_PARENT flag during domain allocation. Remove VFIO_TYPE1_NESTING_IOMMU and everything under it including the enable_nesting iommu_domain_op. Just in-case there is some userspace using this continue to treat requesting it as a NOP, but do not advertise support any more. Acked-by: Alex Williamson Reviewed-by: Mostafa Saleh Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- include/uapi/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 2b68e6cdf190..c8dbf8219c4f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -35,7 +35,7 @@ #define VFIO_EEH 5 /* Two-stage IOMMU */ -#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define __VFIO_RESERVED_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ #define VFIO_SPAPR_TCE_v2_IOMMU 7 -- cgit v1.2.3 From 6912ec91828b8d7f21b393befad1c36dadbcd751 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:49 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_GET_HW_INFO via struct arm_smmu_hw_info For virtualization cases the IDR/IIDR/AIDR values of the actual SMMU instance need to be available to the VMM so it can construct an appropriate vSMMUv3 that reflects the correct HW capabilities. For userspace page tables these values are required to constrain the valid values within the CD table and the IOPTEs. The kernel does not sanitize these values. If building a VMM then userspace is required to only forward bits into a VM that it knows it can implement. Some bits will also require a VMM to detect if appropriate kernel support is available such as for ATS and BTM. Start a new file and kconfig for the advanced iommufd support. This lets it be compiled out for kernels that are not intended to support virtualization, and allows distros to leave it disabled until they are shipping a matching qemu too. Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Will Deacon --- include/uapi/linux/iommufd.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 72010f71c5e4..b5c94fecb94c 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -484,15 +484,50 @@ struct iommu_hw_info_vtd { __aligned_u64 ecap_reg; }; +/** + * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information + * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3) + * + * @flags: Must be set to 0 + * @__reserved: Must be 0 + * @idr: Implemented features for ARM SMMU Non-secure programming interface + * @iidr: Information about the implementation and implementer of ARM SMMU, + * and architecture version supported + * @aidr: ARM SMMU architecture version + * + * For the details of @idr, @iidr and @aidr, please refer to the chapters + * from 6.3.1 to 6.3.6 in the SMMUv3 Spec. + * + * User space should read the underlying ARM SMMUv3 hardware information for + * the list of supported features. + * + * Note that these values reflect the raw HW capability, without any insight if + * any required kernel driver support is present. Bits may be set indicating the + * HW has functionality that is lacking kernel software support, such as BTM. If + * a VMM is using this information to construct emulated copies of these + * registers it should only forward bits that it knows it can support. + * + * In future, presence of required kernel support will be indicated in flags. + */ +struct iommu_hw_info_arm_smmuv3 { + __u32 flags; + __u32 __reserved; + __u32 idr[6]; + __u32 iidr; + __u32 aidr; +}; + /** * enum iommu_hw_info_type - IOMMU Hardware Info Types * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware * info * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type + * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE = 0, IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, + IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, }; /** -- cgit v1.2.3 From 5205b63099507a84458075c3ca7e648407e6c8cc Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 4 Nov 2024 11:27:57 +0800 Subject: media: uapi: Add MEDIA_BUS_FMT_RGB101010_1X7X5_{SPWG, JEIDA} Add two media bus formats that identify 30-bit RGB pixels transmitted by a LVDS link with five differential data pairs, serialized into 7 time slots, using standard SPWG/VESA or JEIDA data mapping. Signed-off-by: Liu Ying Acked-by: Sakari Ailus Link: https://patchwork.freedesktop.org/patch/msgid/20241104032806.611890-5-victor.liu@nxp.com Signed-off-by: Dmitry Baryshkov --- include/uapi/linux/media-bus-format.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index d4c1d991014b..ff62056feed5 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -34,7 +34,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x1026 */ +/* RGB - next is 0x1028 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x1016 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -68,6 +68,8 @@ #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d #define MEDIA_BUS_FMT_RGB888_1X32_PADHI 0x100f #define MEDIA_BUS_FMT_RGB101010_1X30 0x1018 +#define MEDIA_BUS_FMT_RGB101010_1X7X5_SPWG 0x1026 +#define MEDIA_BUS_FMT_RGB101010_1X7X5_JEIDA 0x1027 #define MEDIA_BUS_FMT_RGB666_1X36_CPADLO 0x1020 #define MEDIA_BUS_FMT_RGB888_1X36_CPADLO 0x1021 #define MEDIA_BUS_FMT_RGB121212_1X36 0x1019 -- cgit v1.2.3 From 18d92bb57c39504d9da11c6ef604f58eb1d5a117 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 22 Oct 2024 18:59:08 +0300 Subject: perf/core: Add aux_pause, aux_resume, aux_start_paused Hardware traces, such as instruction traces, can produce a vast amount of trace data, so being able to reduce tracing to more specific circumstances can be useful. The ability to pause or resume tracing when another event happens, can do that. Add ability for an event to "pause" or "resume" AUX area tracing. Add aux_pause bit to perf_event_attr to indicate that, if the event happens, the associated AUX area tracing should be paused. Ditto aux_resume. Do not allow aux_pause and aux_resume to be set together. Add aux_start_paused bit to perf_event_attr to indicate to an AUX area event that it should start in a "paused" state. Add aux_paused to struct hw_perf_event for AUX area events to keep track of the "paused" state. aux_paused is initialized to aux_start_paused. Add PERF_EF_PAUSE and PERF_EF_RESUME modes for ->stop() and ->start() callbacks. Call as needed, during __perf_event_output(). Add aux_in_pause_resume to struct perf_buffer to prevent races with the NMI handler. Pause/resume in NMI context will miss out if it coincides with another pause/resume. To use aux_pause or aux_resume, an event must be in a group with the AUX area event as the group leader. Example (requires Intel PT and tools patches also): $ perf record --kcore -e intel_pt/aux-action=start-paused/k,syscalls:sys_enter_newuname/aux-action=resume/,syscalls:sys_exit_newuname/aux-action=pause/ uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.043 MB perf.data ] $ perf script --call-trace uname 30805 [000] 24001.058782799: name: 0x7ffc9c1865b0 uname 30805 [000] 24001.058784424: psb offs: 0 uname 30805 [000] 24001.058784424: cbr: 39 freq: 3904 MHz (139%) uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) __x64_sys_newuname uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) down_read uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) __cond_resched uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_add uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) in_lock_functions uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_sub uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) up_read uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_add uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) in_lock_functions uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) preempt_count_sub uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) _copy_to_user uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) syscall_exit_to_user_mode uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) syscall_exit_work uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) perf_syscall_exit uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_trace_buf_alloc uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_swevent_get_recursion_context uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_tp_event uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_trace_buf_update uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) tracing_gen_ctx_irq_test uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_swevent_event uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __perf_event_account_interrupt uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __this_cpu_preempt_check uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_event_output_forward uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_event_aux_pause uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) ring_buffer_get uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __rcu_read_lock uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __rcu_read_unlock uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) pt_event_stop uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) debug_smp_processor_id uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) native_write_msr uname 30805 [000] 24001.058785463: ([kernel.kallsyms]) native_write_msr uname 30805 [000] 24001.058785639: 0x0 Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: James Clark Link: https://lkml.kernel.org/r/20241022155920.17511-3-adrian.hunter@intel.com --- include/uapi/linux/perf_event.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4842c36fdf80..0524d541d4e3 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -511,7 +511,16 @@ struct perf_event_attr { __u16 sample_max_stack; __u16 __reserved_2; __u32 aux_sample_size; - __u32 __reserved_3; + + union { + __u32 aux_action; + struct { + __u32 aux_start_paused : 1, /* start AUX area tracing paused */ + aux_pause : 1, /* on overflow, pause AUX area tracing */ + aux_resume : 1, /* on overflow, resume AUX area tracing */ + __reserved_3 : 29; + }; + }; /* * User provided data if sigtrap=1, passed back to user via -- cgit v1.2.3 From 84bfbfbbd32aee136afea4b6bf82581dce79c305 Mon Sep 17 00:00:00 2001 From: Maurice Lambert Date: Sun, 3 Nov 2024 23:39:50 +0100 Subject: netlink: typographical error in nlmsg_type constants definition This commit fix a typographical error in netlink nlmsg_type constants definition in the include/uapi/linux/rtnetlink.h at line 177. The definition is RTM_NEWNVLAN RTM_NEWVLAN instead of RTM_NEWVLAN RTM_NEWVLAN. Signed-off-by: Maurice Lambert Fixes: 8dcea187088b ("net: bridge: vlan: add rtm definitions and dump support") Link: https://patch.msgid.link/20241103223950.230300-1-mauricelambert434@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 3b687d20c9ed..db7254d52d93 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -174,7 +174,7 @@ enum { #define RTM_GETLINKPROP RTM_GETLINKPROP RTM_NEWVLAN = 112, -#define RTM_NEWNVLAN RTM_NEWVLAN +#define RTM_NEWVLAN RTM_NEWVLAN RTM_DELVLAN, #define RTM_DELVLAN RTM_DELVLAN RTM_GETVLAN, -- cgit v1.2.3 From 6140be90ec70c39fa844741ca3cc807dd0866394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= Date: Fri, 26 Apr 2024 18:20:14 +0200 Subject: fs/xattr: add *at family syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the four syscalls setxattrat(), getxattrat(), listxattrat() and removexattrat(). Those can be used to operate on extended attributes, especially security related ones, either relative to a pinned directory or on a file descriptor without read access, avoiding a /proc//fd/ detour, requiring a mounted procfs. One use case will be setfiles(8) setting SELinux file contexts ("security.selinux") without race conditions and without a file descriptor opened with read access requiring SELinux read permission. Use the do_{name}at() pattern from fs/open.c. Pass the value of the extended attribute, its length, and for setxattrat(2) the command (XATTR_CREATE or XATTR_REPLACE) via an added struct xattr_args to not exceed six syscall arguments and not merging the AT_* and XATTR_* flags. [AV: fixes by Christian Brauner folded in, the entire thing rebased on top of {filename,file}_...xattr() primitives, treatment of empty pathnames regularized. As the result, AT_EMPTY_PATH+NULL handling is cheap, so f...(2) can use it] Signed-off-by: Christian Göttsche Link: https://lore.kernel.org/r/20240426162042.191916-1-cgoettsche@seltendoof.de Reviewed-by: Arnd Bergmann Reviewed-by: Christian Brauner CC: x86@kernel.org CC: linux-alpha@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: linux-arm-kernel@lists.infradead.org CC: linux-ia64@vger.kernel.org CC: linux-m68k@lists.linux-m68k.org CC: linux-mips@vger.kernel.org CC: linux-parisc@vger.kernel.org CC: linuxppc-dev@lists.ozlabs.org CC: linux-s390@vger.kernel.org CC: linux-sh@vger.kernel.org CC: sparclinux@vger.kernel.org CC: linux-fsdevel@vger.kernel.org CC: audit@vger.kernel.org CC: linux-arch@vger.kernel.org CC: linux-api@vger.kernel.org CC: linux-security-module@vger.kernel.org CC: selinux@vger.kernel.org [brauner: slight tweaks] Signed-off-by: Christian Brauner Signed-off-by: Al Viro --- include/uapi/asm-generic/unistd.h | 11 ++++++++++- include/uapi/linux/xattr.h | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 5bf6148cac2b..88dc393c2bca 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -841,8 +841,17 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) +#define __NR_setxattrat 463 +__SYSCALL(__NR_setxattrat, sys_setxattrat) +#define __NR_getxattrat 464 +__SYSCALL(__NR_getxattrat, sys_getxattrat) +#define __NR_listxattrat 465 +__SYSCALL(__NR_listxattrat, sys_listxattrat) +#define __NR_removexattrat 466 +__SYSCALL(__NR_removexattrat, sys_removexattrat) + #undef __NR_syscalls -#define __NR_syscalls 463 +#define __NR_syscalls 467 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index 9463db2dfa9d..9854f9cff3c6 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -11,6 +11,7 @@ */ #include +#include #ifndef _UAPI_LINUX_XATTR_H #define _UAPI_LINUX_XATTR_H @@ -20,6 +21,12 @@ #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */ + +struct xattr_args { + __aligned_u64 __user value; + __u32 size; + __u32 flags; +}; #endif /* Namespaces */ -- cgit v1.2.3 From 6bf90bd8c58a305994948eb3409d91a7d8f2edae Mon Sep 17 00:00:00 2001 From: Olivier Langlois Date: Sun, 13 Oct 2024 14:29:24 -0400 Subject: io_uring/napi: add static napi tracking strategy Add the static napi tracking strategy. That allows the user to manually manage the napi ids list for busy polling, and eliminate the overhead of dynamically updating the list from the fast path. Signed-off-by: Olivier Langlois Link: https://lore.kernel.org/r/96943de14968c35a5c599352259ad98f3c0770ba.1728828877.git.olivier@trillion01.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 47977a5c65f5..5d08435b95a8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -790,12 +790,40 @@ struct io_uring_buf_status { __u32 resv[8]; }; +enum io_uring_napi_op { + /* register/ungister backward compatible opcode */ + IO_URING_NAPI_REGISTER_OP = 0, + + /* opcodes to update napi_list when static tracking is used */ + IO_URING_NAPI_STATIC_ADD_ID = 1, + IO_URING_NAPI_STATIC_DEL_ID = 2 +}; + +enum io_uring_napi_tracking_strategy { + /* value must be 0 for backward compatibility */ + IO_URING_NAPI_TRACKING_DYNAMIC = 0, + IO_URING_NAPI_TRACKING_STATIC = 1, + IO_URING_NAPI_TRACKING_INACTIVE = 255 +}; + /* argument for IORING_(UN)REGISTER_NAPI */ struct io_uring_napi { __u32 busy_poll_to; __u8 prefer_busy_poll; - __u8 pad[3]; - __u64 resv; + + /* a io_uring_napi_op value */ + __u8 opcode; + __u8 pad[2]; + + /* + * for IO_URING_NAPI_REGISTER_OP, it is a + * io_uring_napi_tracking_strategy value. + * + * for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID + * it is the napi id to add/del from napi_list. + */ + __u32 op_param; + __u32 resv; }; /* -- cgit v1.2.3 From 647da5f709f112319c0d51e06f330d8afecb1940 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 5 Nov 2024 09:14:48 +0100 Subject: posix-timers: Move sequence logic into struct k_itimer The posix timer signal handling uses siginfo::si_sys_private for handling the sequence counter check. That indirection is not longer required and the sequence count value at signal queueing time can be stored in struct k_itimer itself. This removes the requirement of treating siginfo::si_sys_private special as it's now always zero as the kernel does not touch it anymore. Suggested-by: Eric W. Biederman Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Acked-by: "Eric W. Biederman" Link: https://lore.kernel.org/all/20241105064213.852619866@linutronix.de --- include/uapi/asm-generic/siginfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index b7bc545ec3b2..5a1ca43b5fc6 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -46,7 +46,7 @@ union __sifields { __kernel_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ + int _sys_private; /* Not used by the kernel. Historic leftover. Always 0. */ } _timer; /* POSIX.1b signals */ -- cgit v1.2.3 From 9907cda95fcbf44141b1292faab89cf8ec542f22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juraj=20=C5=A0arinay?= Date: Sun, 3 Nov 2024 13:45:25 +0100 Subject: net: nfc: Propagate ISO14443 type A target ATS to userspace via netlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a 20-byte field ats to struct nfc_target and expose it as NFC_ATTR_TARGET_ATS via the netlink interface. The payload contains 'historical bytes' that help to distinguish cards from one another. The information is commonly used to assemble an emulated ATR similar to that reported by smart cards with contacts. Add a 20-byte field target_ats to struct nci_dev to hold the payload obtained in nci_rf_intf_activated_ntf_packet() and copy it to over to nfc_target.ats in nci_activate_target(). The approach is similar to the handling of 'general bytes' within ATR_RES. Replace the hard-coded size of rats_res within struct activation_params_nfca_poll_iso_dep by the equal constant NFC_ATS_MAXSIZE now defined in nfc.h Within NCI, the information corresponds to the 'RATS Response' activation parameter that omits the initial length byte TL. This loses no information and is consistent with our handling of SENSB_RES that also drops the first (constant) byte. Tested with nxp_nci_i2c on a few type A targets including an ICAO 9303 compliant passport. I refrain from the corresponding change to digital_in_recv_ats() to have the few drivers based on digital.h fill nfc_target.ats, as I have no way to test it. That class of drivers appear not to set NFC_ATTR_TARGET_SENSB_RES either. Consider a separate patch to propagate (all) the parameters. Signed-off-by: Juraj Šarinay Link: https://patch.msgid.link/20241103124525.8392-1-juraj@sarinay.com Signed-off-by: Paolo Abeni --- include/uapi/linux/nfc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 4fa4e979e948..2f5b4be25261 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -164,6 +164,7 @@ enum nfc_commands { * @NFC_ATTR_VENDOR_SUBCMD: Vendor specific sub command * @NFC_ATTR_VENDOR_DATA: Vendor specific data, to be optionally passed * to a vendor specific command implementation + * @NFC_ATTR_TARGET_ATS: ISO 14443 type A target Answer To Select */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -198,6 +199,7 @@ enum nfc_attrs { NFC_ATTR_VENDOR_ID, NFC_ATTR_VENDOR_SUBCMD, NFC_ATTR_VENDOR_DATA, + NFC_ATTR_TARGET_ATS, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; @@ -225,6 +227,7 @@ enum nfc_sdp_attr { #define NFC_GB_MAXSIZE 48 #define NFC_FIRMWARE_NAME_MAXSIZE 32 #define NFC_ISO15693_UID_MAXSIZE 8 +#define NFC_ATS_MAXSIZE 20 /* NFC protocols */ #define NFC_PROTO_JEWEL 1 -- cgit v1.2.3 From b855f02427e995cbc905e134cc3a7f4e503c0455 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 1 Nov 2024 10:12:03 +0100 Subject: media: replace obsolete hans.verkuil@cisco.com alias The old hans.verkuil@cisco.com email address was discontinued years ago. Replace it with the correct hansverk@cisco.com email. Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-dv-timings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index ef0128c7369c..44a16e0e5a12 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -2,7 +2,7 @@ /* * V4L2 DV timings header. * - * Copyright (C) 2012-2016 Hans Verkuil + * Copyright (C) 2012-2016 Hans Verkuil */ #ifndef _V4L2_DV_TIMINGS_H -- cgit v1.2.3 From 580db513b4a9d52f306580015a1872eea0a0894e Mon Sep 17 00:00:00 2001 From: Khang Nguyen Date: Tue, 5 Nov 2024 14:19:15 +0700 Subject: net: mctp: Expose transport binding identifier via IFLA attribute MCTP control protocol implementations are transport binding dependent. Endpoint discovery is mandatory based on transport binding. Message timing requirements are specified in each respective transport binding specification. However, we currently have no means to get this information from MCTP links. Add a IFLA_MCTP_PHYS_BINDING netlink link attribute, which represents the transport type using the DMTF DSP0239-defined type numbers, returned as part of RTM_GETLINK data. We get an IFLA_MCTP_PHYS_BINDING attribute for each MCTP link, for example: - 0x00 (unspec) for loopback interface; - 0x01 (SMBus/I2C) for mctpi2c%d interfaces; and - 0x05 (serial) for mctpserial%d interfaces. Signed-off-by: Khang Nguyen Reviewed-by: Matt Johnston Link: https://patch.msgid.link/20241105071915.821871-1-khangng@os.amperecomputing.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8516c1ccd57a..2575e0cd9b48 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1958,6 +1958,7 @@ struct ifla_rmnet_flags { enum { IFLA_MCTP_UNSPEC, IFLA_MCTP_NET, + IFLA_MCTP_PHYS_BINDING, __IFLA_MCTP_MAX, }; -- cgit v1.2.3 From 662df3e5c37666d6ed75c88098699e070a4b35b5 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 28 Oct 2024 14:13:29 +0000 Subject: mm: madvise: implement lightweight guard page mechanism Implement a new lightweight guard page feature, that is regions of userland virtual memory that, when accessed, cause a fatal signal to arise. Currently users must establish PROT_NONE ranges to achieve this. However this is very costly memory-wise - we need a VMA for each and every one of these regions AND they become unmergeable with surrounding VMAs. In addition repeated mmap() calls require repeated kernel context switches and contention of the mmap lock to install these ranges, potentially also having to unmap memory if installed over existing ranges. The lightweight guard approach eliminates the VMA cost altogether - rather than establishing a PROT_NONE VMA, it operates at the level of page table entries - establishing PTE markers such that accesses to them cause a fault followed by a SIGSGEV signal being raised. This is achieved through the PTE marker mechanism, which we have already extended to provide PTE_MARKER_GUARD, which we installed via the generic page walking logic which we have extended for this purpose. These guard ranges are established with MADV_GUARD_INSTALL. If the range in which they are installed contain any existing mappings, they will be zapped, i.e. free the range and unmap memory (thus mimicking the behaviour of MADV_DONTNEED in this respect). Any existing guard entries will be left untouched. There is therefore no nesting of guarded pages. Guarded ranges are NOT cleared by MADV_DONTNEED nor MADV_FREE (in both instances the memory range may be reused at which point a user would expect guards to still be in place), but they are cleared via MADV_GUARD_REMOVE, process teardown or unmapping of memory ranges. The guard property can be removed from ranges via MADV_GUARD_REMOVE. The ranges over which this is applied, should they contain non-guard entries, will be untouched, with only guard entries being cleared. We permit this operation on anonymous memory only, and only VMAs which are non-special, non-huge and not mlock()'d (if we permitted this we'd have to drop locked pages which would be rather counterintuitive). Racing page faults can cause repeated attempts to install guard pages that are interrupted, result in a zap, and this process can end up being repeated. If this happens more than would be expected in normal operation, we rescind locks and retry the whole thing, which avoids lock contention in this scenario. Link: https://lkml.kernel.org/r/6aafb5821bf209f277dfae0787abb2ef87a37542.1730123433.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Suggested-by: Vlastimil Babka Suggested-by: Jann Horn Suggested-by: David Hildenbrand Suggested-by: Vlastimil Babka Suggested-by: Jann Horn Suggested-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Arnd Bergmann Cc: Christian Brauner Cc: Christoph Hellwig Cc: Chris Zankel Cc: Helge Deller Cc: James E.J. Bottomley Cc: Jeff Xu Cc: John Hubbard Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Matt Turner Cc: Max Filippov Cc: Muchun Song Cc: Paul E. McKenney Cc: Richard Henderson Cc: Shuah Khan Cc: Shuah Khan Cc: Sidhartha Kumar Cc: Suren Baghdasaryan Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton --- include/uapi/asm-generic/mman-common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 6ce1f1ceb432..1ea2c4c33b86 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -79,6 +79,9 @@ #define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ +#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */ +#define MADV_GUARD_REMOVE 103 /* unguard range */ + /* compatibility flags */ #define MAP_FILE 0 -- cgit v1.2.3 From 6c83d153ed86eb17c46eafe4e78af4ce2071a052 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 21 Oct 2024 21:28:26 +0200 Subject: btrfs: add new ioctl to wait for cleaned subvolumes Add a new unprivileged ioctl that will let the command 'btrfs subvolume sync' work without the (privileged) SEARCH_TREE ioctl. There are several modes of operation, where the most common ones are to wait on a specific subvolume or all currently queued for cleaning. This is utilized e.g. in backup applications that delete subvolumes and wait until they're cleaned to check for remaining space. The other modes are for flexibility, e.g. for monitoring or checkpoints in the queue of deleted subvolumes, again without the need to use SEARCH_TREE. Notes: - waiting is interruptible, the timeout is set to 1 second and is not configurable - repeated calls to the ioctl see a different state, so this is inherently racy when using e.g. the count or peek next/last Use cases: - a subvolume A was deleted, wait for cleaning (WAIT_FOR_ONE) - a bunch of subvolumes were deleted, wait for all (WAIT_FOR_QUEUED or PEEK_LAST + WAIT_FOR_ONE) - count how many are queued (not blocking), for monitoring purposes - report progress (PEEK_NEXT), may miss some if cleaning is quick - own waiting in user space (PEEK_LAST until it's 0) Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index cdf6ad872149..d3b222d7af24 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -1049,6 +1049,29 @@ struct btrfs_ioctl_encoded_io_args { #define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0 #define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1 +/* + * Wait for subvolume cleaning process. This queries the kernel queue and it + * can change between the calls. + * + * - FOR_ONE - specify the subvolid + * - FOR_QUEUED - wait for all currently queued + * - COUNT - count number of queued + * - PEEK_FIRST - read which is the first in the queue (to be cleaned or being + * cleaned already), or 0 if the queue is empty + * - PEEK_LAST - read the last subvolid in the queue, or 0 if the queue is empty + */ +struct btrfs_ioctl_subvol_wait { + __u64 subvolid; + __u32 mode; + __u32 count; +}; + +#define BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE (0) +#define BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED (1) +#define BTRFS_SUBVOL_SYNC_COUNT (2) +#define BTRFS_SUBVOL_SYNC_PEEK_FIRST (3) +#define BTRFS_SUBVOL_SYNC_PEEK_LAST (4) + /* Error codes as returned by the kernel */ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1, @@ -1181,6 +1204,8 @@ enum btrfs_err_code { struct btrfs_ioctl_encoded_io_args) #define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \ struct btrfs_ioctl_encoded_io_args) +#define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \ + struct btrfs_ioctl_subvol_wait) #ifdef __cplusplus } -- cgit v1.2.3 From d920179b3d4842a0e27cae54fdddbe5ef3977e73 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Nov 2024 14:45:34 +0100 Subject: bpf: Add support for uprobe multi session attach Adding support to attach BPF program for entry and return probe of the same function. This is common use case which at the moment requires to create two uprobe multi links. Adding new BPF_TRACE_UPROBE_SESSION attach type that instructs kernel to attach single link program to both entry and exit probe. It's possible to control execution of the BPF program on return probe simply by returning zero or non zero from the entry BPF program execution to execute or not the BPF program on return probe respectively. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241108134544.480660-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f28b6527e815..4162afc6b5d0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1116,6 +1116,7 @@ enum bpf_attach_type { BPF_NETKIT_PRIMARY, BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, + BPF_TRACE_UPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From 377dda2cff59825079aee3906aa4904779747b0b Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Fri, 25 Oct 2024 11:23:29 +0800 Subject: drm/fourcc: add AMD_FMT_MOD_TILE_GFX9_4K_D_X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used when radeonsi export small texture's modifier to user with eglExportDMABUFImageQueryMESA(). mesa changes is available here: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31658 Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Signed-off-by: Alex Deucher --- include/uapi/drm/drm_fourcc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 78abd819fd62..70f3b00b0681 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1516,6 +1516,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * 64K_D_2D on GFX12 is identical to 64K_D on GFX11. */ #define AMD_FMT_MOD_TILE_GFX9_64K_D 10 +#define AMD_FMT_MOD_TILE_GFX9_4K_D_X 22 #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25 #define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26 #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27 -- cgit v1.2.3 From d2bd39c0456b75be9dfc7d774b8d021355c26ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 18 Oct 2024 17:47:49 +0300 Subject: PCI: Store all PCIe Supported Link Speeds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe bandwidth controller added by a subsequent commit will require selecting PCIe Link Speeds that are lower than the Maximum Link Speed. The struct pci_bus only stores max_bus_speed. Even if PCIe r6.1 sec 8.2.1 currently disallows gaps in supported Link Speeds, the Implementation Note in PCIe r6.1 sec 7.5.3.18, recommends determining supported Link Speeds using the Supported Link Speeds Vector in the Link Capabilities 2 Register (when available) to "avoid software being confused if a future specification defines Links that do not require support for all slower speeds." Reuse code in pcie_get_speed_cap() to add pcie_get_supported_speeds() to query the Supported Link Speeds Vector of a PCIe device. The value is taken directly from the Supported Link Speeds Vector or synthesized from the Max Link Speed in the Link Capabilities Register when the Link Capabilities 2 Register is not available. The Supported Link Speeds Vector in the Link Capabilities Register 2 corresponds to the bus below on Root Ports and Downstream Ports, whereas it corresponds to the bus above on Upstream Ports and Endpoints (PCIe r6.1 sec 7.5.3.18): Supported Link Speeds Vector - This field indicates the supported Link speed(s) of the associated Port. Add supported_speeds into the struct pci_dev that caches the Supported Link Speeds Vector. supported_speeds contains a set of Link Speeds only in the case where PCIe Link Speed can be determined. Root Complex Integrated Endpoints do not have a well-defined Link Speed because they do not implement either of the Link Capabilities Registers, which is allowed by PCIe r6.1 sec 7.5.3 (the same limitation applies to determining cur_bus_speed and max_bus_speed that are PCI_SPEED_UNKNOWN in such case). This is of no concern from PCIe bandwidth controller point of view because such devices are not attached into a PCIe Root Port that could be controlled. The supported_speeds field keeps the extra reserved zero at the least significant bit to match the Link Capabilities 2 Register layout. An attempt was made to store supported_speeds field into the struct pci_bus as an intersection of both ends of the Link, however, the subordinate struct pci_bus is not available early enough. The Target Speed quirk (in pcie_failed_link_retrain()) can run either during initial scan or later, requiring it to use the API provided by the PCIe bandwidth controller to set the Target Link Speed in order to co-exist with the bandwidth controller. When the Target Speed quirk is calling the bandwidth controller during initial scan, the struct pci_bus is not yet initialized. As such, storing supported_speeds into the struct pci_bus is not viable. Suggested-by: Lukas Wunner Link: https://lore.kernel.org/r/20241018144755.7875-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: move pcie_get_supported_speeds() decl to drivers/pci/pci.h] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 12323b3334a9..f3c9de0a497c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -678,6 +678,7 @@ #define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */ #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */ #define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */ +#define PCI_EXP_LNKCAP2_SLS 0x000000fe /* Supported Link Speeds Vector */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ -- cgit v1.2.3 From 5dc51ec86df6e2214d8398079c1e31736593ab53 Mon Sep 17 00:00:00 2001 From: Martin Karsten Date: Sat, 9 Nov 2024 05:02:31 +0000 Subject: net: Add napi_struct parameter irq_suspend_timeout Add a per-NAPI IRQ suspension parameter, which can be get/set with netdev-genl. This patch doesn't change any behavior but prepares the code for other changes in the following commits which use irq_suspend_timeout as a timeout for IRQ suspension. Signed-off-by: Martin Karsten Co-developed-by: Joe Damato Signed-off-by: Joe Damato Tested-by: Joe Damato Tested-by: Martin Karsten Acked-by: Stanislav Fomichev Reviewed-by: Sridhar Samudrala Link: https://patch.msgid.link/20241109050245.191288-2-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e3ebb49f60d2..e4be227d3ad6 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -124,6 +124,7 @@ enum { NETDEV_A_NAPI_PID, NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) -- cgit v1.2.3 From ed9d95f691c29748f21bc019de9566b698fdfab7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Nov 2024 10:09:56 -0500 Subject: fs: add the ability for statmount() to report the fs_subtype /proc/self/mountinfo prints out the sb->s_subtype after the type. This is particularly useful for disambiguating FUSE mounts (at least when the userland driver bothers to set it). Add STATMOUNT_FS_SUBTYPE and claim one of the __spare2 fields to point to the offset into the str[] array. Reviewed-by: Jan Kara Reviewed-by: Ian Kent Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241111-statmount-v4-2-2eaf35d07a80@kernel.org Acked-by: Miklos Szeredi Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 225bc366ffcb..2e939dddf9cb 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -173,7 +173,9 @@ struct statmount { __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ __u64 mnt_ns_id; /* ID of the mount namespace */ - __u64 __spare2[49]; + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 __spare1[1]; + __u64 __spare2[48]; char str[]; /* Variable size part containing strings */ }; @@ -207,6 +209,7 @@ struct mnt_id_req { #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 4db97c21ed07a7d4081ed9820599fa36857083d6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:21 -0800 Subject: iommufd/viommu: Add IOMMU_VIOMMU_ALLOC ioctl Add a new ioctl for user space to do a vIOMMU allocation. It must be based on a nesting parent HWPT, so take its refcount. IOMMU driver wanting to support vIOMMUs must define its IOMMU_VIOMMU_TYPE_ in the uAPI header and implement a viommu_alloc op in its iommu_ops. Link: https://patch.msgid.link/r/dc2b8ba9ac935007beff07c1761c31cd097ed780.1730836219.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 41b1a01e9293..302844136b02 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -52,6 +52,7 @@ enum { IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, + IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, }; /** @@ -822,4 +823,43 @@ struct iommu_fault_alloc { __u32 out_fault_fd; }; #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) + +/** + * enum iommu_viommu_type - Virtual IOMMU Type + * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use + */ +enum iommu_viommu_type { + IOMMU_VIOMMU_TYPE_DEFAULT = 0, +}; + +/** + * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) + * @size: sizeof(struct iommu_viommu_alloc) + * @flags: Must be 0 + * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type + * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU + * @hwpt_id: ID of a nesting parent HWPT to associate to + * @out_viommu_id: Output virtual IOMMU ID for the allocated object + * + * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's + * virtualization support that is a security-isolated slice of the real IOMMU HW + * that is unique to a specific VM. Operations global to the IOMMU are connected + * to the vIOMMU, such as: + * - Security namespace for guest owned ID, e.g. guest-controlled cache tags + * - Non-device-affiliated event reporting, e.g. invalidation queue errors + * - Access to a sharable nesting parent pagetable across physical IOMMUs + * - Virtualization of various platforms IDs, e.g. RIDs and others + * - Delivery of paravirtualized invalidation + * - Direct assigned invalidation queues + * - Direct assigned interrupts + */ +struct iommu_viommu_alloc { + __u32 size; + __u32 flags; + __u32 type; + __u32 dev_id; + __u32 hwpt_id; + __u32 out_viommu_id; +}; +#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) #endif -- cgit v1.2.3 From 13a750180fc86d41695c8f64d8892412482a401d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:04:23 -0800 Subject: iommufd: Allow pt_id to carry viommu_id for IOMMU_HWPT_ALLOC Now a vIOMMU holds a shareable nesting parent HWPT. So, it can act like that nesting parent HWPT to allocate a nested HWPT. Support that in the IOMMU_HWPT_ALLOC ioctl handler, and update its kdoc. Also, add an iommufd_viommu_alloc_hwpt_nested helper to allocate a nested HWPT for a vIOMMU object. Since a vIOMMU object holds the parent hwpt's refcount already, increase the refcount of the vIOMMU only. Link: https://patch.msgid.link/r/a0f24f32bfada8b448d17587adcaedeeb50a67ed.1730836219.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 302844136b02..a498d4838f9a 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -435,7 +435,7 @@ enum iommu_hwpt_data_type { * @size: sizeof(struct iommu_hwpt_alloc) * @flags: Combination of enum iommufd_hwpt_alloc_flags * @dev_id: The device to allocate this HWPT for - * @pt_id: The IOAS or HWPT to connect this HWPT to + * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to * @out_hwpt_id: The ID of the new HWPT * @__reserved: Must be 0 * @data_type: One of enum iommu_hwpt_data_type @@ -454,11 +454,13 @@ enum iommu_hwpt_data_type { * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. * - * A user-managed nested HWPT will be created from a given parent HWPT via - * @pt_id, in which the parent HWPT must be allocated previously via the - * same ioctl from a given IOAS (@pt_id). In this case, the @data_type - * must be set to a pre-defined type corresponding to an I/O page table - * type supported by the underlying IOMMU hardware. + * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a + * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be + * allocated previously via the same ioctl from a given IOAS (@pt_id). In this + * case, the @data_type must be set to a pre-defined type corresponding to an + * I/O page table type supported by the underlying IOMMU hardware. The device + * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU + * instance. * * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr -- cgit v1.2.3 From 0ce5c2477af2e2284b9c70474e4dae85db211680 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:09 -0800 Subject: iommufd/viommu: Add IOMMUFD_OBJ_VDEVICE and IOMMU_VDEVICE_ALLOC ioctl Introduce a new IOMMUFD_OBJ_VDEVICE to represent a physical device (struct device) against a vIOMMU (struct iommufd_viommu) object in a VM. This vDEVICE object (and its structure) holds all the infos and attributes in the VM, regarding the device related to the vIOMMU. As an initial patch, add a per-vIOMMU virtual ID. This can be: - Virtual StreamID on a nested ARM SMMUv3, an index to a Stream Table - Virtual DeviceID on a nested AMD IOMMU, an index to a Device Table - Virtual RID on a nested Intel VT-D IOMMU, an index to a Context Table Potentially, this vDEVICE structure would hold some vData for Confidential Compute Architecture (CCA). Use this virtual ID to index an "vdevs" xarray that belongs to a vIOMMU object. Add a new ioctl for vDEVICE allocations. Since a vDEVICE is a connection of a device object and an iommufd_viommu object, take two refcounts in the ioctl handler. Link: https://patch.msgid.link/r/cda8fd2263166e61b8191a3b3207e0d2b08545bf.1730836308.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index a498d4838f9a..9b5236004b8e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -53,6 +53,7 @@ enum { IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, + IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, }; /** @@ -864,4 +865,25 @@ struct iommu_viommu_alloc { __u32 out_viommu_id; }; #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) + +/** + * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC) + * @size: sizeof(struct iommu_vdevice_alloc) + * @viommu_id: vIOMMU ID to associate with the virtual device + * @dev_id: The physical device to allocate a virtual instance on the vIOMMU + * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY + * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID + * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table + * + * Allocate a virtual device instance (for a physical device) against a vIOMMU. + * This instance holds the device's information (related to its vIOMMU) in a VM. + */ +struct iommu_vdevice_alloc { + __u32 size; + __u32 viommu_id; + __u32 dev_id; + __u32 out_vdevice_id; + __aligned_u64 virt_id; +}; +#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) #endif -- cgit v1.2.3 From 54ce69e36c71c88f258b1a322c54343d90954858 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 5 Nov 2024 12:05:12 -0800 Subject: iommufd: Allow hwpt_id to carry viommu_id for IOMMU_HWPT_INVALIDATE With a vIOMMU object, use space can flush any IOMMU related cache that can be directed via a vIOMMU object. It is similar to the IOMMU_HWPT_INVALIDATE uAPI, but can cover a wider range than IOTLB, e.g. device/desciprtor cache. Allow hwpt_id of the iommu_hwpt_invalidate structure to carry a viommu_id, and reuse the IOMMU_HWPT_INVALIDATE uAPI for vIOMMU invalidations. Drivers can define different structures for vIOMMU invalidations v.s. HWPT ones. Since both the HWPT-based and vIOMMU-based invalidation pathways check own cache invalidation op, remove the WARN_ON_ONCE in the allocator. Update the uAPI, kdoc, and selftest case accordingly. Link: https://patch.msgid.link/r/b411e2245e303b8a964f39f49453a5dff280968f.1730836308.git.nicolinc@nvidia.com Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 9b5236004b8e..badb41c5bfa4 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -700,7 +700,7 @@ struct iommu_hwpt_vtd_s1_invalidate { /** * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) * @size: sizeof(struct iommu_hwpt_invalidate) - * @hwpt_id: ID of a nested HWPT for cache invalidation + * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation * @data_uptr: User pointer to an array of driver-specific cache invalidation * data. * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data @@ -711,8 +711,11 @@ struct iommu_hwpt_vtd_s1_invalidate { * Output the number of requests successfully handled by kernel. * @__reserved: Must be 0. * - * Invalidate the iommu cache for user-managed page table. Modifications on a - * user-managed page table should be followed by this operation to sync cache. + * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications + * on a user-managed page table should be followed by this operation, if a HWPT + * is passed in via @hwpt_id. Other caches, such as device cache or descriptor + * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field. + * * Each ioctl can support one or more cache invalidation requests in the array * that has a total size of @entry_len * @entry_num. * -- cgit v1.2.3 From 69d9b312f38aa19f8c801e90bd23d70685be49f0 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:52 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_VIOMMU_ALLOC Add a new driver-type for ARM SMMUv3 to enum iommu_viommu_type. Implement an arm_vsmmu_alloc(). As an initial step, copy the VMID from s2_parent. A followup series is required to give the VIOMMU object it's own VMID that will be used in all nesting configurations. Link: https://patch.msgid.link/r/8-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index f4f76759b738..7cb13a29969d 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -425,10 +425,12 @@ struct iommu_hwpt_vtd_s1 { * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table + * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table */ enum iommu_hwpt_data_type { IOMMU_HWPT_DATA_NONE = 0, IOMMU_HWPT_DATA_VTD_S1 = 1, + IOMMU_HWPT_DATA_ARM_SMMUV3 = 2, }; /** @@ -868,9 +870,11 @@ struct iommu_fault_alloc { /** * enum iommu_viommu_type - Virtual IOMMU Type * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type */ enum iommu_viommu_type { IOMMU_VIOMMU_TYPE_DEFAULT = 0, + IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, }; /** -- cgit v1.2.3 From 1e8be08d1c91d52a9b51d424db78ddbf88660bbb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:53 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_DOMAIN_NESTED For SMMUv3 a IOMMU_DOMAIN_NESTED is composed of a S2 iommu_domain acting as the parent and a user provided STE fragment that defines the CD table and related data with addresses translated by the S2 iommu_domain. The kernel only permits userspace to control certain allowed bits of the STE that are safe for user/guest control. IOTLB maintenance is a bit subtle here, the S1 implicitly includes the S2 translation, but there is no way of knowing which S1 entries refer to a range of S2. For the IOTLB we follow ARM's guidance and issue a CMDQ_OP_TLBI_NH_ALL to flush all ASIDs from the VMID after flushing the S2 on any change to the S2. The IOMMU_DOMAIN_NESTED can only be created from inside a VIOMMU as the invalidation path relies on the VIOMMU to translate virtual stream ID used in the invalidation commands for the CD table and ATS. Link: https://patch.msgid.link/r/9-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Reviewed-by: Donald Dutile Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 7cb13a29969d..b6baaa1e55b1 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -421,6 +421,26 @@ struct iommu_hwpt_vtd_s1 { __u32 __reserved; }; +/** + * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE + * (IOMMU_HWPT_DATA_ARM_SMMUV3) + * + * @ste: The first two double words of the user space Stream Table Entry for + * the translation. Must be little-endian. + * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) + * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax + * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * + * -EIO will be returned if @ste is not legal or contains any non-allowed field. + * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass + * nested domain will translate the same as the nesting parent. The S1 will + * install a Context Descriptor Table pointing at userspace memory translated + * by the nesting parent. + */ +struct iommu_hwpt_arm_smmuv3 { + __aligned_le64 ste[2]; +}; + /** * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data -- cgit v1.2.3 From f27298a82ba09a1c8aecee8a209b2a312beac672 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Oct 2024 21:20:55 -0300 Subject: iommu/arm-smmu-v3: Allow ATS for IOMMU_DOMAIN_NESTED The EATS flag needs to flow through the vSTE and into the pSTE, and ensure physical ATS is enabled on the PCI device. The physical ATS state must match the VM's idea of EATS as we rely on the VM to issue the ATS invalidation commands. Thus ATS must remain off at the device until EATS on a nesting domain turns it on. Attaching a nesting domain is the point where the invalidation responsibility transfers to userspace. Update the ATS logic to track EATS for nesting domains and flush the ATC whenever the S2 nesting parent changes. Link: https://patch.msgid.link/r/11-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index b6baaa1e55b1..a66eb0384cd6 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -429,7 +429,7 @@ struct iommu_hwpt_vtd_s1 { * the translation. Must be little-endian. * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec) * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax - * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD + * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD * * -EIO will be returned if @ste is not legal or contains any non-allowed field. * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass -- cgit v1.2.3 From d68beb276ba26cec47350a6d468e967673ee0c56 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 30 Oct 2024 21:20:56 -0300 Subject: iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU object Implement the vIOMMU's cache_invalidate op for user space to invalidate the IOTLB entries, Device ATS and CD entries that are cached by hardware. Add struct iommu_viommu_arm_smmuv3_invalidate defining invalidation entries that are simply in the native format of a 128-bit TLBI command. Scan those commands against the permitted command list and fix their VMID/SID fields to match what is stored in the vIOMMU. Link: https://patch.msgid.link/r/12-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Co-developed-by: Eric Auger Signed-off-by: Eric Auger Co-developed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index a66eb0384cd6..747d3d9baa3d 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -713,9 +713,11 @@ struct iommu_hwpt_get_dirty_bitmap { * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation * Data Type * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 + * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3 */ enum iommu_hwpt_invalidate_data_type { IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, + IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1, }; /** @@ -754,6 +756,28 @@ struct iommu_hwpt_vtd_s1_invalidate { __u32 __reserved; }; +/** + * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation + * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3) + * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ. + * Must be little-endian. + * + * Supported command list only when passing in a vIOMMU via @hwpt_id: + * CMDQ_OP_TLBI_NSNH_ALL + * CMDQ_OP_TLBI_NH_VA + * CMDQ_OP_TLBI_NH_VAA + * CMDQ_OP_TLBI_NH_ALL + * CMDQ_OP_TLBI_NH_ASID + * CMDQ_OP_ATC_INV + * CMDQ_OP_CFGI_CD + * CMDQ_OP_CFGI_CD_ALL + * + * -EIO will be returned if the command is not supported. + */ +struct iommu_viommu_arm_smmuv3_invalidate { + __aligned_le64 cmd[2]; +}; + /** * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) * @size: sizeof(struct iommu_hwpt_invalidate) -- cgit v1.2.3 From c532de5a67a70f8533d495f8f2aaa9a0491c3ad0 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: LoongArch: KVM: Add IPI device support Add device model for IPI interrupt controller, implement basic create & destroy interfaces, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 637efc055145..9fff439c30ea 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1158,7 +1158,11 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LOONGARCH_IPI, +#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_MAX, + }; struct kvm_vfio_spapr_tce { -- cgit v1.2.3 From 2e8b9df82631e714cc2b7bf302772c8259673180 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: LoongArch: KVM: Add EIOINTC device support Add device model for EIOINTC interrupt controller, implement basic create & destroy interfaces, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9fff439c30ea..0ec5c631d9e9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1160,6 +1160,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_LOONGARCH_IPI, #define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_LOONGARCH_EIOINTC, +#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_MAX, -- cgit v1.2.3 From e785dfacf7e7fe94370fa0e8e3ff1bc8fe179831 Mon Sep 17 00:00:00 2001 From: Xianglai Li Date: Wed, 13 Nov 2024 16:18:27 +0800 Subject: LoongArch: KVM: Add PCHPIC device support Add device model for PCHPIC interrupt controller, implemente basic create & destroy interface, and register device model to kvm device table. Signed-off-by: Tianrui Zhao Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0ec5c631d9e9..502ea63b5d2e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1162,6 +1162,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_EIOINTC, #define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC + KVM_DEV_TYPE_LOONGARCH_PCHPIC, +#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_MAX, -- cgit v1.2.3 From 44010543fc8bedad172aa5b6c43480e5d2124497 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Nov 2024 10:09:57 -0500 Subject: fs: add the ability for statmount() to report the sb_source /proc/self/mountinfo displays the source for the mount, but statmount() doesn't yet have a way to return it. Add a new STATMOUNT_SB_SOURCE flag, claim the 32-bit __spare1 field to hold the offset into the str[] array. Reviewed-by: Jan Kara Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20241111-statmount-v4-3-2eaf35d07a80@kernel.org Acked-by: Miklos Szeredi Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 2e939dddf9cb..2b49e9131d77 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -174,7 +174,7 @@ struct statmount { __u32 mnt_point; /* [str] Mountpoint relative to current root */ __u64 mnt_ns_id; /* ID of the mount namespace */ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ - __u32 __spare1[1]; + __u32 sb_source; /* [str] Source string of the mount */ __u64 __spare2[48]; char str[]; /* Variable size part containing strings */ }; @@ -210,6 +210,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ #define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 2f4d4503e9e5ab765a7948f98bc5deef7850f607 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 12 Nov 2024 11:10:04 +0100 Subject: statmount: add flag to retrieve unescaped options Filesystem options can be retrieved with STATMOUNT_MNT_OPTS, which returns a string of comma separated options, where some characters are escaped using the \OOO notation. Add a new flag, STATMOUNT_OPT_ARRAY, which instead returns the raw option values separated with '\0' charaters. Since escaped charaters are rare, this inteface is preferable for non-libmount users which likley don't want to deal with option de-escaping. Example code: if (st->mask & STATMOUNT_OPT_ARRAY) { const char *opt = st->str + st->opt_array; for (unsigned int i = 0; i < st->opt_num; i++) { printf("opt_array[%i]: <%s>\n", i, opt); opt += strlen(opt) + 1; } } Example ouput: (1) mnt_opts: (2) opt_array[0]: opt_array[1]: opt_array[2]: opt_array[3]: opt_array[4]: opt_array[5]: Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20241112101006.30715-1-mszeredi@redhat.com Acked-by: Jeff Layton [brauner: tweak variable naming and parsing add example output] Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 2b49e9131d77..c0fda4604187 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 mnt_opts; /* [str] Mount options of the mount */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -175,7 +175,9 @@ struct statmount { __u64 mnt_ns_id; /* ID of the mount namespace */ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ __u32 sb_source; /* [str] Source string of the mount */ - __u64 __spare2[48]; + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u64 __spare2[47]; char str[]; /* Variable size part containing strings */ }; @@ -211,6 +213,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ #define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 95f567f81e43a1bcb5fbf0559e55b7505707300d Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Fri, 1 Nov 2024 15:37:03 -0400 Subject: fs: Simplify getattr interface function checking AT_GETATTR_NOSEC flag Commit 8a924db2d7b5 ("fs: Pass AT_GETATTR_NOSEC flag to getattr interface function")' introduced the AT_GETATTR_NOSEC flag to ensure that the call paths only call vfs_getattr_nosec if it is set instead of vfs_getattr. Now, simplify the getattr interface functions of filesystems where the flag AT_GETATTR_NOSEC is checked. There is only a single caller of inode_operations getattr function and it is located in fs/stat.c in vfs_getattr_nosec. The caller there is the only one from which the AT_GETATTR_NOSEC flag is passed from. Two filesystems are checking this flag in .getattr and the flag is always passed to them unconditionally from only vfs_getattr_nosec: - ecryptfs: Simplify by always calling vfs_getattr_nosec in ecryptfs_getattr. From there the flag is passed to no other function and this function is not called otherwise. - overlayfs: Simplify by always calling vfs_getattr_nosec in ovl_getattr. From there the flag is passed to no other function and this function is not called otherwise. The query_flags in vfs_getattr_nosec will mask-out AT_GETATTR_NOSEC from any caller using AT_STATX_SYNC_TYPE as mask so that the flag is not important inside this function. Also, since no filesystem is checking the flag anymore, remove the flag entirely now, including the BUG_ON check that never triggered. The net change of the changes here combined with the original commit is that ecryptfs and overlayfs do not call vfs_getattr but only vfs_getattr_nosec. Fixes: 8a924db2d7b5 ("fs: Pass AT_GETATTR_NOSEC flag to getattr interface function") Reported-by: Al Viro Closes: https://lore.kernel.org/linux-fsdevel/20241101011724.GN1350452@ZenIV/T/#u Cc: Tyler Hicks Cc: ecryptfs@vger.kernel.org Cc: Miklos Szeredi Cc: Amir Goldstein Cc: linux-unionfs@vger.kernel.org Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christian Brauner Signed-off-by: Stefan Berger Signed-off-by: Al Viro --- include/uapi/linux/fcntl.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 87e2dec79fea..a40833bf2855 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -154,8 +154,4 @@ usable with open_by_handle_at(2). */ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ -#if defined(__KERNEL__) -#define AT_GETATTR_NOSEC 0x80000000 -#endif - #endif /* _UAPI_LINUX_FCNTL_H */ -- cgit v1.2.3 From 7c1ae151e81268db1fe8c8a473d922fc5ba47b72 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 13 Nov 2024 13:51:54 +0200 Subject: virtio_pci: Introduce device parts access commands Introduce device parts access commands via the admin queue. These commands and their structure adhere to the Virtio 1.4 specification. Acked-by: Michael S. Tsirkin Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20241113115200.209269-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/virtio_pci.h | 131 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index a8208492e822..1beb317df1b9 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -40,6 +40,7 @@ #define _LINUX_VIRTIO_PCI_H #include +#include #ifndef VIRTIO_PCI_NO_LEGACY @@ -240,6 +241,17 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 +/* Device parts access commands. */ +#define VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY 0x7 +#define VIRTIO_ADMIN_CMD_DEVICE_CAP_GET 0x8 +#define VIRTIO_ADMIN_CMD_DRIVER_CAP_SET 0x9 +#define VIRTIO_ADMIN_CMD_RESOURCE_OBJ_CREATE 0xa +#define VIRTIO_ADMIN_CMD_RESOURCE_OBJ_DESTROY 0xd +#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_GET 0xe +#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET 0xf +#define VIRTIO_ADMIN_CMD_DEV_PARTS_SET 0x10 +#define VIRTIO_ADMIN_CMD_DEV_MODE_SET 0x11 + struct virtio_admin_cmd_hdr { __le16 opcode; /* @@ -286,4 +298,123 @@ struct virtio_admin_cmd_notify_info_result { struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO]; }; +#define VIRTIO_DEV_PARTS_CAP 0x0000 + +struct virtio_dev_parts_cap { + __u8 get_parts_resource_objects_limit; + __u8 set_parts_resource_objects_limit; +}; + +#define MAX_CAP_ID __KERNEL_DIV_ROUND_UP(VIRTIO_DEV_PARTS_CAP + 1, 64) + +struct virtio_admin_cmd_query_cap_id_result { + __le64 supported_caps[MAX_CAP_ID]; +}; + +struct virtio_admin_cmd_cap_get_data { + __le16 id; + __u8 reserved[6]; +}; + +struct virtio_admin_cmd_cap_set_data { + __le16 id; + __u8 reserved[6]; + __u8 cap_specific_data[]; +}; + +struct virtio_admin_cmd_resource_obj_cmd_hdr { + __le16 type; + __u8 reserved[2]; + __le32 id; /* Indicates unique resource object id per resource object type */ +}; + +struct virtio_admin_cmd_resource_obj_create_data { + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; + __le64 flags; + __u8 resource_obj_specific_data[]; +}; + +#define VIRTIO_RESOURCE_OBJ_DEV_PARTS 0 + +#define VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_GET 0 +#define VIRTIO_RESOURCE_OBJ_DEV_PARTS_TYPE_SET 1 + +struct virtio_resource_obj_dev_parts { + __u8 type; + __u8 reserved[7]; +}; + +#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE 0 +#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_COUNT 1 +#define VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_LIST 2 + +struct virtio_admin_cmd_dev_parts_metadata_data { + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; + __u8 type; + __u8 reserved[7]; +}; + +#define VIRTIO_DEV_PART_F_OPTIONAL 0 + +struct virtio_dev_part_hdr { + __le16 part_type; + __u8 flags; + __u8 reserved; + union { + struct { + __le32 offset; + __le32 reserved; + } pci_common_cfg; + struct { + __le16 index; + __u8 reserved[6]; + } vq_index; + } selector; + __le32 length; +}; + +struct virtio_dev_part { + struct virtio_dev_part_hdr hdr; + __u8 value[]; +}; + +struct virtio_admin_cmd_dev_parts_metadata_result { + union { + struct { + __le32 size; + __le32 reserved; + } parts_size; + struct { + __le32 count; + __le32 reserved; + } hdr_list_count; + struct { + __le32 count; + __le32 reserved; + struct virtio_dev_part_hdr hdrs[]; + } hdr_list; + }; +}; + +#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_SELECTED 0 +#define VIRTIO_ADMIN_CMD_DEV_PARTS_GET_TYPE_ALL 1 + +struct virtio_admin_cmd_dev_parts_get_data { + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; + __u8 type; + __u8 reserved[7]; + struct virtio_dev_part_hdr hdr_list[]; +}; + +struct virtio_admin_cmd_dev_parts_set_data { + struct virtio_admin_cmd_resource_obj_cmd_hdr hdr; + struct virtio_dev_part parts[]; +}; + +#define VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED 0 + +struct virtio_admin_cmd_dev_mode_set_data { + __u8 flags; +}; + #endif -- cgit v1.2.3 From aefff51e1c2986e16f2780ca8e4c97b784800ab5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 14 Nov 2024 16:31:27 +0100 Subject: statmount: retrieve security mount options Add the ability to retrieve security mount options. Keep them separate from filesystem specific mount options so it's easy to tell them apart. Also allow to retrieve them separate from other mount options as most of the time users won't be interested in security specific mount options. Link: https://lore.kernel.org/r/20241114-radtour-ofenrohr-ff34b567b40a@brauner Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index c0fda4604187..c07008816aca 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -177,7 +177,9 @@ struct statmount { __u32 sb_source; /* [str] Source string of the mount */ __u32 opt_num; /* Number of fs options */ __u32 opt_array; /* [str] Array of nul terminated fs options */ - __u64 __spare2[47]; + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 __spare2[46]; char str[]; /* Variable size part containing strings */ }; @@ -214,6 +216,7 @@ struct mnt_id_req { #define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ #define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 829ed626499c11c9d11c65e93febc1e0da7cd61b Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 13 Nov 2024 11:51:36 -0800 Subject: iommufd: Add IOMMU_IOAS_CHANGE_PROCESS Add an ioctl that updates all DMA mappings to reflect the current process, Change the mm and transfer locked memory accounting from old to current mm. This will be used for live update, allowing an old process to hand the iommufd device descriptor to a new process. The new process calls the ioctl. IOMMU_IOAS_CHANGE_PROCESS only supports DMA mappings created with IOMMU_IOAS_MAP_FILE, because the kernel metadata for such mappings does not depend on the userland VA of the pages (which is different in the new process). IOMMU_IOAS_CHANGE_PROCESS fails if other types of mappings are present. This is a revised version of code originally provided by Jason. Link: https://patch.msgid.link/r/1731527497-16091-4-git-send-email-steven.sistare@oracle.com Suggested-by: Jason Gunthorpe Signed-off-by: Steve Sistare Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 747d3d9baa3d..4ae8b1ee0444 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -54,6 +54,7 @@ enum { IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f, IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, + IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, }; /** @@ -972,4 +973,26 @@ struct iommu_vdevice_alloc { __aligned_u64 virt_id; }; #define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC) + +/** + * struct iommu_ioas_change_process - ioctl(VFIO_IOAS_CHANGE_PROCESS) + * @size: sizeof(struct iommu_ioas_change_process) + * @__reserved: Must be 0 + * + * This transfers pinned memory counts for every memory map in every IOAS + * in the context to the current process. This only supports maps created + * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present. + * If the ioctl returns a failure status, then nothing is changed. + * + * This API is useful for transferring operation of a device from one process + * to another, such as during userland live update. + */ +struct iommu_ioas_change_process { + __u32 size; + __u32 __reserved; +}; + +#define IOMMU_IOAS_CHANGE_PROCESS \ + _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) + #endif -- cgit v1.2.3 From a12143e6084c502fc3cfaa8b717bffc8c14cf806 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 14 Nov 2024 22:07:51 +0100 Subject: netfilter: bitwise: rename some boolean operation functions In the next patch we add support for doing AND, OR and XOR operations directly in the kernel, so rename some functions and an enum constant related to mask-and-xor boolean operations. Signed-off-by: Jeremy Sowden Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 9e9079321380..487542234ccd 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -564,16 +564,20 @@ enum nft_immediate_attributes { /** * enum nft_bitwise_ops - nf_tables bitwise operations * - * @NFT_BITWISE_BOOL: mask-and-xor operation used to implement NOT, AND, OR and - * XOR boolean operations + * @NFT_BITWISE_MASK_XOR: mask-and-xor operation used to implement NOT, AND, OR + * and XOR boolean operations * @NFT_BITWISE_LSHIFT: left-shift operation * @NFT_BITWISE_RSHIFT: right-shift operation */ enum nft_bitwise_ops { - NFT_BITWISE_BOOL, + NFT_BITWISE_MASK_XOR, NFT_BITWISE_LSHIFT, NFT_BITWISE_RSHIFT, }; +/* + * Old name for NFT_BITWISE_MASK_XOR. Retained for backwards-compatibility. + */ +#define NFT_BITWISE_BOOL NFT_BITWISE_MASK_XOR /** * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes -- cgit v1.2.3 From c374196b2b9f4b803fccd59ed82f0712041e21e1 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 11 Oct 2024 11:00:22 +0200 Subject: fs: name_to_handle_at() support for "explicit connectable" file handles nfsd encodes "connectable" file handles for the subtree_check feature, which can be resolved to an open file with a connected path. So far, userspace nfs server could not make use of this functionality. Introduce a new flag AT_HANDLE_CONNECTABLE to name_to_handle_at(2). When used, the encoded file handle is "explicitly connectable". The "explicitly connectable" file handle sets bits in the high 16bit of the handle_type field, so open_by_handle_at(2) will know that it needs to open a file with a connected path. old kernels will now recognize the handle_type with high bits set, so "explicitly connectable" file handles cannot be decoded by open_by_handle_at(2) on old kernels. The flag AT_HANDLE_CONNECTABLE is not allowed together with either AT_HANDLE_FID or AT_EMPTY_PATH. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20241011090023.655623-3-amir73il@gmail.com Fixes: 570df4e9c23f ("ceph: snapshot nfs re-export") Acked-by: Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 87e2dec79fea..56ff2100e021 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -153,6 +153,7 @@ object identity and may not be usable with open_by_handle_at(2). */ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ +#define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 -- cgit v1.2.3 From b0ccf4f53d968e794a4ea579d5135cc1aaf1a53f Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 14 Nov 2024 22:08:13 +0100 Subject: netfilter: bitwise: add support for doing AND, OR and XOR directly Hitherto, these operations have been converted in user space to mask-and-xor operations on one register and two immediate values, and it is the latter which have been evaluated by the kernel. We add support for evaluating these operations directly in kernel space on one register and either an immediate value or a second register. Pablo made a few changes to the original patch: - EINVAL if NFTA_BITWISE_SREG2 is used with fast version. - Allow _AND,_OR,_XOR with _DATA != sizeof(u32) - Dump _SREG2 or _DATA with _AND,_OR,_XOR Signed-off-by: Jeremy Sowden Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 487542234ccd..49c944e78463 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -568,11 +568,17 @@ enum nft_immediate_attributes { * and XOR boolean operations * @NFT_BITWISE_LSHIFT: left-shift operation * @NFT_BITWISE_RSHIFT: right-shift operation + * @NFT_BITWISE_AND: and operation + * @NFT_BITWISE_OR: or operation + * @NFT_BITWISE_XOR: xor operation */ enum nft_bitwise_ops { NFT_BITWISE_MASK_XOR, NFT_BITWISE_LSHIFT, NFT_BITWISE_RSHIFT, + NFT_BITWISE_AND, + NFT_BITWISE_OR, + NFT_BITWISE_XOR, }; /* * Old name for NFT_BITWISE_MASK_XOR. Retained for backwards-compatibility. @@ -590,6 +596,7 @@ enum nft_bitwise_ops { * @NFTA_BITWISE_OP: type of operation (NLA_U32: nft_bitwise_ops) * @NFTA_BITWISE_DATA: argument for non-boolean operations * (NLA_NESTED: nft_data_attributes) + * @NFTA_BITWISE_SREG2: second source register (NLA_U32: nft_registers) * * The bitwise expression supports boolean and shift operations. It implements * the boolean operations by performing the following operation: @@ -613,6 +620,7 @@ enum nft_bitwise_attributes { NFTA_BITWISE_XOR, NFTA_BITWISE_OP, NFTA_BITWISE_DATA, + NFTA_BITWISE_SREG2, __NFTA_BITWISE_MAX }; #define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1) -- cgit v1.2.3 From 83e041522eb9c45479f4490b212687cf1e7e9999 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 15 Nov 2024 16:54:40 +0000 Subject: io_uring: temporarily disable registered waits Disable wait argument registration as it'll be replaced with a more generic feature. We'll still need IORING_ENTER_EXT_ARG_REG parsing in a few commits so leave it be. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/70b1d1d218c41ba77a76d1789c8641dab0b0563e.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5d08435b95a8..132f5db3d4e8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -627,9 +627,6 @@ enum io_uring_register_op { /* resize CQ ring */ IORING_REGISTER_RESIZE_RINGS = 33, - /* register fixed io_uring_reg_wait arguments */ - IORING_REGISTER_CQWAIT_REG = 34, - /* this goes last */ IORING_REGISTER_LAST, -- cgit v1.2.3 From dfbbfbf191878e8dd422768ce009858d8b5b761e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 15 Nov 2024 16:54:41 +0000 Subject: io_uring: introduce concept of memory regions We've got a good number of mappings we share with the userspace, that includes the main rings, provided buffer rings, upcoming rings for zerocopy rx and more. All of them duplicate user argument parsing and some internal details as well (page pinnning, huge page optimisations, mmap'ing, etc.) Introduce a notion of regions. For userspace for now it's just a new structure called struct io_uring_region_desc which is supposed to parameterise all such mapping / queue creations. A region either represents a user provided chunk of memory, in which case the user_addr field should point to it, or a request for the kernel to allocate the memory, in which case the user would need to mmap it after using the offset returned in the mmap_offset field. With a uniform userspace API we can avoid additional boiler plate code and apply future optimisation to all of them at once. Internally, there is a new structure struct io_mapped_region holding all relevant runtime information and some helpers to work with it. This patch limits it to user provided regions. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0e6fe25818dfbaebd1bd90b870a6cac503fe1a24.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 132f5db3d4e8..5cbfd330c688 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -647,6 +647,20 @@ struct io_uring_files_update { __aligned_u64 /* __s32 * */ fds; }; +enum { + /* initialise with user provided memory pointed by user_addr */ + IORING_MEM_REGION_TYPE_USER = 1, +}; + +struct io_uring_region_desc { + __u64 user_addr; + __u64 size; + __u32 flags; + __u32 id; + __u64 mmap_offset; + __u64 __resv[4]; +}; + /* * Register a fully sparse file space, rather than pass in an array of all * -1 file descriptors. -- cgit v1.2.3 From 93238e66185524aad925acefb2312203b9e26d63 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 15 Nov 2024 16:54:42 +0000 Subject: io_uring: add memory region registration Regions will serve multiple purposes. First, with it we can decouple ring/etc. object creation from registration / mapping of the memory they will be placed in. We already have hacks that allow to put both SQ and CQ into the same huge page, in the future we should be able to: region = create_region(io_ring); create_pbuf_ring(io_uring, region, offset=0); create_pbuf_ring(io_uring, region, offset=N); The second use case is efficiently passing parameters. The following patch enables back on top of regions IORING_ENTER_EXT_ARG_REG, which optimises wait arguments. It'll also be useful for request arguments replacing iovecs, msghdr, etc. pointers. Eventually it would also be handy for BPF as well if it comes to fruition. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0798cf3a14fad19cfc96fc9feca5f3e11481691d.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5cbfd330c688..1ee35890125b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -627,6 +627,8 @@ enum io_uring_register_op { /* resize CQ ring */ IORING_REGISTER_RESIZE_RINGS = 33, + IORING_REGISTER_MEM_REGION = 34, + /* this goes last */ IORING_REGISTER_LAST, @@ -661,6 +663,12 @@ struct io_uring_region_desc { __u64 __resv[4]; }; +struct io_uring_mem_region_reg { + __u64 region_uptr; /* struct io_uring_region_desc * */ + __u64 flags; + __u64 __resv[2]; +}; + /* * Register a fully sparse file space, rather than pass in an array of all * -1 file descriptors. -- cgit v1.2.3 From d617b3147d54c42351eac63b5398d4ddf4f4011b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 15 Nov 2024 16:54:43 +0000 Subject: io_uring: restore back registered wait arguments Now we've got a more generic region registration API, place IORING_ENTER_EXT_ARG_REG and re-enable it. First, the user has to register a region with the IORING_MEM_REGION_REG_WAIT_ARG flag set. It can only be done for a ring in a disabled state, aka IORING_SETUP_R_DISABLED, to avoid races with already running waiters. With that we should have stable constant values for ctx->cq_wait_{size,arg} in io_get_ext_arg_reg() and hence no READ_ONCE required. The other API difference is that we're now passing byte offsets instead of indexes. The user _must_ align all offsets / pointers to the native word size, failing to do so might but not necessarily has to lead to a failure usually returned as -EFAULT. liburing will be hiding this details from users. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/81822c1b4ffbe8ad391b4f9ad1564def0d26d990.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1ee35890125b..4418d0192959 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -663,6 +663,11 @@ struct io_uring_region_desc { __u64 __resv[4]; }; +enum { + /* expose the region as registered wait arguments */ + IORING_MEM_REGION_REG_WAIT_ARG = 1, +}; + struct io_uring_mem_region_reg { __u64 region_uptr; /* struct io_uring_region_desc * */ __u64 flags; -- cgit v1.2.3 From c750629caeca01979da3403f4bebecda88713233 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 18 Nov 2024 15:14:34 +0000 Subject: io_uring: remove io_uring_cqwait_reg_arg A separate wait argument registration API was removed, also delete leftover uapi definitions. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/143b6a53591badac23632d3e6fa3e5db4b342ee2.1731942445.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4418d0192959..aac9a4f8fa9a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -873,20 +873,6 @@ enum { IORING_REG_WAIT_TS = (1U << 0), }; -/* - * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of - * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is - * called rather than pass in a wait argument structure separately. - */ -struct io_uring_cqwait_reg_arg { - __u32 flags; - __u32 struct_size; - __u32 nr_entries; - __u32 pad; - __u64 user_addr; - __u64 pad2[3]; -}; - /* * Argument for io_uring_enter(2) with * IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument -- cgit v1.2.3 From ebda123fe703f492d7d557a4da00888ddec4779e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Nov 2024 12:43:04 -0800 Subject: Revert "UAPI: ethtool: Use __struct_group() in struct ethtool_link_settings" This reverts commit 43d3487035e9a86fad952de4240a518614240d43. We cannot use tagged struct groups in UAPI because C++ will throw syntax errors even under "extern C". Signed-off-by: Kees Cook Link: https://patch.msgid.link/20241115204308.3821419-2-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index fc1f54b065f9..c405ed63acfa 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2511,24 +2511,21 @@ enum ethtool_reset_flags { * autonegotiation; 0 if unknown or not applicable. Read-only. */ struct ethtool_link_settings { - /* New members MUST be added within the __struct_group() macro below. */ - __struct_group(ethtool_link_settings_hdr, hdr, /* no attrs */, - __u32 cmd; - __u32 speed; - __u8 duplex; - __u8 port; - __u8 phy_address; - __u8 autoneg; - __u8 mdio_support; - __u8 eth_tp_mdix; - __u8 eth_tp_mdix_ctrl; - __s8 link_mode_masks_nwords; - __u8 transceiver; - __u8 master_slave_cfg; - __u8 master_slave_state; - __u8 rate_matching; - __u32 reserved[7]; - ); + __u32 cmd; + __u32 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 autoneg; + __u8 mdio_support; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __s8 link_mode_masks_nwords; + __u8 transceiver; + __u8 master_slave_cfg; + __u8 master_slave_state; + __u8 rate_matching; + __u32 reserved[7]; __u32 link_mode_masks[]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; -- cgit v1.2.3 From 96c677fca54a28fcfea4dbab9c1f2530bd0a08d1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Nov 2024 12:43:05 -0800 Subject: UAPI: ethtool: Avoid flex-array in struct ethtool_link_settings struct ethtool_link_settings tends to be used as a header for other structures that have trailing bytes[1], but has a trailing flexible array itself. Using this overlapped with other structures leads to ambiguous object sizing in the compiler, so we want to avoid such situations (which have caused real bugs in the past). Detecting this can be done with -Wflex-array-member-not-at-end, which will need to be enabled globally. Using a tagged struct_group() to create a new ethtool_link_settings_hdr structure isn't possible as it seems we cannot use the tagged variant of struct_group() due to syntax issues from C++'s perspective (even within "extern C")[2]. Instead, we can just leave the offending member defined in UAPI and remove it from the kernel's view of the structure, as Linux doesn't actually use this member at all. There is also no change in size since it was already a flexible array that didn't contribute to size returned by any use of sizeof(). Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/lkml/20241109100213.262a2fa0@kernel.org/ [2] Link: https://lore.kernel.org/lkml/0bc2809fe2a6c11dd4c8a9a10d9bd65cccdb559b.1730238285.git.gustavoars@kernel.org/ [1] Signed-off-by: Kees Cook Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20241115204308.3821419-3-kees@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index c405ed63acfa..7e1b3820f91f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2526,12 +2526,19 @@ struct ethtool_link_settings { __u8 master_slave_state; __u8 rate_matching; __u32 reserved[7]; +#ifndef __KERNEL__ + /* Linux builds with -Wflex-array-member-not-at-end but does + * not use the "link_mode_masks" member. Leave it defined for + * userspace for now, and when userspace wants to start using + * -Wfamnae, we'll need a new solution. + */ __u32 link_mode_masks[]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; * __u32 map_advertising[link_mode_masks_nwords]; * __u32 map_lp_advertising[link_mode_masks_nwords]; */ +#endif }; /** -- cgit v1.2.3