From 4ff542163397073f86eda484318d61980ff1031d Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 28 Sep 2023 00:15:26 -0700 Subject: iommufd: Support allocating nested parent domain Extend IOMMU_HWPT_ALLOC to allocate domains to be used as parent (stage-2) in nested translation. Add IOMMU_HWPT_ALLOC_NEST_PARENT to the uAPI. Link: https://lore.kernel.org/r/20230928071528.26258-5-yi.l.liu@intel.com Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index b4ba0c0cbab6..4a7c5c8fdbb4 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -347,10 +347,20 @@ struct iommu_vfio_ioas { }; #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) +/** + * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation + * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a domain which can serve + * as the parent domain in the nesting + * configuration. + */ +enum iommufd_hwpt_alloc_flags { + IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, +}; + /** * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) * @size: sizeof(struct iommu_hwpt_alloc) - * @flags: Must be 0 + * @flags: Combination of enum iommufd_hwpt_alloc_flags * @dev_id: The device to allocate this HWPT for * @pt_id: The IOAS to connect this HWPT to * @out_hwpt_id: The ID of the new HWPT -- cgit v1.2.3 From b5f9e63278d6f32789478acf1ed41d21d92b36cf Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 17 Oct 2023 11:15:52 -0700 Subject: iommufd: Correct IOMMU_HWPT_ALLOC_NEST_PARENT description The IOMMU_HWPT_ALLOC_NEST_PARENT flag is used to allocate a HWPT. Though a HWPT holds a domain in the core structure, it is still quite confusing to describe it using "domain" in the uAPI kdoc. Correct it to "HWPT". Fixes: 4ff542163397 ("iommufd: Support allocating nested parent domain") Link: https://lore.kernel.org/r/20231017181552.12667-1-nicolinc@nvidia.com Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4a7c5c8fdbb4..be7a95042677 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -349,9 +349,8 @@ struct iommu_vfio_ioas { /** * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation - * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a domain which can serve - * as the parent domain in the nesting - * configuration. + * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as + * the parent HWPT in a nesting configuration. */ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, -- cgit v1.2.3 From 5f9bdbf4c65860cc8b9c544d92bfd76fbea8d9c5 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Tue, 24 Oct 2023 14:50:56 +0100 Subject: iommufd: Add a flag to enforce dirty tracking on attach Throughout IOMMU domain lifetime that wants to use dirty tracking, some guarantees are needed such that any device attached to the iommu_domain supports dirty tracking. The idea is to handle a case where IOMMU in the system are assymetric feature-wise and thus the capability may not be supported for all devices. The enforcement is done by adding a flag into HWPT_ALLOC namely: IOMMU_HWPT_ALLOC_DIRTY_TRACKING .. Passed in HWPT_ALLOC ioctl() flags. The enforcement is done by creating a iommu_domain via domain_alloc_user() and validating the requested flags with what the device IOMMU supports (and failing accordingly) advertised). Advertising the new IOMMU domain feature flag requires that the individual iommu driver capability is supported when a future device attachment happens. Link: https://lore.kernel.org/r/20231024135109.73787-6-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index be7a95042677..c76248410120 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -351,9 +351,12 @@ struct iommu_vfio_ioas { * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as * the parent HWPT in a nesting configuration. + * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is + * enforced on device attachment */ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, }; /** -- cgit v1.2.3 From e2a4b294784957fc28ecb1fed8a7e69da18eb18d Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Tue, 24 Oct 2023 14:50:57 +0100 Subject: iommufd: Add IOMMU_HWPT_SET_DIRTY_TRACKING Every IOMMU driver should be able to implement the needed iommu domain ops to control dirty tracking. Connect a hw_pagetable to the IOMMU core dirty tracking ops, specifically the ability to enable/disable dirty tracking on an IOMMU domain (hw_pagetable id). To that end add an io_pagetable kernel API to toggle dirty tracking: * iopt_set_dirty_tracking(iopt, [domain], state) The intended caller of this is via the hw_pagetable object that is created. Internally it will ensure the leftover dirty state is cleared /right before/ dirty tracking starts. This is also useful for iommu drivers which may decide that dirty tracking is always-enabled at boot without wanting to toggle dynamically via corresponding iommu domain op. Link: https://lore.kernel.org/r/20231024135109.73787-7-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index c76248410120..5c82b68c88f3 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -47,6 +47,7 @@ enum { IOMMUFD_CMD_VFIO_IOAS, IOMMUFD_CMD_HWPT_ALLOC, IOMMUFD_CMD_GET_HW_INFO, + IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, }; /** @@ -453,4 +454,31 @@ struct iommu_hw_info { __u32 __reserved; }; #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) + +/* + * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty + * tracking + * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking + */ +enum iommufd_hwpt_set_dirty_tracking_flags { + IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1, +}; + +/** + * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING) + * @size: sizeof(struct iommu_hwpt_set_dirty_tracking) + * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags + * @hwpt_id: HW pagetable ID that represents the IOMMU domain + * @__reserved: Must be 0 + * + * Toggle dirty tracking on an HW pagetable. + */ +struct iommu_hwpt_set_dirty_tracking { + __u32 size; + __u32 flags; + __u32 hwpt_id; + __u32 __reserved; +}; +#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ + IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) #endif -- cgit v1.2.3 From b9a60d6f850e4470017b60f731220a58cda199aa Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Tue, 24 Oct 2023 14:50:58 +0100 Subject: iommufd: Add IOMMU_HWPT_GET_DIRTY_BITMAP Connect a hw_pagetable to the IOMMU core dirty tracking read_and_clear_dirty iommu domain op. It exposes all of the functionality for the UAPI that read the dirtied IOVAs while clearing the Dirty bits from the PTEs. In doing so, add an IO pagetable API iopt_read_and_clear_dirty_data() that performs the reading of dirty IOPTEs for a given IOVA range and then copying back to userspace bitmap. Underneath it uses the IOMMU domain kernel API which will read the dirty bits, as well as atomically clearing the IOPTE dirty bit and flushing the IOTLB at the end. The IOVA bitmaps usage takes care of the iteration of the bitmaps user pages efficiently and without copies. Within the iterator function we iterate over io-pagetable contigous areas that have been mapped. Contrary to past incantation of a similar interface in VFIO the IOVA range to be scanned is tied in to the bitmap size, thus the application needs to pass a appropriately sized bitmap address taking into account the iova range being passed *and* page size ... as opposed to allowing bitmap-iova != iova. Link: https://lore.kernel.org/r/20231024135109.73787-8-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 5c82b68c88f3..dce38e32ca84 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -48,6 +48,7 @@ enum { IOMMUFD_CMD_HWPT_ALLOC, IOMMUFD_CMD_GET_HW_INFO, IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, }; /** @@ -481,4 +482,38 @@ struct iommu_hwpt_set_dirty_tracking { }; #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) + +/** + * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) + * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) + * @hwpt_id: HW pagetable ID that represents the IOMMU domain + * @flags: Must be zero + * @__reserved: Must be 0 + * @iova: base IOVA of the bitmap first bit + * @length: IOVA range size + * @page_size: page size granularity of each bit in the bitmap + * @data: bitmap where to set the dirty bits. The bitmap bits each + * represent a page_size which you deviate from an arbitrary iova. + * + * Checking a given IOVA is dirty: + * + * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) + * + * Walk the IOMMU pagetables for a given IOVA range to return a bitmap + * with the dirty IOVAs. In doing so it will also by default clear any + * dirty bit metadata set in the IOPTE. + */ +struct iommu_hwpt_get_dirty_bitmap { + __u32 size; + __u32 hwpt_id; + __u32 flags; + __u32 __reserved; + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 page_size; + __aligned_u64 data; +}; +#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) + #endif -- cgit v1.2.3 From 7623683857e52b75184d37862c70f1230aef2edd Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Tue, 24 Oct 2023 14:50:59 +0100 Subject: iommufd: Add capabilities to IOMMU_GET_HW_INFO Extend IOMMUFD_CMD_GET_HW_INFO op to query generic iommu capabilities for a given device. Capabilities are IOMMU agnostic and use device_iommu_capable() API passing one of the IOMMU_CAP_*. Enumerate IOMMU_CAP_DIRTY_TRACKING for now in the out_capabilities field returned back to userspace. Link: https://lore.kernel.org/r/20231024135109.73787-9-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index dce38e32ca84..036ebc6c19cf 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -418,6 +418,20 @@ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_INTEL_VTD, }; +/** + * enum iommufd_hw_capabilities + * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking + * If available, it means the following APIs + * are supported: + * + * IOMMU_HWPT_GET_DIRTY_BITMAP + * IOMMU_HWPT_SET_DIRTY_TRACKING + * + */ +enum iommufd_hw_capabilities { + IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, +}; + /** * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) * @size: sizeof(struct iommu_hw_info) @@ -429,6 +443,8 @@ enum iommu_hw_info_type { * the iommu type specific hardware information data * @out_data_type: Output the iommu hardware info type as defined in the enum * iommu_hw_info_type. + * @out_capabilities: Output the generic iommu capability info type as defined + * in the enum iommu_hw_capabilities. * @__reserved: Must be 0 * * Query an iommu type specific hardware information data from an iommu behind @@ -453,6 +469,7 @@ struct iommu_hw_info { __aligned_u64 data_uptr; __u32 out_data_type; __u32 __reserved; + __aligned_u64 out_capabilities; }; #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) -- cgit v1.2.3 From 609848132c71316df3260d1ec066539c21bba585 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Tue, 24 Oct 2023 14:51:00 +0100 Subject: iommufd: Add a flag to skip clearing of IOPTE dirty VFIO has an operation where it unmaps an IOVA while returning a bitmap with the dirty data. In reality the operation doesn't quite query the IO pagetables that the PTE was dirty or not. Instead it marks as dirty on anything that was mapped, and doing so in one syscall. In IOMMUFD the equivalent is done in two operations by querying with GET_DIRTY_IOVA followed by UNMAP_IOVA. However, this would incur two TLB flushes given that after clearing dirty bits IOMMU implementations require invalidating their IOTLB, plus another invalidation needed for the UNMAP. To allow dirty bits to be queried faster, add a flag (IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR) that requests to not clear the dirty bits from the PTE (but just reading them), under the expectation that the next operation is the unmap. An alternative is to unmap and just perpectually mark as dirty as that's the same behaviour as today. So here equivalent functionally can be provided with unmap alone, and if real dirty info is required it will amortize the cost while querying. There's still a race against DMA where in theory the unmap of the IOVA (when the guest invalidates the IOTLB via emulated iommu) would race against the VF performing DMA on the same IOVA. As discussed in [0], we are accepting to resolve this race as throwing away the DMA and it doesn't matter if it hit physical DRAM or not, the VM can't tell if we threw it away because the DMA was blocked or because we failed to copy the DRAM. [0] https://lore.kernel.org/linux-iommu/20220502185239.GR8364@nvidia.com/ Link: https://lore.kernel.org/r/20231024135109.73787-10-joao.m.martins@oracle.com Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 036ebc6c19cf..c44eecf5d318 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -500,11 +500,24 @@ struct iommu_hwpt_set_dirty_tracking { #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) +/** + * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits + * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing + * any dirty bits metadata. This flag + * can be passed in the expectation + * where the next operation is an unmap + * of the same IOVA range. + * + */ +enum iommufd_hwpt_get_dirty_bitmap_flags { + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1, +}; + /** * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) * @hwpt_id: HW pagetable ID that represents the IOMMU domain - * @flags: Must be zero + * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags * @__reserved: Must be 0 * @iova: base IOVA of the bitmap first bit * @length: IOVA range size -- cgit v1.2.3 From bd529dbb661d62bd9f03e44c9fc837d98a190499 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 25 Oct 2023 21:39:35 -0700 Subject: iommufd: Add a nested HW pagetable object IOMMU_HWPT_ALLOC already supports iommu_domain allocation for usersapce. But it can only allocate a hw_pagetable that associates to a given IOAS, i.e. only a kernel-managed hw_pagetable of IOMMUFD_OBJ_HWPT_PAGING type. IOMMU drivers can now support user-managed hw_pagetables, for two-stage translation use cases that require user data input from the user space. Add a new IOMMUFD_OBJ_HWPT_NESTED type with its abort/destroy(). Pair it with a new iommufd_hwpt_nested structure and its to_hwpt_nested() helper. Update the to_hwpt_paging() helper, so a NESTED-type hw_pagetable can be handled in the callers, for example iommufd_hw_pagetable_enforce_rr(). Screen the inputs including the parent PAGING-type hw_pagetable that has a need of a new nest_parent flag in the iommufd_hwpt_paging structure. Extend the IOMMU_HWPT_ALLOC ioctl to accept an IOMMU driver specific data input which is tagged by the enum iommu_hwpt_data_type. Also, update the @pt_id to accept hwpt_id too besides an ioas_id. Then, use them to allocate a hw_pagetable of IOMMUFD_OBJ_HWPT_NESTED type using the iommufd_hw_pagetable_alloc_nested() allocator. Link: https://lore.kernel.org/r/20231026043938.63898-8-yi.l.liu@intel.com Signed-off-by: Nicolin Chen Co-developed-by: Yi Liu Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index c44eecf5d318..d816deac906f 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -361,20 +361,44 @@ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, }; +/** + * enum iommu_hwpt_data_type - IOMMU HWPT Data Type + * @IOMMU_HWPT_DATA_NONE: no data + */ +enum iommu_hwpt_data_type { + IOMMU_HWPT_DATA_NONE, +}; + /** * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) * @size: sizeof(struct iommu_hwpt_alloc) * @flags: Combination of enum iommufd_hwpt_alloc_flags * @dev_id: The device to allocate this HWPT for - * @pt_id: The IOAS to connect this HWPT to + * @pt_id: The IOAS or HWPT to connect this HWPT to * @out_hwpt_id: The ID of the new HWPT * @__reserved: Must be 0 + * @data_type: One of enum iommu_hwpt_data_type + * @data_len: Length of the type specific data + * @data_uptr: User pointer to the type specific data * * Explicitly allocate a hardware page table object. This is the same object * type that is returned by iommufd_device_attach() and represents the * underlying iommu driver's iommu_domain kernel object. * - * A HWPT will be created with the IOVA mappings from the given IOAS. + * A kernel-managed HWPT will be created with the mappings from the given + * IOAS via the @pt_id. The @data_type for this allocation must be set to + * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a + * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. + * + * A user-managed nested HWPT will be created from a given parent HWPT via + * @pt_id, in which the parent HWPT must be allocated previously via the + * same ioctl from a given IOAS (@pt_id). In this case, the @data_type + * must be set to a pre-defined type corresponding to an I/O page table + * type supported by the underlying IOMMU hardware. + * + * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and + * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr + * must be given. */ struct iommu_hwpt_alloc { __u32 size; @@ -383,6 +407,9 @@ struct iommu_hwpt_alloc { __u32 pt_id; __u32 out_hwpt_id; __u32 __reserved; + __u32 data_type; + __u32 data_len; + __aligned_u64 data_uptr; }; #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) -- cgit v1.2.3 From 82b6661c9c35e60946dee536545b4848f25eafab Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 25 Oct 2023 21:42:09 -0700 Subject: iommufd: Add data structure for Intel VT-d stage-1 domain allocation This adds IOMMU_HWPT_DATA_VTD_S1 for stage-1 hw_pagetable of Intel VT-d and the corressponding data structure for userspace specified parameter for the domain allocation. Link: https://lore.kernel.org/r/20231026044216.64964-2-yi.l.liu@intel.com Reviewed-by: Kevin Tian Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index d816deac906f..3ce5ee5f09b6 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -361,12 +361,42 @@ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, }; +/** + * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table + * entry attributes + * @IOMMU_VTD_S1_SRE: Supervisor request + * @IOMMU_VTD_S1_EAFE: Extended access enable + * @IOMMU_VTD_S1_WPE: Write protect enable + */ +enum iommu_hwpt_vtd_s1_flags { + IOMMU_VTD_S1_SRE = 1 << 0, + IOMMU_VTD_S1_EAFE = 1 << 1, + IOMMU_VTD_S1_WPE = 1 << 2, +}; + +/** + * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table + * info (IOMMU_HWPT_DATA_VTD_S1) + * @flags: Combination of enum iommu_hwpt_vtd_s1_flags + * @pgtbl_addr: The base address of the stage-1 page table. + * @addr_width: The address width of the stage-1 page table + * @__reserved: Must be 0 + */ +struct iommu_hwpt_vtd_s1 { + __aligned_u64 flags; + __aligned_u64 pgtbl_addr; + __u32 addr_width; + __u32 __reserved; +}; + /** * enum iommu_hwpt_data_type - IOMMU HWPT Data Type * @IOMMU_HWPT_DATA_NONE: no data + * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table */ enum iommu_hwpt_data_type { IOMMU_HWPT_DATA_NONE, + IOMMU_HWPT_DATA_VTD_S1, }; /** -- cgit v1.2.3 From 03476e687eb07b94f7cdb07cd3c7c4304b6c58b3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 25 Oct 2023 21:42:16 -0700 Subject: iommu/vt-d: Disallow read-only mappings to nest parent domain When remapping hardware is configured by system software in scalable mode as Nested (PGTT=011b) and with PWSNP field Set in the PASID-table-entry, it may Set Accessed bit and Dirty bit (and Extended Access bit if enabled) in first-stage page-table entries even when second-stage mappings indicate that corresponding first-stage page-table is Read-Only. As the result, contents of pages designated by VMM as Read-Only can be modified by IOMMU via PML5E (PML4E for 4-level tables) access as part of address translation process due to DMAs issued by Guest. This disallows read-only mappings in the domain that is supposed to be used as nested parent. Reference from Sapphire Rapids Specification Update [1], errata details, SPR17. Userspace should know this limitation by checking the IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 flag reported in the IOMMU_GET_HW_INFO ioctl. [1] https://www.intel.com/content/www/us/en/content-details/772415/content-details.html Link: https://lore.kernel.org/r/20231026044216.64964-9-yi.l.liu@intel.com Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 3ce5ee5f09b6..0b2bc6252e2c 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -443,10 +443,20 @@ struct iommu_hwpt_alloc { }; #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) +/** + * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info + * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings + * on a nested_parent domain. + * https://www.intel.com/content/www/us/en/content-details/772415/content-details.html + */ +enum iommu_hw_info_vtd_flags { + IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0, +}; + /** * struct iommu_hw_info_vtd - Intel VT-d hardware information * - * @flags: Must be 0 + * @flags: Combination of enum iommu_hw_info_vtd_flags * @__reserved: Must be 0 * * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec -- cgit v1.2.3