From fa1ffdb9e2937d04657c33a146aa0d2cd39abba0 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 17 Jul 2023 15:12:12 -0300 Subject: iommufd/selftest: Test iommufd_device_replace() Allow the selftest to call the function on the mock idev, add some tests to exercise it. Link: https://lore.kernel.org/r/16-v8-6659224517ea+532-iommufd_alloc_jgg@nvidia.com Reviewed-by: Kevin Tian Tested-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 9657c58813dc..0ac60256b659 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -23,6 +23,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, void iommufd_device_unbind(struct iommufd_device *idev); int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); +int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); -- cgit v1.2.3 From 7074d7bd67d495cb3f6fe7c7c96b357a3b9d4ec2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 17 Jul 2023 15:12:13 -0300 Subject: iommufd: Add IOMMU_HWPT_ALLOC This allows userspace to manually create HWPTs on IOAS's and then use those HWPTs as inputs to iommufd_device_attach/replace(). Following series will extend this to allow creating iommu_domains with driver specific parameters. Link: https://lore.kernel.org/r/17-v8-6659224517ea+532-iommufd_alloc_jgg@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 98ebba80cfa1..8245c01adca6 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -45,6 +45,7 @@ enum { IOMMUFD_CMD_IOAS_UNMAP, IOMMUFD_CMD_OPTION, IOMMUFD_CMD_VFIO_IOAS, + IOMMUFD_CMD_HWPT_ALLOC, }; /** @@ -344,4 +345,29 @@ struct iommu_vfio_ioas { __u16 __reserved; }; #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) + +/** + * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) + * @size: sizeof(struct iommu_hwpt_alloc) + * @flags: Must be 0 + * @dev_id: The device to allocate this HWPT for + * @pt_id: The IOAS to connect this HWPT to + * @out_hwpt_id: The ID of the new HWPT + * @__reserved: Must be 0 + * + * Explicitly allocate a hardware page table object. This is the same object + * type that is returned by iommufd_device_attach() and represents the + * underlying iommu driver's iommu_domain kernel object. + * + * A HWPT will be created with the IOVA mappings from the given IOAS. + */ +struct iommu_hwpt_alloc { + __u32 size; + __u32 flags; + __u32 dev_id; + __u32 pt_id; + __u32 out_hwpt_id; + __u32 __reserved; +}; +#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) #endif -- cgit v1.2.3 From 70c16123d865046899c36e3655b2d611f38f51a3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 27 Jul 2023 23:33:27 -0700 Subject: iommufd: Add iommufd_access_replace() API Taking advantage of the new iommufd_access_change_ioas_id helper, add an iommufd_access_replace() API for the VFIO emulated pathway to use. Link: https://lore.kernel.org/r/a3267b924fd5f45e0d3a1dd13a9237e923563862.1690523699.git.nicolinc@nvidia.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 0ac60256b659..ffc3a949f837 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -49,6 +49,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, const struct iommufd_access_ops *ops, void *data, u32 *id); void iommufd_access_destroy(struct iommufd_access *access); int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id); +int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id); void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); -- cgit v1.2.3 From c157fd88619946cd62af47c45214c479749dad8d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 27 Jul 2023 23:33:29 -0700 Subject: vfio: Support IO page table replacement Now both the physical path and the emulated path can support an IO page table replacement. Call iommufd_device_replace/iommufd_access_replace(), when vdev->iommufd_attached is true. Also update the VFIO_DEVICE_ATTACH_IOMMUFD_PT kdoc in the uAPI header. Link: https://lore.kernel.org/r/b5f01956ff161f76aa52c95b0fa1ad6eaca95c4a.1690523699.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/vfio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index fa06e3eb4955..537157ff8670 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -939,6 +939,12 @@ struct vfio_device_bind_iommufd { * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only * allowed on cdev fds. * + * If a vfio device is currently attached to a valid hw_pagetable, without doing + * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl + * passing in another hw_pagetable (hwpt) id is allowed. This action, also known + * as a hw_pagetable replacement, will replace the device's currently attached + * hw_pagetable with a new hw_pagetable corresponding to the given pt_id. + * * Return: 0 on success, -errno on failure. */ struct vfio_device_attach_iommufd_pt { -- cgit v1.2.3 From 92766e1b953d6e419684b39f55dab574287dd144 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 18 Aug 2023 03:10:29 -0700 Subject: iommu: Move dev_iommu_ops() to private header dev_iommu_ops() is essentially only used in iommu subsystem, so move to a private header to avoid being abused by other drivers. Link: https://lore.kernel.org/r/20230818101033.4100-2-yi.l.liu@intel.com Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d31642596675..e0245aa82b75 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -450,17 +450,6 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } -static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) -{ - /* - * Assume that valid ops must be installed if iommu_probe_device() - * has succeeded. The device ops are essentially for internal use - * within the IOMMU subsystem itself, so we should be able to trust - * ourselves not to misuse the helper. - */ - return dev->iommu->iommu_dev->ops; -} - extern int bus_iommu_probe(const struct bus_type *bus); extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); -- cgit v1.2.3 From 60fedb262bbc632ab58bfdec7f6e47b2f94992d3 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 18 Aug 2023 03:10:30 -0700 Subject: iommu: Add new iommu op to get iommu hardware information Introduce a new iommu op to get the IOMMU hardware capabilities for iommufd. This information will be used by any vIOMMU driver which is owned by userspace. This op chooses to make the special parameters opaque to the core. This suits the current usage model where accessing any of the IOMMU device special parameters does require a userspace driver that matches the kernel driver. If a need for common parameters, implemented similarly by several drivers, arises then there's room in the design to grow a generic parameter set as well. No wrapper API is added as it is supposed to be used by iommufd only. Different IOMMU hardware would have different hardware information. So the information reported differs as well. To let the external user understand the difference, enum iommu_hw_info_type is defined. For the iommu drivers that are capable to report hardware information, it should have a unique iommu_hw_info_type and return to caller. For the driver doesn't report hardware information, caller just uses IOMMU_HW_INFO_TYPE_NONE if a type is required. Link: https://lore.kernel.org/r/20230818101033.4100-3-yi.l.liu@intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 5 +++++ include/uapi/linux/iommufd.h | 9 +++++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e0245aa82b75..bd6a1110b294 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -228,6 +228,10 @@ struct iommu_iotlb_gather { /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability + * @hw_info: report iommu hardware information. The data buffer returned by this + * op is allocated in the iommu driver and freed by the caller after + * use. The information type is one of enum iommu_hw_info_type defined + * in include/uapi/linux/iommufd.h. * @domain_alloc: allocate iommu domain * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling @@ -257,6 +261,7 @@ struct iommu_iotlb_gather { */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); + void *(*hw_info)(struct device *dev, u32 *length, u32 *type); /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 8245c01adca6..ac11ace21edb 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -370,4 +370,13 @@ struct iommu_hwpt_alloc { __u32 __reserved; }; #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) + +/** + * enum iommu_hw_info_type - IOMMU Hardware Info Types + * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware + * info + */ +enum iommu_hw_info_type { + IOMMU_HW_INFO_TYPE_NONE, +}; #endif -- cgit v1.2.3 From 55dd4023cead250c89decf1a7a882c94cbf5765a Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 18 Aug 2023 03:10:31 -0700 Subject: iommufd: Add IOMMU_GET_HW_INFO Under nested IOMMU translation, userspace owns the stage-1 translation table (e.g. the stage-1 page table of Intel VT-d or the context table of ARM SMMUv3, and etc.). Stage-1 translation tables are vendor specific, and need to be compatible with the underlying IOMMU hardware. Hence, userspace should know the IOMMU hardware capability before creating and configuring the stage-1 translation table to kernel. This adds IOMMU_GET_HW_INFO ioctl to query the IOMMU hardware information (a.k.a capability) for a given device. The returned data is vendor specific, userspace needs to decode it with the structure by the output @out_data_type field. As only physical devices have IOMMU hardware, so this will return error if the given device is not a physical device. Link: https://lore.kernel.org/r/20230818101033.4100-4-yi.l.liu@intel.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index ac11ace21edb..e01d5ac00dc3 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -46,6 +46,7 @@ enum { IOMMUFD_CMD_OPTION, IOMMUFD_CMD_VFIO_IOAS, IOMMUFD_CMD_HWPT_ALLOC, + IOMMUFD_CMD_GET_HW_INFO, }; /** @@ -379,4 +380,42 @@ struct iommu_hwpt_alloc { enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE, }; + +/** + * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) + * @size: sizeof(struct iommu_hw_info) + * @flags: Must be 0 + * @dev_id: The device bound to the iommufd + * @data_len: Input the length of a user buffer in bytes. Output the length of + * data that kernel supports + * @data_uptr: User pointer to a user-space buffer used by the kernel to fill + * the iommu type specific hardware information data + * @out_data_type: Output the iommu hardware info type as defined in the enum + * iommu_hw_info_type. + * @__reserved: Must be 0 + * + * Query an iommu type specific hardware information data from an iommu behind + * a given device that has been bound to iommufd. This hardware info data will + * be used to sync capabilities between the virtual iommu and the physical + * iommu, e.g. a nested translation setup needs to check the hardware info, so + * a guest stage-1 page table can be compatible with the physical iommu. + * + * To capture an iommu type specific hardware information data, @data_uptr and + * its length @data_len must be provided. Trailing bytes will be zeroed if the + * user buffer is larger than the data that kernel has. Otherwise, kernel only + * fills the buffer using the given length in @data_len. If the ioctl succeeds, + * @data_len will be updated to the length that kernel actually supports, + * @out_data_type will be filled to decode the data filled in the buffer + * pointed by @data_uptr. Input @data_len == zero is allowed. + */ +struct iommu_hw_info { + __u32 size; + __u32 flags; + __u32 dev_id; + __u32 data_len; + __aligned_u64 data_uptr; + __u32 out_data_type; + __u32 __reserved; +}; +#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) #endif -- cgit v1.2.3 From 55243393b06ced5378dcdbb7d51bd8a8edc69294 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 18 Aug 2023 03:10:33 -0700 Subject: iommu/vt-d: Implement hw_info for iommu capability query Add intel_iommu_hw_info() to report cap_reg and ecap_reg information. Link: https://lore.kernel.org/r/20230818101033.4100-6-yi.l.liu@intel.com Signed-off-by: Lu Baolu Acked-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index e01d5ac00dc3..b4ba0c0cbab6 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -372,13 +372,36 @@ struct iommu_hwpt_alloc { }; #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) +/** + * struct iommu_hw_info_vtd - Intel VT-d hardware information + * + * @flags: Must be 0 + * @__reserved: Must be 0 + * + * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec + * section 11.4.2 Capability Register. + * @ecap_reg: Value of Intel VT-d capability register defined in VT-d spec + * section 11.4.3 Extended Capability Register. + * + * User needs to understand the Intel VT-d specification to decode the + * register value. + */ +struct iommu_hw_info_vtd { + __u32 flags; + __u32 __reserved; + __aligned_u64 cap_reg; + __aligned_u64 ecap_reg; +}; + /** * enum iommu_hw_info_type - IOMMU Hardware Info Types * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware * info + * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE, + IOMMU_HW_INFO_TYPE_INTEL_VTD, }; /** -- cgit v1.2.3