From e36ba5ab808ef6237c3148d469c8238674230e2b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:23 -0700 Subject: iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC Introduce a new IOMMUFD_OBJ_VEVENTQ object for vIOMMU Event Queue that provides user space (VMM) another FD to read the vIOMMU Events. Allow a vIOMMU object to allocate vEVENTQs, with a condition that each vIOMMU can only have one single vEVENTQ per type. Add iommufd_veventq_alloc() with iommufd_veventq_ops for the new ioctl. Link: https://patch.msgid.link/r/21acf0751dd5c93846935ee06f93b9c65eff5e04.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 3 ++ include/uapi/linux/iommufd.h | 82 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 11110c749200..8948b1836940 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -34,6 +34,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_FAULT, IOMMUFD_OBJ_VIOMMU, IOMMUFD_OBJ_VDEVICE, + IOMMUFD_OBJ_VEVENTQ, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -93,6 +94,8 @@ struct iommufd_viommu { const struct iommufd_viommu_ops *ops; struct xarray vdevs; + struct list_head veventqs; + struct rw_semaphore veventqs_rwsem; unsigned int type; }; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 78747b24bd0f..dbb8787d9c63 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -55,6 +55,7 @@ enum { IOMMUFD_CMD_VIOMMU_ALLOC = 0x90, IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, + IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, }; /** @@ -1014,4 +1015,85 @@ struct iommu_ioas_change_process { #define IOMMU_IOAS_CHANGE_PROCESS \ _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS) +/** + * enum 
iommu_veventq_flag - flag for struct iommufd_vevent_header + * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs + */ +enum iommu_veventq_flag { + IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0), +}; + +/** + * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status + * @flags: Combination of enum iommu_veventq_flag + * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of + * [0, INT_MAX] where the following index of INT_MAX is 0 + * + * Each iommufd_vevent_header reports a sequence index of the following vEVENT: + * ------------------------------------------------------------------------- + * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN | + * ------------------------------------------------------------------------- + * And this sequence index is expected to be monotonic to the sequence index of + * the previous vEVENT. If two adjacent sequence indexes has a delta larger than + * 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs: + * ------------------------------------------------------------------------- + * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... 
| + * ------------------------------------------------------------------------- + * If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT + * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header + * would be added to the tail, and no data would follow this header: + * --------------------------------------------------------------------------- + * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} | + * --------------------------------------------------------------------------- + */ +struct iommufd_vevent_header { + __u32 flags; + __u32 sequence; +}; + +/** + * enum iommu_veventq_type - Virtual Event Queue Type + * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use + */ +enum iommu_veventq_type { + IOMMU_VEVENTQ_TYPE_DEFAULT = 0, +}; + +/** + * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC) + * @size: sizeof(struct iommu_veventq_alloc) + * @flags: Must be 0 + * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with + * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type + * @veventq_depth: Maximum number of events in the vEVENTQ + * @out_veventq_id: The ID of the new vEVENTQ + * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the + * successfully returned fd after using it + * @__reserved: Must be 0 + * + * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU + * can have multiple FDs for different types, but is confined to one per @type. + * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ, + * if there are vEVENTs available. A vEVENTQ will lose events due to overflow, + * if the number of the vEVENTs hits @veventq_depth. + * + * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by + * a type-specific data structure, in a normal case: + * ------------------------------------------------------------- + * || header0 | data0 | header1 | data1 | ... 
| headerN | dataN || + * ------------------------------------------------------------- + * unless a tailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to + * struct iommufd_vevent_header). + */ +struct iommu_veventq_alloc { + __u32 size; + __u32 flags; + __u32 viommu_id; + __u32 type; + __u32 veventq_depth; + __u32 out_veventq_id; + __u32 out_veventq_fd; + __u32 __reserved; +}; +#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) #endif -- cgit v1.2.3 From ea94b211c5483080b749c142090f4c4de4926e51 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:24 -0700 Subject: iommufd/viommu: Add iommufd_viommu_get_vdev_id helper This is a reverse search v.s. iommufd_viommu_find_dev, as drivers may want to convert a struct device pointer (physical) to its virtual device ID for an event injection to the user space VM. Again, this avoids exposing more core structures to the drivers, than the iommufd_viommu alone. Link: https://patch.msgid.link/r/18b8e8bc1b8104d43b205d21602c036fd0804e56.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 8948b1836940..05cb393aff0a 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -190,6 +190,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, enum iommufd_object_type type); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -203,6 +205,13 @@ iommufd_viommu_find_dev(struct 
iommufd_viommu *viommu, unsigned long vdev_id) { return NULL; } + +static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, + unsigned long *vdev_id) +{ + return -ENOENT; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* -- cgit v1.2.3 From e8e1ef9b77a7a09b7809890a52229f24d3c8b532 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:25 -0700 Subject: iommufd/viommu: Add iommufd_viommu_report_event helper Similar to iommu_report_device_fault, this allows IOMMU drivers to report vIOMMU events from threaded IRQ handlers to user space hypervisors. Link: https://patch.msgid.link/r/44be825042c8255e75d0151b338ffd8ba0e4920b.1741719725.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 05cb393aff0a..60eff9272551 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -11,6 +11,7 @@ #include #include #include +#include struct device; struct file; @@ -192,6 +193,9 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, struct device *dev, unsigned long *vdev_id); +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -212,6 +216,13 @@ static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, { return -ENOENT; } + +static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, + void *event_data, size_t data_len) +{ + return -EOPNOTSUPP; +} #endif /* 
CONFIG_IOMMUFD_DRIVER_CORE */ /* -- cgit v1.2.3 From e7d3fa3d29d5b2ed12d247cf57a0a34fffe89eb8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 11 Mar 2025 12:44:31 -0700 Subject: iommu/arm-smmu-v3: Report events that belong to devices attached to vIOMMU Aside from the IOPF framework, iommufd provides an additional pathway to report hardware events, via the vEVENTQ of vIOMMU infrastructure. Define an iommu_vevent_arm_smmuv3 uAPI structure, and report stage-1 events in the threaded IRQ handler. Also, add another four event record types that can be forwarded to a VM. Link: https://patch.msgid.link/r/5cf6719682fdfdabffdb08374cdf31ad2466d75a.1741719725.git.nicolinc@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Acked-by: Will Deacon Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index dbb8787d9c63..8719d4f5d618 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1054,9 +1054,32 @@ struct iommufd_vevent_header { /** * enum iommu_veventq_type - Virtual Event Queue Type * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue */ enum iommu_veventq_type { IOMMU_VEVENTQ_TYPE_DEFAULT = 0, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1, +}; + +/** + * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event + * (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3) + * @evt: 256-bit ARM SMMUv3 Event record, little-endian. + * Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW Spec) + * - 0x04 C_BAD_STE + * - 0x06 F_STREAM_DISABLED + * - 0x08 C_BAD_SUBSTREAMID + * - 0x0a C_BAD_CD + * - 0x10 F_TRANSLATION + * - 0x11 F_ADDR_SIZE + * - 0x12 F_ACCESS + * - 0x13 F_PERMISSION + * + * StreamID field reports a virtual device ID. 
To receive a virtual event for a + * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC. + */ +struct iommu_vevent_arm_smmuv3 { + __aligned_le64 evt[4]; }; /** -- cgit v1.2.3 From 6aa63a4ec947f350d1a2f9f6aba8591a2455d192 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 24 Mar 2025 21:05:15 -0700 Subject: iommu: Sort out domain user data When DMA/MSI cookies were made first-class citizens back in commit 46983fcd67ac ("iommu: Pull IOVA cookie management into the core"), there was no real need to further expose the two different cookie types. However, now that IOMMUFD wants to add a third type of MSI-mapping cookie, we do have a nicely compelling reason to properly disambiguate things at the domain level beyond just vaguely guessing from the domain type. Meanwhile, we also effectively have another "cookie" in the form of the anonymous union for other user data, which isn't much better in terms of being vague and unenforced. The fact is that all these cookie types are mutually exclusive, in the sense that combining them makes zero sense and/or would be catastrophic (iommu_set_fault_handler() on an SVA domain, anyone?) - the only combination which *might* be reasonable is perhaps a fault handler and an MSI cookie, but nobody's doing that at the moment, so let's rule it out as well for the sake of being clear and robust. To that end, we pull DMA and MSI cookies apart a little more, mostly to clear up the ambiguity at domain teardown, then for clarity (and to save a little space), move them into the union, whose ownership we can then properly describe and enforce entirely unambiguously.
[nicolinc: rebase on latest tree; use prefix IOMMU_COOKIE_; merge unions in iommu_domain; add IOMMU_COOKIE_IOMMUFD for iommufd_hwpt] Link: https://patch.msgid.link/r/1ace9076c95204bbe193ee77499d395f15f44b23.1742871535.git.nicolinc@nvidia.com Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e93d2e918599..06cc14e9993d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,6 +41,7 @@ struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; +struct iommu_dma_msi_cookie; struct iommu_fault_param; struct iommufd_ctx; struct iommufd_viommu; @@ -165,6 +166,15 @@ struct iommu_domain_geometry { bool force_aperture; /* DMA only allowed in mappable range? */ }; +enum iommu_domain_cookie_type { + IOMMU_COOKIE_NONE, + IOMMU_COOKIE_DMA_IOVA, + IOMMU_COOKIE_DMA_MSI, + IOMMU_COOKIE_FAULT_HANDLER, + IOMMU_COOKIE_SVA, + IOMMU_COOKIE_IOMMUFD, +}; + /* Domain feature flags */ #define __IOMMU_DOMAIN_PAGING (1U << 0) /* Support for iommu_map/unmap */ #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API @@ -211,12 +221,12 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; + enum iommu_domain_cookie_type cookie_type; const struct iommu_domain_ops *ops; const struct iommu_dirty_ops *dirty_ops; const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; - struct iommu_dma_cookie *iova_cookie; int (*iopf_handler)(struct iopf_group *group); #if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) @@ -224,10 +234,10 @@ struct iommu_domain { phys_addr_t msi_addr); #endif - union { /* Pointer usable by owner of the domain */ - struct 
iommufd_hw_pagetable *iommufd_hwpt; /* iommufd */ - }; - union { /* Fault handler */ + union { /* cookie */ + struct iommu_dma_cookie *iova_cookie; + struct iommu_dma_msi_cookie *msi_cookie; + struct iommufd_hw_pagetable *iommufd_hwpt; struct { iommu_fault_handler_t handler; void *handler_token; -- cgit v1.2.3 From 06d54f00f3f5a29cbf43410ac93ee2dd89e3b711 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 24 Mar 2025 21:05:17 -0700 Subject: iommu: Drop sw_msi from iommu_domain There are only two sw_msi implementations in the entire system, thus it's not very necessary to have an sw_msi pointer. Instead, check domain->cookie_type to call the two sw_msi implementations directly from the core code. Link: https://patch.msgid.link/r/7ded87c871afcbaac665b71354de0a335087bf0f.1742871535.git.nicolinc@nvidia.com Suggested-by: Robin Murphy Reviewed-by: Robin Murphy Reviewed-by: Kevin Tian Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 06cc14e9993d..e01c855ae8a7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -229,11 +229,6 @@ struct iommu_domain { struct iommu_domain_geometry geometry; int (*iopf_handler)(struct iopf_group *group); -#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) - int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr); -#endif - union { /* cookie */ struct iommu_dma_cookie *iova_cookie; struct iommu_dma_msi_cookie *msi_cookie; @@ -254,16 +249,6 @@ struct iommu_domain { }; }; -static inline void iommu_domain_set_sw_msi( - struct iommu_domain *domain, - int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc, - phys_addr_t msi_addr)) -{ -#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) - domain->sw_msi = sw_msi; -#endif -} - static inline bool iommu_is_dma_domain(struct iommu_domain *domain) { return domain->type 
& __IOMMU_DOMAIN_DMA_API; -- cgit v1.2.3 From 2fb69c602d57f77483b8dcdd12d17408a09f76fe Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:33 -0700 Subject: iommufd: Support pasid attach/replace This extends the below APIs to support PASID. Device drivers to manage pasid attach/replace/detach. int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, u32 *pt_id); int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid); The pasid operations share underlying attach/replace/detach infrastructure with the device operations, but still have some different implications: - no reserved region per pasid otherwise SVA architecture is already broken (CPU address space doesn't count device reserved regions); - accordingly no sw_msi trick; Cache coherency enforcement is still applied to pasid operations since it is about memory accesses post page table walking (no matter the walk is per RID or per PASID). 
Link: https://patch.msgid.link/r/20250321171940.7213-12-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Signed-off-by: Kevin Tian Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 60eff9272551..34b6e6ca4bfa 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -54,9 +55,11 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); -void iommufd_device_detach(struct iommufd_device *idev); +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); u32 iommufd_device_to_id(struct iommufd_device *idev); -- cgit v1.2.3 From dbc5f37b4f8ad833132f77c1f67e68bb11ca9b9e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 10:19:36 -0700 Subject: iommufd: Allow allocating PASID-compatible domain The underlying infrastructure has supported the PASID attach and related enforcement per the requirement of the IOMMU_HWPT_ALLOC_PASID flag. This extends iommufd to support PASID compatible domain requested by userspace. 
Link: https://patch.msgid.link/r/20250321171940.7213-15-yi.l.liu@intel.com Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Signed-off-by: Yi Liu Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 8719d4f5d618..6901804ec736 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -393,6 +393,9 @@ struct iommu_vfio_ioas { * Any domain attached to the non-PASID part of the * device must also be flagged, otherwise attaching a * PASID will blocked. + * For the user that wants to attach PASID, ioas is + * not recommended for both the non-PASID part + * and PASID part of the device. * If IOMMU does not support PASID it will return * error (-EOPNOTSUPP). */ -- cgit v1.2.3 From 7fe6b987166b901efc5c6fce5fe853c9ebb835be Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:39 -0700 Subject: ida: Add ida_find_first_range() There is no helper for users to check whether a given ID is allocated, nor a helper to loop over all the allocated IDs in an IDA and do something for cleanup. To meet these two needs, add a helper that gets the lowest allocated ID in a range, plus two variants based on it.
Caller can check if a given ID is allocated or not by: bool ida_exists(struct ida *ida, unsigned int id) Caller can iterate all allocated IDs by: int id; while ((id = ida_find_first(&pasid_ida)) >= 0) { //anything to do with the allocated ID ida_free(&pasid_ida, id); } Link: https://patch.msgid.link/r/20250321180143.8468-2-yi.l.liu@intel.com Cc: Matthew Wilcox (Oracle) Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Matthew Wilcox (Oracle) Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/linux/idr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index da5f5fa4a3a6..718f9b1b91af 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -257,6 +257,7 @@ struct ida { int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); +int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max); /** * ida_alloc() - Allocate an unused ID. @@ -328,4 +329,14 @@ static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } + +static inline bool ida_exists(struct ida *ida, unsigned int id) +{ + return ida_find_first_range(ida, id, id) == id; +} + +static inline int ida_find_first(struct ida *ida) +{ + return ida_find_first_range(ida, 0, ~0); +} #endif /* __IDR_H__ */ -- cgit v1.2.3 From 290641346d0d1eaf400c4f968d5b2cd91f483733 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:40 -0700 Subject: vfio-iommufd: Support pasid [at|de]tach for physical VFIO devices This adds pasid_[at|de]tach_ioas ops for attaching hwpt to pasid of a device and the helpers for it. For now, only vfio-pci supports pasid attach/detach.
Link: https://patch.msgid.link/r/20250321180143.8468-3-yi.l.liu@intel.com Signed-off-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Alex Williamson Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/linux/vfio.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 000a6cab2d31..707b00772ce1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -67,6 +67,7 @@ struct vfio_device { struct inode *inode; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; + struct ida pasids; u8 iommufd_attached:1; #endif u8 cdev_opened:1; @@ -91,6 +92,8 @@ struct vfio_device { * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not * called. * @detach_ioas: Opposite of attach_ioas + * @pasid_attach_ioas: The pasid variation of attach_ioas + * @pasid_detach_ioas: Opposite of pasid_attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -115,6 +118,9 @@ struct vfio_device_ops { void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); void (*detach_ioas)(struct vfio_device *vdev); + int (*pasid_attach_ioas)(struct vfio_device *vdev, u32 pasid, + u32 *pt_id); + void (*pasid_detach_ioas)(struct vfio_device *vdev, u32 pasid); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -139,6 +145,10 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, + u32 pasid, u32 
*pt_id); +void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, + u32 pasid); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); @@ -166,6 +176,10 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #define vfio_iommufd_physical_detach_ioas \ ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_pasid_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 pasid, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_pasid_detach_ioas \ + ((void (*)(struct vfio_device *vdev, u32 pasid)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) -- cgit v1.2.3 From ad744ed5dd8b70e9256fc1ff18aaaffeedf5f21e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:41 -0700 Subject: vfio: VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT support pasid This extends the VFIO_DEVICE_[AT|DE]TACH_IOMMUFD_PT ioctls to attach/detach a given pasid of a vfio device to/from an IOAS/HWPT. Link: https://patch.msgid.link/r/20250321180143.8468-4-yi.l.liu@intel.com Reviewed-by: Alex Williamson Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/vfio.h | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index c8dbf8219c4f..6899da70b929 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -931,29 +931,34 @@ struct vfio_device_bind_iommufd { * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19, * struct vfio_device_attach_iommufd_pt) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Flags for attach. 
* @pt_id: Input the target id which can represent an ioas or a hwpt * allocated via iommufd subsystem. * Output the input ioas id or the attached hwpt id which could * be the specified hwpt itself or a hwpt automatically created * for the specified ioas by kernel during the attachment. + * @pasid: The pasid to be attached, only meaningful when + * VFIO_DEVICE_ATTACH_PASID is set in @flags * * Associate the device with an address space within the bound iommufd. * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only * allowed on cdev fds. * - * If a vfio device is currently attached to a valid hw_pagetable, without doing - * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl - * passing in another hw_pagetable (hwpt) id is allowed. This action, also known - * as a hw_pagetable replacement, will replace the device's currently attached - * hw_pagetable with a new hw_pagetable corresponding to the given pt_id. + * If a vfio device or a pasid of this device is currently attached to a valid + * hw_pagetable (hwpt), without doing a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second + * VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl passing in another hwpt id is allowed. + * This action, also known as a hw_pagetable replacement, will replace the + * currently attached hwpt of the device or the pasid of this device with a new + * hwpt corresponding to the given pt_id. * * Return: 0 on success, -errno on failure. */ struct vfio_device_attach_iommufd_pt { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_ATTACH_PASID (1 << 0) __u32 pt_id; + __u32 pasid; }; #define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19) @@ -962,17 +967,21 @@ struct vfio_device_attach_iommufd_pt { * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20, * struct vfio_device_detach_iommufd_pt) * @argsz: User filled size of this data. - * @flags: Must be 0. + * @flags: Flags for detach. 
+ * @pasid: The pasid to be detached, only meaningful when + * VFIO_DEVICE_DETACH_PASID is set in @flags * - * Remove the association of the device and its current associated address - * space. After it, the device should be in a blocking DMA state. This is only - * allowed on cdev fds. + * Remove the association of the device or a pasid of the device and its current + * associated address space. After it, the device or the pasid should be in a + * blocking DMA state. This is only allowed on cdev fds. * * Return: 0 on success, -errno on failure. */ struct vfio_device_detach_iommufd_pt { __u32 argsz; __u32 flags; +#define VFIO_DEVICE_DETACH_PASID (1 << 0) + __u32 pasid; }; #define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20) -- cgit v1.2.3 From 803f97298e7de9242eb677a1351dcafbbcc9117e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Fri, 21 Mar 2025 11:01:42 -0700 Subject: iommufd: Extend IOMMU_GET_HW_INFO to report PASID capability PASID usage requires PASID support in both device and IOMMU. Since the iommu drivers always enable the PASID capability for the device if it is supported, this extends the IOMMU_GET_HW_INFO to report the PASID capability to userspace. Also, enhances the selftest accordingly. 
Link: https://patch.msgid.link/r/20250321180143.8468-5-yi.l.liu@intel.com Cc: Bjorn Helgaas Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Zhangfei Gao #aarch64 platform Tested-by: Nicolin Chen Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/linux/pci-ats.h | 3 +++ include/uapi/linux/iommufd.h | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 0e8b74e63767..75c6c86cf09d 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -42,6 +42,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); +int pci_pasid_status(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ static inline int pci_enable_pasid(struct pci_dev *pdev, int features) { return -EINVAL; } @@ -50,6 +51,8 @@ static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) { return -EINVAL; } +static inline int pci_pasid_status(struct pci_dev *pdev) +{ return -EINVAL; } #endif /* CONFIG_PCI_PASID */ #endif /* LINUX_PCI_ATS_H */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 6901804ec736..e2c04e58a997 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -612,9 +612,17 @@ enum iommu_hw_info_type { * IOMMU_HWPT_GET_DIRTY_BITMAP * IOMMU_HWPT_SET_DIRTY_TRACKING * + * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. + * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it + * when the struct + * iommu_hw_info::out_max_pasid_log2 is zero. 
*/ enum iommufd_hw_capabilities { IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, + IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1, + IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, }; /** @@ -630,6 +638,9 @@ enum iommufd_hw_capabilities { * iommu_hw_info_type. * @out_capabilities: Output the generic iommu capability info type as defined * in the enum iommu_hw_capabilities. + * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support. + * PCI devices turn to out_capabilities to check if the + * specific capabilities is supported or not. * @__reserved: Must be 0 * * Query an iommu type specific hardware information data from an iommu behind @@ -653,7 +664,8 @@ struct iommu_hw_info { __u32 data_len; __aligned_u64 data_uptr; __u32 out_data_type; - __u32 __reserved; + __u8 out_max_pasid_log2; + __u8 __reserved[3]; __aligned_u64 out_capabilities; }; #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) -- cgit v1.2.3 From 858c9c10c123b7b04bba12c689db675c18d48bda Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Fri, 28 Mar 2025 18:46:54 +0700 Subject: iommufd: Fix iommu_vevent_header tables markup Stephen Rothwell reports htmldocs warnings on iommufd_vevent_header tables: Documentation/userspace-api/iommufd:323: ./include/uapi/linux/iommufd.h:1048: CRITICAL: Unexpected section title or transition. ------------------------------------------------------------------------- [docutils] WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -sphinx-version 8.1.3 ./include/uapi/linux/iommufd.h' processing failed with: Documentation/userspace-api/iommufd:323: ./include/uapi/linux/iommufd.h:1048: (SEVERE/4) Unexpected section title or transition. ------------------------------------------------------------------------- These are because Sphinx confuses the tables for section headings. Fix the table markup to squash away above warnings. 
Fixes: e36ba5ab808e ("iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC") Link: https://patch.msgid.link/r/20250328114654.55840-1-bagasdotme@gmail.com Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250318213359.5dc56fd1@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index e2c04e58a997..f29b6c44655e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -1045,21 +1045,26 @@ enum iommu_veventq_flag { * [0, INT_MAX] where the following index of INT_MAX is 0 * * Each iommufd_vevent_header reports a sequence index of the following vEVENT: - * ------------------------------------------------------------------------- + * + * +----------------------+-------+----------------------+-------+---+-------+ * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN | - * ------------------------------------------------------------------------- + * +----------------------+-------+----------------------+-------+---+-------+ + * * And this sequence index is expected to be monotonic to the sequence index of * the previous vEVENT. If two adjacent sequence indexes has a delta larger than * 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs: - * ------------------------------------------------------------------------- + * + * +-----+----------------------+-------+----------------------+-------+-----+ * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... 
| - * ------------------------------------------------------------------------- + * +-----+----------------------+-------+----------------------+-------+-----+ + * * If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header * would be added to the tail, and no data would follow this header: - * --------------------------------------------------------------------------- + * + * +--+----------------------+-------+-----------------------------------------+ * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} | - * --------------------------------------------------------------------------- + * +--+----------------------+-------+-----------------------------------------+ */ struct iommufd_vevent_header { __u32 flags; @@ -1117,9 +1122,11 @@ struct iommu_vevent_arm_smmuv3 { * * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by * a type-specific data structure, in a normal case: - * ------------------------------------------------------------- - * || header0 | data0 | header1 | data1 | ... | headerN | dataN || - * ------------------------------------------------------------- + * + * +-+---------+-------+---------+-------+-----+---------+-------+-+ + * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | | + * +-+---------+-------+---------+-------+-----+---------+-------+-+ + * * unless a tailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to * struct iommufd_vevent_header). */ -- cgit v1.2.3