diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-31 12:43:08 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-31 12:43:08 -0700 |
| commit | c93529ad4fa8d8d8cb21649e70a46991a1dda0f8 (patch) | |
| tree | 6e8463ada36738a111d4978a746222575004f779 /include | |
| parent | 7ce4de1cdaf11c39b507008dfb5a4e59079d4e8a (diff) | |
| parent | 2c78e74493d33b002312296fbab1d688bfd0f76f (diff) | |
Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd
Pull iommufd updates from Jason Gunthorpe:
"This broadly brings the assigned HW command queue support to iommufd.
This feature is used to improve SVA performance in VMs by avoiding
paravirtualization traps during SVA invalidations.
Along the way I think some of the core logic is in a much better state
to support future driver backed features.
Summary:
- IOMMU HW now has features to directly assign HW command queues to a
guest VM. In this mode the command queue operates on a limited set
of invalidation commands that are suitable for improving guest
invalidation performance and easy for the HW to virtualize.
This brings the generic infrastructure to allow IOMMU drivers to
expose such command queues through the iommufd uAPI, mmap the
doorbell pages, and get the guest physical range for the command
queue ring itself.
- An implementation for the NVIDIA SMMUv3 extension "cmdqv" is built
on the new iommufd command queue features. It works with the
existing SMMU driver support for cmdqv in guest VMs.
- Many precursor cleanups and improvements to support the above
cleanly, changes to the general ioctl and object helpers, driver
support for VDEVICE, and mmap pgoff cookie infrastructure.
- Sequence VDEVICE destruction to always happen before VFIO device
destruction. When using the above type features, and also in future
confidential compute, the internal virtual device representation
becomes linked to HW or CC TSM configuration and objects. If a VFIO
device is removed from iommufd those HW objects should also be
cleaned up to prevent a sort of UAF. This became important now that
we have HW backing the VDEVICE.
- Fix one syzkaller found error related to math overflows during iova
allocation"
* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (57 commits)
iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
iommu/arm-smmu-v3: Do not bother impl_ops if IOMMU_VIOMMU_TYPE_ARM_SMMUV3
iommufd: Rename some shortterm-related identifiers
iommufd/selftest: Add coverage for vdevice tombstone
iommufd/selftest: Explicitly skip tests for inapplicable variant
iommufd/vdevice: Remove struct device reference from struct vdevice
iommufd: Destroy vdevice on idevice destroy
iommufd: Add a pre_destroy() op for objects
iommufd: Add iommufd_object_tombstone_user() helper
iommufd/viommu: Roll back to use iommufd_object_alloc() for vdevice
iommufd/selftest: Test reserved regions near ULONG_MAX
iommufd: Prevent ALIGN() overflow
iommu/tegra241-cmdqv: import IOMMUFD module namespace
iommufd: Do not allow _iommufd_object_alloc_ucmd if abort op is set
iommu/tegra241-cmdqv: Add IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV support
iommu/tegra241-cmdqv: Add user-space use support
iommu/tegra241-cmdqv: Do not statically map LVCMDQs
iommu/tegra241-cmdqv: Simplify deinit flow in tegra241_cmdqv_remove_vintf()
iommu/tegra241-cmdqv: Use request_threaded_irq
iommu/arm-smmu-v3-iommufd: Add hw_info to impl_ops
...
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/iommu.h | 74 | ||||
| -rw-r--r-- | include/linux/iommufd.h | 196 | ||||
| -rw-r--r-- | include/uapi/linux/iommufd.h | 154 |
3 files changed, 385 insertions, 39 deletions
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7073be1d8841..c30d12e16473 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/of.h> #include <linux/iova_bitmap.h> +#include <uapi/linux/iommufd.h> #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -558,12 +559,52 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, } /** + * __iommu_copy_struct_to_user - Report iommu driver specific user space data + * @dst_data: Pointer to a struct iommu_user_data for user space data location + * @src_data: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @data_type: The data type of the @src_data. Must match with @dst_data.type + * @data_len: Length of current user data structure, i.e. sizeof(struct _src) + * @min_len: Initial length of user data structure for backward compatibility. + * This should be offsetofend using the last member in the user data + * struct that was initially added to include/uapi/linux/iommufd.h + */ +static inline int +__iommu_copy_struct_to_user(const struct iommu_user_data *dst_data, + void *src_data, unsigned int data_type, + size_t data_len, size_t min_len) +{ + if (WARN_ON(!dst_data || !src_data)) + return -EINVAL; + if (dst_data->type != data_type) + return -EINVAL; + if (dst_data->len < min_len || data_len < dst_data->len) + return -EINVAL; + return copy_struct_to_user(dst_data->uptr, dst_data->len, src_data, + data_len, NULL); +} + +/** + * iommu_copy_struct_to_user - Report iommu driver specific user space data + * @user_data: Pointer to a struct iommu_user_data for user space data location + * @ksrc: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @data_type: The data type of the @ksrc. Must match with @user_data->type + * @min_last: The last member of the data structure @ksrc points in the initial + * version. + * Return 0 for success, otherwise -error. + */ +#define iommu_copy_struct_to_user(user_data, ksrc, data_type, min_last) \ + __iommu_copy_struct_to_user(user_data, ksrc, data_type, sizeof(*ksrc), \ + offsetofend(typeof(*ksrc), min_last)) + +/** * struct iommu_ops - iommu ops and capabilities * @capable: check capability * @hw_info: report iommu hardware information. The data buffer returned by this * op is allocated in the iommu driver and freed by the caller after - * use. The information type is one of enum iommu_hw_info_type defined - * in include/uapi/linux/iommufd.h. + * use. @type can input a requested type and output a supported type. + * Driver should reject an unsupported data @type input * @domain_alloc: Do not use in new drivers * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to * use identity_domain instead. This should only be used @@ -596,14 +637,16 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains - * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind - * the @dev, as the set of virtualization resources shared/passed - * to user space IOMMU instance. And associate it with a nesting - * @parent_domain. The @viommu_type must be defined in the header - * include/uapi/linux/iommufd.h - * It is required to call iommufd_viommu_alloc() helper for - * a bundled allocation of the core and the driver structures, - * using the given @ictx pointer. + * @get_viommu_size: Get the size of a driver-level vIOMMU structure for a given + * @dev corresponding to @viommu_type. Driver should return 0 + * if vIOMMU isn't supported accordingly. It is required for + * driver to use the VIOMMU_STRUCT_SIZE macro to sanitize the + * driver-level vIOMMU structure related to the core one + * @viommu_init: Init the driver-level struct of an iommufd_viommu on a physical + * IOMMU instance @viommu->iommu_dev, as the set of virtualization + * resources shared/passed to user space IOMMU instance. Associate + * it with a nesting @parent_domain. It is required for driver to + * set @viommu->ops pointing to its own viommu_ops * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity * translation. @@ -619,7 +662,8 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); - void *(*hw_info)(struct device *dev, u32 *length, u32 *type); + void *(*hw_info)(struct device *dev, u32 *length, + enum iommu_hw_info_type *type); /* Domain allocation and freeing by the iommu driver */ #if IS_ENABLED(CONFIG_FSL_PAMU) @@ -653,9 +697,11 @@ struct iommu_ops { int (*def_domain_type)(struct device *dev); - struct iommufd_viommu *(*viommu_alloc)( - struct device *dev, struct iommu_domain *parent_domain, - struct iommufd_ctx *ictx, unsigned int viommu_type); + size_t (*get_viommu_size)(struct device *dev, + enum iommu_viommu_type viommu_type); + int (*viommu_init)(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data); const struct iommu_domain_ops *default_domain_ops; struct module *owner; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 34b6e6ca4bfa..6e7efe83bc5d 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -37,6 +37,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_VIOMMU, IOMMUFD_OBJ_VDEVICE, IOMMUFD_OBJ_VEVENTQ, + IOMMUFD_OBJ_HW_QUEUE, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -45,7 +46,13 @@ enum iommufd_object_type { /* Base struct for all objects with a userspace ID handle. */ struct iommufd_object { - refcount_t shortterm_users; + /* + * Destroy will sleep and wait for wait_cnt to go to zero. This allows + * concurrent users of the ID to reliably avoid causing a spurious + * destroy failure. Incrementing this count should either be short + * lived or be revoked and blocked during pre_destroy(). + */ + refcount_t wait_cnt; refcount_t users; enum iommufd_object_type type; unsigned int id; @@ -101,7 +108,36 @@ struct iommufd_viommu { struct list_head veventqs; struct rw_semaphore veventqs_rwsem; - unsigned int type; + enum iommu_viommu_type type; +}; + +struct iommufd_vdevice { + struct iommufd_object obj; + struct iommufd_viommu *viommu; + struct iommufd_device *idev; + + /* + * Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID of + * AMD IOMMU, and vRID of Intel VT-d + */ + u64 virt_id; + + /* Clean up all driver-specific parts of an iommufd_vdevice */ + void (*destroy)(struct iommufd_vdevice *vdev); +}; + +struct iommufd_hw_queue { + struct iommufd_object obj; + struct iommufd_viommu *viommu; + struct iommufd_access *access; + + u64 base_addr; /* in guest physical address space */ + size_t length; + + enum iommu_hw_queue_type type; + + /* Clean up all driver-specific parts of an iommufd_hw_queue */ + void (*destroy)(struct iommufd_hw_queue *hw_queue); }; /** @@ -120,6 +156,30 @@ struct iommufd_viommu { * array->entry_num to report the number of handled requests. * The data structure of the array entry must be defined in * include/uapi/linux/iommufd.h + * @vdevice_size: Size of the driver-defined vDEVICE structure per this vIOMMU + * @vdevice_init: Initialize the driver-level structure of a vDEVICE object, or + * related HW procedure. @vdev is already initialized by iommufd + * core: vdev->dev and vdev->viommu pointers; vdev->id carries a + * per-vIOMMU virtual ID (refer to struct iommu_vdevice_alloc in + * include/uapi/linux/iommufd.h) + * If driver has a deinit function to revert what vdevice_init op + * does, it should set it to the @vdev->destroy function pointer + * @get_hw_queue_size: Get the size of a driver-defined HW queue structure for a + * given @viommu corresponding to @queue_type. Driver should + * return 0 if HW queue aren't supported accordingly. It is + * required for driver to use the HW_QUEUE_STRUCT_SIZE macro + * to sanitize the driver-level HW queue structure related + * to the core one + * @hw_queue_init_phys: Initialize the driver-level structure of a HW queue that + * is initialized with its core-level structure that holds + * all the info about a guest queue memory. + * Driver providing this op indicates that HW accesses the + * guest queue memory via physical addresses. + * @index carries the logical HW QUEUE ID per vIOMMU in a + * guest VM, for a multi-queue model. @base_addr_pa carries + * the physical location of the guest queue + * If driver has a deinit function to revert what this op + * does, it should set it to the @hw_queue->destroy pointer */ struct iommufd_viommu_ops { void (*destroy)(struct iommufd_viommu *viommu); @@ -128,6 +188,13 @@ struct iommufd_viommu_ops { const struct iommu_user_data *user_data); int (*cache_invalidate)(struct iommufd_viommu *viommu, struct iommu_user_data_array *array); + const size_t vdevice_size; + int (*vdevice_init)(struct iommufd_vdevice *vdev); + size_t (*get_hw_queue_size)(struct iommufd_viommu *viommu, + enum iommu_hw_queue_type queue_type); + /* AMD's HW will add hw_queue_init simply using @hw_queue->base_addr */ + int (*hw_queue_init_phys)(struct iommufd_hw_queue *hw_queue, u32 index, + phys_addr_t base_addr_pa); }; #if IS_ENABLED(CONFIG_IOMMUFD) @@ -171,8 +238,9 @@ static inline void iommufd_access_unpin_pages(struct iommufd_access *access, { } -static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, - void *data, size_t len, unsigned int flags) +static inline int iommufd_access_rw(struct iommufd_access *access, + unsigned long iova, void *data, size_t len, + unsigned int flags) { return -EOPNOTSUPP; } @@ -189,9 +257,16 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) #endif /* CONFIG_IOMMUFD */ #if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type); +int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended); +void _iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended); +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset); +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, unsigned long offset); +struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -200,11 +275,36 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, enum iommu_veventq_type type, void *event_data, size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ -static inline struct iommufd_object * -_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, - enum iommufd_object_type type) +static inline int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ + return -EOPNOTSUPP; +} + +static inline void +_iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ +} + +static inline int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset) +{ + return -EOPNOTSUPP; +} + +static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, + unsigned long offset) { - return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct device * +iommufd_vdevice_to_device(struct iommufd_vdevice *vdev) +{ + return NULL; } static inline struct device * @@ -228,21 +328,73 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, } #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ +#define VIOMMU_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_viommu, \ + ((drv_struct *)NULL)->member))) + +#define VDEVICE_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_vdevice, \ + ((drv_struct *)NULL)->member))) + +#define HW_QUEUE_STRUCT_SIZE(drv_struct, member) \ + (sizeof(drv_struct) + \ + BUILD_BUG_ON_ZERO(offsetof(drv_struct, member)) + \ + BUILD_BUG_ON_ZERO(!__same_type(struct iommufd_hw_queue, \ + ((drv_struct *)NULL)->member))) + /* - * Helpers for IOMMU driver to allocate driver structures that will be freed by - * the iommufd core. The free op will be called prior to freeing the memory. + * Helpers for IOMMU driver to build/destroy a dependency between two sibling + * structures created by one of the allocators above */ -#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \ +#define iommufd_hw_queue_depend(dependent, depended, member) \ ({ \ - drv_struct *ret; \ + int ret = -EINVAL; \ \ - static_assert(__same_type(struct iommufd_viommu, \ - ((drv_struct *)NULL)->member)); \ - static_assert(offsetof(drv_struct, member.obj) == 0); \ - ret = (drv_struct *)_iommufd_object_alloc( \ - ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \ - if (!IS_ERR(ret)) \ - ret->member.ops = viommu_ops; \ + static_assert(__same_type(struct iommufd_hw_queue, \ + dependent->member)); \ + static_assert(__same_type(typeof(*dependent), *depended)); \ + if (!WARN_ON_ONCE(dependent->member.viommu != \ + depended->member.viommu)) \ + ret = _iommufd_object_depend(&dependent->member.obj, \ + &depended->member.obj); \ ret; \ }) + +#define iommufd_hw_queue_undepend(dependent, depended, member) \ + ({ \ + static_assert(__same_type(struct iommufd_hw_queue, \ + dependent->member)); \ + static_assert(__same_type(typeof(*dependent), *depended)); \ + WARN_ON_ONCE(dependent->member.viommu != \ + depended->member.viommu); \ + _iommufd_object_undepend(&dependent->member.obj, \ + &depended->member.obj); \ + }) + +/* + * Helpers for IOMMU driver to alloc/destroy an mmapable area for a structure. + * + * To support an mmappable MMIO region, kernel driver must first register it to + * iommufd core to allocate an @offset, during a driver-structure initialization + * (e.g. viommu_init op). Then, it should report to user space this @offset and + * the @length of the MMIO region for mmap syscall. + */ +static inline int iommufd_viommu_alloc_mmap(struct iommufd_viommu *viommu, + phys_addr_t mmio_addr, + size_t length, + unsigned long *offset) +{ + return _iommufd_alloc_mmap(viommu->ictx, &viommu->obj, mmio_addr, + length, offset); +} + +static inline void iommufd_viommu_destroy_mmap(struct iommufd_viommu *viommu, + unsigned long offset) +{ + _iommufd_destroy_mmap(viommu->ictx, &viommu->obj, offset); +} #endif diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index f29b6c44655e..c218c89e0e2e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -56,6 +56,7 @@ enum { IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, + IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94, }; /** @@ -591,16 +592,43 @@ struct iommu_hw_info_arm_smmuv3 { }; /** + * struct iommu_hw_info_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Hardware + * Information (IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV) + * + * @flags: Must be 0 + * @version: Version number for the CMDQ-V HW for PARAM bits[03:00] + * @log2vcmdqs: Log2 of the total number of VCMDQs for PARAM bits[07:04] + * @log2vsids: Log2 of the total number of SID replacements for PARAM bits[15:12] + * @__reserved: Must be 0 + * + * VMM can use these fields directly in its emulated global PARAM register. Note + * that only one Virtual Interface (VINTF) should be exposed to a VM, i.e. PARAM + * bits[11:08] should be set to 0 for log2 of the total number of VINTFs. + */ +struct iommu_hw_info_tegra241_cmdqv { + __u32 flags; + __u8 version; + __u8 log2vcmdqs; + __u8 log2vsids; + __u8 __reserved; +}; + +/** * enum iommu_hw_info_type - IOMMU Hardware Info Types - * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware + * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware * info + * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request for a default type * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type + * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM + * SMMUv3) info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE = 0, + IOMMU_HW_INFO_TYPE_DEFAULT = 0, IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, + IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3, }; /** @@ -626,6 +654,15 @@ enum iommufd_hw_capabilities { }; /** + * enum iommufd_hw_info_flags - Flags for iommu_hw_info + * @IOMMU_HW_INFO_FLAG_INPUT_TYPE: If set, @in_data_type carries an input type + * for user space to request for a specific info + */ +enum iommufd_hw_info_flags { + IOMMU_HW_INFO_FLAG_INPUT_TYPE = 1 << 0, +}; + +/** * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) * @size: sizeof(struct iommu_hw_info) * @flags: Must be 0 @@ -634,6 +671,12 @@ enum iommufd_hw_capabilities { * data that kernel supports * @data_uptr: User pointer to a user-space buffer used by the kernel to fill * the iommu type specific hardware information data + * @in_data_type: This shares the same field with @out_data_type, making it be + * a bidirectional field. When IOMMU_HW_INFO_FLAG_INPUT_TYPE is + * set, an input type carried via this @in_data_type field will + * be valid, requesting for the info data to the given type. If + * IOMMU_HW_INFO_FLAG_INPUT_TYPE is unset, any input value will + * be seen as IOMMU_HW_INFO_TYPE_DEFAULT * @out_data_type: Output the iommu hardware info type as defined in the enum * iommu_hw_info_type. * @out_capabilities: Output the generic iommu capability info type as defined @@ -663,7 +706,10 @@ struct iommu_hw_info { __u32 dev_id; __u32 data_len; __aligned_u64 data_uptr; - __u32 out_data_type; + union { + __u32 in_data_type; + __u32 out_data_type; + }; __u8 out_max_pasid_log2; __u8 __reserved[3]; __aligned_u64 out_capabilities; @@ -951,10 +997,29 @@ struct iommu_fault_alloc { * enum iommu_viommu_type - Virtual IOMMU Type * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type + * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM + * SMMUv3) enabled ARM SMMUv3 type */ enum iommu_viommu_type { IOMMU_VIOMMU_TYPE_DEFAULT = 0, IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2, +}; + +/** + * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface + * (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV) + * @out_vintf_mmap_offset: mmap offset argument for VINTF's page0 + * @out_vintf_mmap_length: mmap length argument for VINTF's page0 + * + * Both @out_vintf_mmap_offset and @out_vintf_mmap_length are reported by kernel + * for user space to mmap the VINTF page0 from the host physical address space + * to the guest physical address space so that a guest kernel can directly R/W + * access to the VINTF page0 in order to control its virtual command queues. + */ +struct iommu_viommu_tegra241_cmdqv { + __aligned_u64 out_vintf_mmap_offset; + __aligned_u64 out_vintf_mmap_length; }; /** @@ -965,6 +1030,9 @@ enum iommu_viommu_type { * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU * @hwpt_id: ID of a nesting parent HWPT to associate to * @out_viommu_id: Output virtual IOMMU ID for the allocated object + * @data_len: Length of the type specific data + * @__reserved: Must be 0 + * @data_uptr: User pointer to a driver-specific virtual IOMMU data * * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's * virtualization support that is a security-isolated slice of the real IOMMU HW @@ -985,6 +1053,9 @@ struct iommu_viommu_alloc { __u32 dev_id; __u32 hwpt_id; __u32 out_viommu_id; + __u32 data_len; + __u32 __reserved; + __aligned_u64 data_uptr; }; #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) @@ -995,10 +1066,15 @@ struct iommu_viommu_alloc { * @dev_id: The physical device to allocate a virtual instance on the vIOMMU * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID - * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table + * of AMD IOMMU, and vRID of Intel VT-d * * Allocate a virtual device instance (for a physical device) against a vIOMMU. * This instance holds the device's information (related to its vIOMMU) in a VM. + * User should use IOMMU_DESTROY to destroy the virtual device before + * destroying the physical device (by closing vfio_cdev fd). Otherwise the + * virtual device would be forcibly destroyed on physical device destruction, + * its vdevice_id would be permanently leaked (unremovable & unreusable) until + * iommu fd closed. */ struct iommu_vdevice_alloc { __u32 size; @@ -1075,10 +1151,12 @@ struct iommufd_vevent_header { * enum iommu_veventq_type - Virtual Event Queue Type * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue + * @IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension IRQ */ enum iommu_veventq_type { IOMMU_VEVENTQ_TYPE_DEFAULT = 0, IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1, + IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV = 2, }; /** @@ -1103,6 +1181,19 @@ struct iommu_vevent_arm_smmuv3 { }; /** + * struct iommu_vevent_tegra241_cmdqv - Tegra241 CMDQV IRQ + * (IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV) + * @lvcmdq_err_map: 128-bit logical vcmdq error map, little-endian. + * (Refer to register LVCMDQ_ERR_MAPs per VINTF ) + * + * The 128-bit register value from HW exclusively reflect the error bits for a + * Virtual Interface represented by a vIOMMU object. Read and report directly. + */ +struct iommu_vevent_tegra241_cmdqv { + __aligned_le64 lvcmdq_err_map[2]; +}; + +/** * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC) * @size: sizeof(struct iommu_veventq_alloc) * @flags: Must be 0 @@ -1141,4 +1232,61 @@ struct iommu_veventq_alloc { __u32 __reserved; }; #define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) + +/** + * enum iommu_hw_queue_type - HW Queue Type + * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use + * @IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM + * SMMUv3) Virtual Command Queue (VCMDQ) + */ +enum iommu_hw_queue_type { + IOMMU_HW_QUEUE_TYPE_DEFAULT = 0, + /* + * TEGRA241_CMDQV requirements (otherwise, allocation will fail) + * - alloc starts from the lowest @index=0 in ascending order + * - destroy starts from the last allocated @index in descending order + * - @base_addr must be aligned to @length in bytes and mapped in IOAS + * - @length must be a power of 2, with a minimum 32 bytes and a maximum + * 2 ^ idr[1].CMDQS * 16 bytes (use GET_HW_INFO call to read idr[1] + * from struct iommu_hw_info_arm_smmuv3) + * - suggest to back the queue memory with contiguous physical pages or + * a single huge page with alignment of the queue size, and limit the + * emulated vSMMU's IDR1.CMDQS to log2(huge page size / 16 bytes) + */ + IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV = 1, +}; + +/** + * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC) + * @size: sizeof(struct iommu_hw_queue_alloc) + * @flags: Must be 0 + * @viommu_id: Virtual IOMMU ID to associate the HW queue with + * @type: One of enum iommu_hw_queue_type + * @index: The logical index to the HW queue per virtual IOMMU for a multi-queue + * model + * @out_hw_queue_id: The ID of the new HW queue + * @nesting_parent_iova: Base address of the queue memory in the guest physical + * address space + * @length: Length of the queue memory + * + * Allocate a HW queue object for a vIOMMU-specific HW-accelerated queue, which + * allows HW to access a guest queue memory described using @nesting_parent_iova + * and @length. + * + * A vIOMMU can allocate multiple queues, but it must use a different @index per + * type to separate each allocation, e.g:: + * + * Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ... + */ +struct iommu_hw_queue_alloc { + __u32 size; + __u32 flags; + __u32 viommu_id; + __u32 type; + __u32 index; + __u32 out_hw_queue_id; + __aligned_u64 nesting_parent_iova; + __aligned_u64 length; +}; +#define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC) #endif |
