From 86b0a96c2952fa07b782b37f6ec783ace63a01a6 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:36 -0700 Subject: iommufd: Add iommufd_ctx_has_group() This adds the helper to check if any device within the given iommu_group has been bound with the iommufd_ctx. This is helpful for the checking on device ownership for the devices which have not been bound but cannot be bound to any other iommufd_ctx as the iommu_group has been bound. Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-5-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/iommufd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 1129a36a74c4..f241bafa03da 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -16,6 +16,7 @@ struct page; struct iommufd_ctx; struct iommufd_access; struct file; +struct iommu_group; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); @@ -50,6 +51,7 @@ void iommufd_ctx_get(struct iommufd_ctx *ictx); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); void iommufd_ctx_put(struct iommufd_ctx *ictx); +bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group); int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, -- cgit v1.2.3 From 78d3df457ae5eb53ef1f295a8a704691abea1b1d Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:37 -0700 Subject: iommufd: Add helper to retrieve iommufd_ctx and devid This is needed by the vfio-pci driver to report affected devices in the hot-reset for a given device. Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-6-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/iommufd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index f241bafa03da..68defed9ea48 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -25,6 +25,9 @@ void iommufd_device_unbind(struct iommufd_device *idev); int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); void iommufd_device_detach(struct iommufd_device *idev); +struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); +u32 iommufd_device_to_id(struct iommufd_device *idev); + struct iommufd_access_ops { u8 needs_pin_pages : 1; void (*unmap)(void *data, unsigned long iova, unsigned long length); -- cgit v1.2.3 From af949759bad27934b6f242e8a3b1c394e09fb4a3 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:38 -0700 Subject: vfio: Mark cdev usage in vfio_device This can be used to differentiate whether to report group_id or devid in the revised VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl. At this moment, no cdev path yet, so the vfio_device_cdev_opened() helper always returns false. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-7-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2c137ea94a3e..2a45853773a6 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -139,6 +139,11 @@ int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #endif +static inline bool vfio_device_cdev_opened(struct vfio_device *device) +{ + return false; +} + /** * struct vfio_migration_ops - VFIO bus device driver migration callbacks * -- cgit v1.2.3 From a80e1de93275fbfba0617e6bbea8522ea5329eb5 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:39 -0700 Subject: vfio: Add helper to search vfio_device in a dev_set There are drivers that need to search vfio_device within a given dev_set. e.g. vfio-pci. So add a helper. vfio_pci_is_device_in_set() now returns -EBUSY in commit a882c16a2b7e ("vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set") where it was trying to preserve the return of vfio_pci_try_zap_and_vma_lock_cb(). However, it makes more sense to return -ENODEV. Suggested-by: Alex Williamson Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-8-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2a45853773a6..ee120d2d530b 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -244,6 +244,9 @@ void vfio_unregister_group_dev(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set); +struct vfio_device * +vfio_find_device_in_devset(struct vfio_device_set *dev_set, + struct device *dev); int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, -- cgit v1.2.3 From 9062ff405b49769c04f00373de2c9cefab91b600 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 03:55:40 -0700 Subject: vfio/pci: Extend VFIO_DEVICE_GET_PCI_HOT_RESET_INFO for vfio device cdev This allows VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl use the iommufd_ctx of the cdev device to check the ownership of the other affected devices. When VFIO_DEVICE_GET_PCI_HOT_RESET_INFO is called on an IOMMUFD managed device, the new flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is reported to indicate the values returned are IOMMUFD devids rather than group IDs as used when accessing vfio devices through the conventional vfio group interface. Additionally the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED will be reported in this mode if all of the devices affected by the hot-reset are owned by either virtue of being directly bound to the same iommufd context as the calling device, or implicitly owned via a shared IOMMU group. Suggested-by: Jason Gunthorpe Suggested-by: Alex Williamson Reviewed-by: Jason Gunthorpe Tested-by: Yanting Jiang Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718105542.4138-9-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ee120d2d530b..7079911edfb1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -114,6 +114,8 @@ struct vfio_device_ops { }; #if IS_ENABLED(CONFIG_IOMMUFD) +struct iommufd_ctx *vfio_iommufd_device_ictx(struct vfio_device *vdev); +int vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx); int vfio_iommufd_physical_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_physical_unbind(struct vfio_device *vdev); @@ -123,6 +125,18 @@ int vfio_iommufd_emulated_bind(struct vfio_device *vdev, void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); #else +static inline struct iommufd_ctx * +vfio_iommufd_device_ictx(struct vfio_device *vdev) +{ + return NULL; +} + +static inline int +vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ + return VFIO_PCI_DEVID_NOT_OWNED; +} + #define vfio_iommufd_physical_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) -- cgit v1.2.3 From b1a59be8a2b64d00409dc7c9d523572ed32bcff8 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:27 -0700 Subject: vfio: Refine vfio file kAPIs for KVM This prepares for making the below kAPIs to accept both group file and device file instead of only vfio group file. bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-3-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 7079911edfb1..06a5221949c5 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -272,6 +272,7 @@ int vfio_mig_get_next_state(struct vfio_device *device, */ struct iommu_group *vfio_file_iommu_group(struct file *file); bool vfio_file_is_group(struct file *file); +bool vfio_file_is_valid(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); -- cgit v1.2.3 From 9048c7341c4df9cae04c154a8b0f556dbe913358 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:38 -0700 Subject: vfio-iommufd: Add detach_ioas support for physical VFIO devices This prepares for adding DETACH ioctl for physical VFIO devices. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-14-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 06a5221949c5..f2f02273ece1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -73,7 +73,9 @@ struct vfio_device { * @bind_iommufd: Called when binding the device to an iommufd * @unbind_iommufd: Opposite of bind_iommufd * @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the - * bound iommufd. Undo in unbind_iommufd. + * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not + * called. + * @detach_ioas: Opposite of attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -97,6 +99,7 @@ struct vfio_device_ops { struct iommufd_ctx *ictx, u32 *out_device_id); void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); + void (*detach_ioas)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -120,6 +123,7 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); @@ -144,6 +148,8 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_physical_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_detach_ioas \ + ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) -- cgit v1.2.3 From e23a6217f3bb4f6f205d4517782ad49e3533fc1c Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 18 Jul 2023 06:55:39 -0700 Subject: iommufd/device: Add iommufd_access_detach() API Previously, the detach routine is only done by the destroy(). And it was called by vfio_iommufd_emulated_unbind() when the device runs close(), so all the mappings in iopt were cleaned in that setup, when the call trace reaches this detach() routine. Now, there's a need of a detach uAPI, meaning that it does not only need a new iommufd_access_detach() API, but also requires access->ops->unmap() call as a cleanup. So add one. However, leaving that unprotected can introduce some potential of a race condition during the pin_/unpin_pages() call, where access->ioas->iopt is getting referenced. So, add an ioas_lock to protect the context of iopt referencings. Also, to allow the iommufd_access_unpin_pages() callback to happen via this unmap() call, add an ioas_unpin pointer, so the unpin routine won't be affected by the "access->ioas = NULL" trick. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-15-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/iommufd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 68defed9ea48..3a3216cb9482 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -48,6 +48,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, const struct iommufd_access_ops *ops, void *data, u32 *id); void iommufd_access_destroy(struct iommufd_access *access); int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id); +void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); -- cgit v1.2.3 From 8cfa71860233652f8566bcdf55e77aefe0017b4a Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:40 -0700 Subject: vfio-iommufd: Add detach_ioas support for emulated VFIO devices This prepares for adding DETACH ioctl for emulated VFIO devices. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-16-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index f2f02273ece1..24091a7c7bdb 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -128,6 +128,7 @@ int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +void vfio_iommufd_emulated_detach_ioas(struct vfio_device *vdev); #else static inline struct iommufd_ctx * vfio_iommufd_device_ictx(struct vfio_device *vdev) @@ -157,6 +158,8 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((void (*)(struct vfio_device *vdev)) NULL) #define vfio_iommufd_emulated_attach_ioas \ ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_emulated_detach_ioas \ + ((void (*)(struct vfio_device *vdev)) NULL) #endif static inline bool vfio_device_cdev_opened(struct vfio_device *device) -- cgit v1.2.3 From 8b6f173a4ce47ef0606124710315560c64f2344e Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:43 -0700 Subject: vfio: Add cdev for vfio_device This adds cdev support for vfio_device. It allows the user to directly open a vfio device w/o using the legacy container/group interface, as a prerequisite for supporting new iommu features like nested translation and etc. The device fd opened in this manner doesn't have the capability to access the device as the fops open() doesn't open the device until the successful VFIO_DEVICE_BIND_IOMMUFD ioctl which will be added in a later patch. With this patch, devices registered to vfio core would have both the legacy group and the new device interfaces created. - group interface : /dev/vfio/$groupID - device interface: /dev/vfio/devices/vfioX - normal device ("X" is a unique number across vfio devices) For a given device, the user can identify the matching vfioX by searching the vfio-dev folder under the sysfs path of the device. Take PCI device (0000:6a:01.0) as an example, /sys/bus/pci/devices/0000\:6a\:01.0/vfio-dev/vfioX implies the matching vfioX under /dev/vfio/devices/, and vfio-dev/vfioX/dev contains the major:minor number of the matching /dev/vfio/devices/vfioX. The user can get device fd by opening the /dev/vfio/devices/vfioX. The vfio_device cdev logic in this patch: *) __vfio_register_dev() path ends up doing cdev_device_add() for each vfio_device if VFIO_DEVICE_CDEV configured. *) vfio_unregister_group_dev() path does cdev_device_del(); cdev interface does not support noiommu devices, so VFIO only creates the legacy group interface for the physical devices that do not have IOMMU. noiommu users should use the legacy group interface. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Tested-by: Terrence Xu Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-19-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 24091a7c7bdb..e0069f26488d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,9 @@ struct vfio_device { /* Members below here are private, not for driver use */ unsigned int index; struct device device; /* device.kref covers object life circle */ +#if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) + struct cdev cdev; +#endif refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; -- cgit v1.2.3 From 1c9dc07487cb0f246075b2d3b305bba91156d376 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:45 -0700 Subject: iommufd: Add iommufd_ctx_from_fd() It's common to get a reference to the iommufd context from a given file descriptor. So adds an API for it. Existing users of this API are compiled only when IOMMUFD is enabled, so no need to have a stub for the IOMMUFD disabled case. Tested-by: Yanting Jiang Reviewed-by: Jason Gunthorpe Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-21-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/iommufd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 3a3216cb9482..9657c58813dc 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -54,6 +54,7 @@ void iommufd_ctx_get(struct iommufd_ctx *ictx); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); +struct iommufd_ctx *iommufd_ctx_from_fd(int fd); void iommufd_ctx_put(struct iommufd_ctx *ictx); bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group); -- cgit v1.2.3 From 5fcc26969a164e6a3154bb68badd6712716eb954 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:47 -0700 Subject: vfio: Add VFIO_DEVICE_BIND_IOMMUFD This adds ioctl for userspace to bind device cdev fd to iommufd. VFIO_DEVICE_BIND_IOMMUFD: bind device to an iommufd, hence gain DMA control provided by the iommufd. open_device op is called after bind_iommufd op. Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230718135551.6592-23-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e0069f26488d..d6228c839c44 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -64,8 +64,9 @@ struct vfio_device { void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; - bool iommufd_attached; + u8 iommufd_attached:1; #endif + u8 cdev_opened:1; }; /** @@ -168,7 +169,7 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) static inline bool vfio_device_cdev_opened(struct vfio_device *device) { - return false; + return device->cdev_opened; } /** -- cgit v1.2.3 From c1cce6d079b875396c9a7c6838fc5b024758e540 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Tue, 18 Jul 2023 06:55:50 -0700 Subject: vfio: Compile vfio_group infrastructure optionally vfio_group is not needed for vfio device cdev, so with vfio device cdev introduced, the vfio_group infrastructures can be compiled out if only cdev is needed. Reviewed-by: Jason Gunthorpe Tested-by: Nicolin Chen Tested-by: Matthew Rosato Tested-by: Yanting Jiang Tested-by: Shameer Kolothum Tested-by: Terrence Xu Tested-by: Zhenzhong Duan Signed-off-by: Yi Liu Link: https://lore.kernel.org/r/20230718135551.6592-26-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index d6228c839c44..5a1dee983f17 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -43,7 +43,11 @@ struct vfio_device { */ const struct vfio_migration_ops *mig_ops; const struct vfio_log_ops *log_ops; +#if IS_ENABLED(CONFIG_VFIO_GROUP) struct vfio_group *group; + struct list_head group_next; + struct list_head iommu_entry; +#endif struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; @@ -58,8 +62,6 @@ struct vfio_device { refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; - struct list_head group_next; - struct list_head iommu_entry; struct iommufd_access *iommufd_access; void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) @@ -284,12 +286,29 @@ int vfio_mig_get_next_state(struct vfio_device *device, /* * External user API */ +#if IS_ENABLED(CONFIG_VFIO_GROUP) struct iommu_group *vfio_file_iommu_group(struct file *file); bool vfio_file_is_group(struct file *file); +bool vfio_file_has_dev(struct file *file, struct vfio_device *device); +#else +static inline struct iommu_group *vfio_file_iommu_group(struct file *file) +{ + return NULL; +} + +static inline bool vfio_file_is_group(struct file *file) +{ + return false; +} + +static inline bool vfio_file_has_dev(struct file *file, struct vfio_device *device) +{ + return false; +} +#endif bool vfio_file_is_valid(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); -bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) -- cgit v1.2.3 From 9a4087fab303e7923ab839a6fe35059659a54649 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 7 Aug 2023 13:57:48 -0700 Subject: vfio: Commonize combine_ranges for use in other VFIO drivers Currently only Mellanox uses the combine_ranges function. The new pds_vfio driver also needs this function. So, move it to a common location for other vendor drivers to use. Also, fix RCT ordering while moving/renaming the function. Cc: Yishai Hadas Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20230807205755.29579-2-brett.creeley@amd.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 5a1dee983f17..454e9295970c 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -283,6 +283,9 @@ int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state new_fsm, enum vfio_device_mig_state *next_fsm); +void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, + u32 req_nodes); + /* * External user API */ -- cgit v1.2.3 From b021d05e106e14b603a584b38ce62720e7d0f363 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 7 Aug 2023 13:57:50 -0700 Subject: pds_core: Require callers of register/unregister to pass PF drvdata Pass a pointer to the PF's private data structure rather than bouncing in and out of the PF's PCI function address. Signed-off-by: Shannon Nelson Signed-off-by: Brett Creeley Reviewed-by: Kevin Tian Reviewed-by: Shameer Kolothum Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230807205755.29579-4-brett.creeley@amd.com Signed-off-by: Alex Williamson --- include/linux/pds/pds_common.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 435c8e8161c2..04427dcc0a59 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -41,9 +41,11 @@ enum pds_core_vif_types { #define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR +struct pdsc; + int pdsc_register_notify(struct notifier_block *nb); void pdsc_unregister_notify(struct notifier_block *nb); void *pdsc_get_pf_struct(struct pci_dev *vf_pdev); -int pds_client_register(struct pci_dev *pf_pdev, char *devname); -int pds_client_unregister(struct pci_dev *pf_pdev, u16 client_id); +int pds_client_register(struct pdsc *pf, char *devname); +int pds_client_unregister(struct pdsc *pf, u16 client_id); #endif /* _PDS_COMMON_H_ */ -- cgit v1.2.3 From 63f77a7161a2df9924eea9be3b6c63be10151252 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 7 Aug 2023 13:57:51 -0700 Subject: vfio/pds: register with the pds_core PF The pds_core driver will supply adminq services, so find the PF and register with the DSC services. Use the following commands to enable a VF: echo 1 > /sys/bus/pci/drivers/pds_core/$PF_BDF/sriov_numvfs Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Reviewed-by: Kevin Tian Reviewed-by: Shameer Kolothum Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230807205755.29579-5-brett.creeley@amd.com Signed-off-by: Alex Williamson --- include/linux/pds/pds_common.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 04427dcc0a59..30581e2e04cc 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -34,12 +34,13 @@ enum pds_core_vif_types { #define PDS_DEV_TYPE_CORE_STR "Core" #define PDS_DEV_TYPE_VDPA_STR "vDPA" -#define PDS_DEV_TYPE_VFIO_STR "VFio" +#define PDS_DEV_TYPE_VFIO_STR "vfio" #define PDS_DEV_TYPE_ETH_STR "Eth" #define PDS_DEV_TYPE_RDMA_STR "RDMA" #define PDS_DEV_TYPE_LM_STR "LM" #define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR +#define PDS_VFIO_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR struct pdsc; -- cgit v1.2.3 From bb500dbe2ac622551d98c0bb2735a68f59489c98 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 7 Aug 2023 13:57:52 -0700 Subject: vfio/pds: Add VFIO live migration support Add live migration support via the VFIO subsystem. The migration implementation aligns with the definition from uapi/vfio.h and uses the pds_core PF's adminq for device configuration. The ability to suspend, resume, and transfer VF device state data is included along with the required admin queue command structures and implementations. PDS_LM_CMD_SUSPEND and PDS_LM_CMD_SUSPEND_STATUS are added to support the VF device suspend operation. PDS_LM_CMD_RESUME is added to support the VF device resume operation. PDS_LM_CMD_STATE_SIZE is added to determine the exact size of the VF device state data. PDS_LM_CMD_SAVE is added to get the VF device state data. PDS_LM_CMD_RESTORE is added to restore the VF device with the previously saved data from PDS_LM_CMD_SAVE. PDS_LM_CMD_HOST_VF_STATUS is added to notify the DSC/firmware when a migration is in/not-in progress from the host's perspective. The DSC/firmware can use this to clear/setup any necessary state related to a migration. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Reviewed-by: Kevin Tian Reviewed-by: Shameer Kolothum Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20230807205755.29579-6-brett.creeley@amd.com Signed-off-by: Alex Williamson --- include/linux/pds/pds_adminq.h | 197 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index bcba7fda3cc9..9c79b3c8fc47 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -818,6 +818,194 @@ struct pds_vdpa_set_features_cmd { __le64 features; }; +#define PDS_LM_DEVICE_STATE_LENGTH 65536 +#define PDS_LM_CHECK_DEVICE_STATE_LENGTH(X) \ + PDS_CORE_SIZE_CHECK(union, PDS_LM_DEVICE_STATE_LENGTH, X) + +/* + * enum pds_lm_cmd_opcode - Live Migration Device commands + */ +enum pds_lm_cmd_opcode { + PDS_LM_CMD_HOST_VF_STATUS = 1, + + /* Device state commands */ + PDS_LM_CMD_STATE_SIZE = 16, + PDS_LM_CMD_SUSPEND = 18, + PDS_LM_CMD_SUSPEND_STATUS = 19, + PDS_LM_CMD_RESUME = 20, + PDS_LM_CMD_SAVE = 21, + PDS_LM_CMD_RESTORE = 22, +}; + +/** + * struct pds_lm_cmd - generic command + * @opcode: Opcode + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Structure padding to 60 Bytes + */ +struct pds_lm_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[56]; +}; + +/** + * struct pds_lm_state_size_cmd - STATE_SIZE command + * @opcode: Opcode + * @rsvd: Word boundary padding + * @vf_id: VF id + */ +struct pds_lm_state_size_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_state_size_comp - STATE_SIZE command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the desc ring for which this is the completion + * @size: Size of the device state + * @rsvd2: Word boundary padding + * @color: Color bit + */ +struct pds_lm_state_size_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + union { + __le64 size; + u8 rsvd2[11]; + } __packed; + u8 color; +}; + +enum pds_lm_suspend_resume_type { + PDS_LM_SUSPEND_RESUME_TYPE_FULL = 0, + PDS_LM_SUSPEND_RESUME_TYPE_P2P = 1, +}; + +/** + * struct pds_lm_suspend_cmd - SUSPEND command + * @opcode: Opcode PDS_LM_CMD_SUSPEND + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of suspend (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_suspend_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_suspend_status_cmd - SUSPEND status command + * @opcode: Opcode PDS_AQ_CMD_LM_SUSPEND_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of suspend (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_suspend_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_resume_cmd - RESUME command + * @opcode: Opcode PDS_LM_CMD_RESUME + * @rsvd: Word boundary padding + * @vf_id: VF id + * @type: Type of resume (enum pds_lm_suspend_resume_type) + */ +struct pds_lm_resume_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 type; +}; + +/** + * struct pds_lm_sg_elem - Transmit scatter-gather (SG) descriptor element + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + * @rsvd: Word boundary padding + */ +struct pds_lm_sg_elem { + __le64 addr; + __le32 len; + __le16 rsvd[2]; +}; + +/** + * struct pds_lm_save_cmd - SAVE command + * @opcode: Opcode PDS_LM_CMD_SAVE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Word boundary padding + * @sgl_addr: IOVA address of the SGL to dma the device state + * @num_sge: Total number of SG elements + */ +struct pds_lm_save_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[4]; + __le64 sgl_addr; + __le32 num_sge; +} __packed; + +/** + * struct pds_lm_restore_cmd - RESTORE command + * @opcode: Opcode PDS_LM_CMD_RESTORE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @rsvd2: Word boundary padding + * @sgl_addr: IOVA address of the SGL to dma the device state + * @num_sge: Total number of SG elements + */ +struct pds_lm_restore_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 rsvd2[4]; + __le64 sgl_addr; + __le32 num_sge; +} __packed; + +/** + * union pds_lm_dev_state - device state information + * @words: Device state words + */ +union pds_lm_dev_state { + __le32 words[PDS_LM_DEVICE_STATE_LENGTH / sizeof(__le32)]; +}; + +enum pds_lm_host_vf_status { + PDS_LM_STA_NONE = 0, + PDS_LM_STA_IN_PROGRESS, + PDS_LM_STA_MAX, +}; + +/** + * struct pds_lm_host_vf_status_cmd - HOST_VF_STATUS command + * @opcode: Opcode PDS_LM_CMD_HOST_VF_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @status: Current LM status of host VF driver (enum pds_lm_host_status) + */ +struct pds_lm_host_vf_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 status; +}; + union pds_core_adminq_cmd { u8 opcode; u8 bytes[64]; @@ -844,6 +1032,13 @@ union pds_core_adminq_cmd { struct pds_vdpa_vq_init_cmd vdpa_vq_init; struct pds_vdpa_vq_reset_cmd vdpa_vq_reset; + struct pds_lm_suspend_cmd lm_suspend; + struct pds_lm_suspend_status_cmd lm_suspend_status; + struct pds_lm_resume_cmd lm_resume; + struct pds_lm_state_size_cmd lm_state_size; + struct pds_lm_save_cmd lm_save; + struct pds_lm_restore_cmd lm_restore; + struct pds_lm_host_vf_status_cmd lm_host_vf_status; }; union pds_core_adminq_comp { @@ -868,6 +1063,8 @@ union pds_core_adminq_comp { struct pds_vdpa_vq_init_comp vdpa_vq_init; struct pds_vdpa_vq_reset_comp vdpa_vq_reset; + + struct pds_lm_state_size_comp lm_state_size; }; #ifndef __CHECKER__ -- cgit v1.2.3 From f232836a9152c34ffd82bb5d5c242a1f6808be12 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 7 Aug 2023 13:57:53 -0700 Subject: vfio/pds: Add support for dirty page tracking In order to support dirty page tracking, the driver has to implement the VFIO subsystem's vfio_log_ops. This includes log_start, log_stop, and log_read_and_clear. All of the tracker resources are allocated and dirty tracking on the device is started during log_start. The resources are cleaned up and dirty tracking on the device is stopped during log_stop. The dirty pages are determined and reported during log_read_and_clear. In order to support these callbacks admin queue commands are used. All of the adminq queue command structures and implementations are included as part of this patch. PDS_LM_CMD_DIRTY_STATUS is added to query the current status of dirty tracking on the device. This includes if it's enabled (i.e. number of regions being tracked from the device's perspective) and the maximum number of regions supported from the device's perspective. PDS_LM_CMD_DIRTY_ENABLE is added to enable dirty tracking on the specified number of regions and their iova ranges. PDS_LM_CMD_DIRTY_DISABLE is added to disable dirty tracking for all regions on the device. PDS_LM_CMD_READ_SEQ and PDS_LM_CMD_DIRTY_WRITE_ACK are added to support reading and acknowledging the currently dirtied pages. Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20230807205755.29579-7-brett.creeley@amd.com Signed-off-by: Alex Williamson --- include/linux/pds/pds_adminq.h | 178 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index 9c79b3c8fc47..4b4e9a98b37b 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -835,6 +835,13 @@ enum pds_lm_cmd_opcode { PDS_LM_CMD_RESUME = 20, PDS_LM_CMD_SAVE = 21, PDS_LM_CMD_RESTORE = 22, + + /* Dirty page tracking commands */ + PDS_LM_CMD_DIRTY_STATUS = 32, + PDS_LM_CMD_DIRTY_ENABLE = 33, + PDS_LM_CMD_DIRTY_DISABLE = 34, + PDS_LM_CMD_DIRTY_READ_SEQ = 35, + PDS_LM_CMD_DIRTY_WRITE_ACK = 36, }; /** @@ -992,6 +999,172 @@ enum pds_lm_host_vf_status { PDS_LM_STA_MAX, }; +/** + * struct pds_lm_dirty_region_info - Memory region info for STATUS and ENABLE + * @dma_base: Base address of the DMA-contiguous memory region + * @page_count: Number of pages in the memory region + * @page_size_log2: Log2 page size in the memory region + * @rsvd: Word boundary padding + */ +struct pds_lm_dirty_region_info { + __le64 dma_base; + __le32 page_count; + u8 page_size_log2; + u8 rsvd[3]; +}; + +/** + * struct pds_lm_dirty_status_cmd - DIRTY_STATUS command + * @opcode: Opcode PDS_LM_CMD_DIRTY_STATUS + * @rsvd: Word boundary padding + * @vf_id: VF id + * @max_regions: Capacity of the region info buffer + * @rsvd2: Word boundary padding + * @regions_dma: DMA address of the region info buffer + * + * The minimum of max_regions (from the command) and num_regions (from the + * completion) of struct pds_lm_dirty_region_info will be written to + * regions_dma. + * + * The max_regions may be zero, in which case regions_dma is ignored. In that + * case, the completion will only report the maximum number of regions + * supported by the device, and the number of regions currently enabled. + */ +struct pds_lm_dirty_status_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 max_regions; + u8 rsvd2[3]; + __le64 regions_dma; +} __packed; + +/** + * enum pds_lm_dirty_bmp_type - Type of dirty page bitmap + * @PDS_LM_DIRTY_BMP_TYPE_NONE: No bitmap / disabled + * @PDS_LM_DIRTY_BMP_TYPE_SEQ_ACK: Seq/Ack bitmap representation + */ +enum pds_lm_dirty_bmp_type { + PDS_LM_DIRTY_BMP_TYPE_NONE = 0, + PDS_LM_DIRTY_BMP_TYPE_SEQ_ACK = 1, +}; + +/** + * struct pds_lm_dirty_status_comp - STATUS command completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd: Word boundary padding + * @comp_index: Index in the desc ring for which this is the completion + * @max_regions: Maximum number of regions supported by the device + * @num_regions: Number of regions currently enabled + * @bmp_type: Type of dirty bitmap representation + * @rsvd2: Word boundary padding + * @bmp_type_mask: Mask of supported bitmap types, bit index per type + * @rsvd3: Word boundary padding + * @color: Color bit + * + * This completion descriptor is used for STATUS, ENABLE, and DISABLE. + */ +struct pds_lm_dirty_status_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 max_regions; + u8 num_regions; + u8 bmp_type; + u8 rsvd2; + __le32 bmp_type_mask; + u8 rsvd3[3]; + u8 color; +}; + +/** + * struct pds_lm_dirty_enable_cmd - DIRTY_ENABLE command + * @opcode: Opcode PDS_LM_CMD_DIRTY_ENABLE + * @rsvd: Word boundary padding + * @vf_id: VF id + * @bmp_type: Type of dirty bitmap representation + * @num_regions: Number of entries in the region info buffer + * @rsvd2: Word boundary padding + * @regions_dma: DMA address of the region info buffer + * + * The num_regions must be nonzero, and less than or equal to the maximum + * number of regions supported by the device. + * + * The memory regions should not overlap. + * + * The information should be initialized by the driver. The device may modify + * the information on successful completion, such as by size-aligning the + * number of pages in a region. + * + * The modified number of pages will be greater than or equal to the page count + * given in the enable command, and at least as coarsly aligned as the given + * value. For example, the count might be aligned to a multiple of 64, but + * if the value is already a multiple of 128 or higher, it will not change. + * If the driver requires its own minimum alignment of the number of pages, the + * driver should account for that already in the region info of this command. + * + * This command uses struct pds_lm_dirty_status_comp for its completion. + */ +struct pds_lm_dirty_enable_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + u8 bmp_type; + u8 num_regions; + u8 rsvd2[2]; + __le64 regions_dma; +} __packed; + +/** + * struct pds_lm_dirty_disable_cmd - DIRTY_DISABLE command + * @opcode: Opcode PDS_LM_CMD_DIRTY_DISABLE + * @rsvd: Word boundary padding + * @vf_id: VF id + * + * Dirty page tracking will be disabled. This may be called in any state, as + * long as dirty page tracking is supported by the device, to ensure that dirty + * page tracking is disabled. + * + * This command uses struct pds_lm_dirty_status_comp for its completion. On + * success, num_regions will be zero. + */ +struct pds_lm_dirty_disable_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; +}; + +/** + * struct pds_lm_dirty_seq_ack_cmd - DIRTY_READ_SEQ or _WRITE_ACK command + * @opcode: Opcode PDS_LM_CMD_DIRTY_[READ_SEQ|WRITE_ACK] + * @rsvd: Word boundary padding + * @vf_id: VF id + * @off_bytes: Byte offset in the bitmap + * @len_bytes: Number of bytes to transfer + * @num_sge: Number of DMA scatter gather elements + * @rsvd2: Word boundary padding + * @sgl_addr: DMA address of scatter gather list + * + * Read bytes from the SEQ bitmap, or write bytes into the ACK bitmap. + * + * This command treats the entire bitmap as a byte buffer. It does not + * distinguish between guest memory regions. The driver should refer to the + * number of pages in each region, according to PDS_LM_CMD_DIRTY_STATUS, to + * determine the region boundaries in the bitmap. Each region will be + * represented by exactly the number of bits as the page count for that region, + * immediately following the last bit of the previous region. + */ +struct pds_lm_dirty_seq_ack_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + __le32 off_bytes; + __le32 len_bytes; + __le16 num_sge; + u8 rsvd2[2]; + __le64 sgl_addr; +} __packed; + /** * struct pds_lm_host_vf_status_cmd - HOST_VF_STATUS command * @opcode: Opcode PDS_LM_CMD_HOST_VF_STATUS @@ -1039,6 +1212,10 @@ union pds_core_adminq_cmd { struct pds_lm_save_cmd lm_save; struct pds_lm_restore_cmd lm_restore; struct pds_lm_host_vf_status_cmd lm_host_vf_status; + struct pds_lm_dirty_status_cmd lm_dirty_status; + struct pds_lm_dirty_enable_cmd lm_dirty_enable; + struct pds_lm_dirty_disable_cmd lm_dirty_disable; + struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack; }; union pds_core_adminq_comp { @@ -1065,6 +1242,7 @@ union pds_core_adminq_comp { struct pds_vdpa_vq_reset_comp vdpa_vq_reset; struct pds_lm_state_size_comp lm_state_size; + struct pds_lm_dirty_status_comp lm_dirty_status; }; #ifndef __CHECKER__ -- cgit v1.2.3