From 68cbbc3a9d1fc231810b2490bca73b3b444ef542 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 Mar 2015 16:42:09 +1100 Subject: drivers/vfio: Support EEH error injection The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR) to inject the specified EEH error, which is represented by (struct vfio_eeh_pe_err), to the indicated PE for testing purpose. Signed-off-by: Gavin Shan Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman --- include/uapi/linux/vfio.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index b57b750c222f..e4fa1995f613 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -470,12 +470,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; + union { + struct vfio_eeh_pe_err err; + }; }; #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ @@ -492,6 +503,7 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) -- cgit v1.2.3 From 27d4dc7116eed98775902627ba61b70e9045e321 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Wed, 29 Apr 2015 15:47:23 +0530 Subject: cxl: Implement an ioctl to fetch afu card-id, offset-id and mode Given a file descriptor on an afu device, libcxl currently uses the major/minor number obtained from fstat on the fd to construct path to the afu's sysfs directory. However it is possible that rather than using one of the device in /dev/cxl, a kernel driver creates its own device which export generic cxl interface to the userspace. This causes problems with libcxl as it tries to use a wrong major/minor number to construct the sysfs path and fail. So this patch introduces a new ioctl called CXL_IOCTL_GET_AFU_ID on the afu file descriptor to fetch the cxl_afu_id struct that holds the card/offset-id and mode information. These info is then used by libcxl to construct the correct path to the afu sysfs directory. Testing: - Build against pseries be/le configs - Testing with corresponding libcxl changes to verify that it constructs right sysfs path to the afu. Signed-off-by: Vaibhav Jain Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- include/uapi/misc/cxl.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index cd6d789b73ec..99a8ca15fe64 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -32,10 +32,32 @@ struct cxl_ioctl_start_work { #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ CXL_START_WORK_NUM_IRQS) + +/* Possible modes that an afu can be in */ +#define CXL_MODE_DEDICATED 0x1 +#define CXL_MODE_DIRECTED 0x2 + +/* possible flags for the cxl_afu_id flags field */ +#define CXL_AFUID_FLAG_SLAVE 0x1 /* In directed-mode afu is in slave mode */ + +struct cxl_afu_id { + __u64 flags; /* One of CXL_AFUID_FLAG_X */ + __u32 card_id; + __u32 afu_offset; + __u32 afu_mode; /* one of the CXL_MODE_X */ + __u32 reserved1; + __u64 reserved2; + __u64 reserved3; + __u64 reserved4; + __u64 reserved5; + __u64 reserved6; +}; + /* ioctl numbers */ #define CXL_MAGIC 0xCA #define CXL_IOCTL_START_WORK _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work) #define CXL_IOCTL_GET_PROCESS_ELEMENT _IOR(CXL_MAGIC, 0x01, __u32) +#define CXL_IOCTL_GET_AFU_ID _IOR(CXL_MAGIC, 0x02, struct cxl_afu_id) #define CXL_READ_MIN_SIZE 0x1000 /* 4K */ -- cgit v1.2.3 From 2157e7b82f3b81f57bd80cd67cef09ef26e5f74c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:25 +1000 Subject: vfio: powerpc/spapr: Register memory and define IOMMU v2 The existing implementation accounts the whole DMA window in the locked_vm counter. This is going to be worse with multiple containers and huge DMA windows. Also, real-time accounting would requite additional tracking of accounted pages due to the page size difference - IOMMU uses 4K pages and system uses 4K or 64K pages. Another issue is that actual pages pinning/unpinning happens on every DMA map/unmap request. This does not affect the performance much now as we spend way too much time now on switching context between guest/userspace/host but this will start to matter when we add in-kernel DMA map/unmap acceleration. This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU. New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces 2 new ioctls to register/unregister DMA memory - VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - which receive user space address and size of a memory region which needs to be pinned/unpinned and counted in locked_vm. New IOMMU splits physical pages pinning and TCE table update into 2 different operations. It requires: 1) guest pages to be registered first 2) consequent map/unmap requests to work only with pre-registered memory. For the default single window case this means that the entire guest (instead of 2GB) needs to be pinned before using VFIO. When a huge DMA window is added, no additional pinning will be required, otherwise it would be guest RAM + 2GB. The new memory registration ioctls are not supported by VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration will require memory to be preregistered in order to work. The accounting is done per the user process. This advertises v2 SPAPR TCE IOMMU and restricts what the userspace can do with v1 or v2 IOMMUs. In order to support memory pre-registration, we need a way to track the use of every registered memory region and only allow unregistration if a region is not in use anymore. So we need a way to tell from what region the just cleared TCE was from. This adds a userspace view of the TCE table into iommu_table struct. It contains userspace address, one per TCE entry. The table is only allocated when the ownership over an IOMMU group is taken which means it is only used from outside of the powernv code (such as VFIO). As v2 IOMMU supports IODA2 and pre-IODA2 IOMMUs (which do not support DDW API), this creates a default DMA window for IODA2 for consistency. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- include/uapi/linux/vfio.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index e4fa1995f613..fa84391a0d00 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -36,6 +36,8 @@ /* Two-stage IOMMU */ #define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -507,6 +509,31 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. + */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From e633bc86a922468a82300eef5b9802e17be5e23d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:26 +1000 Subject: vfio: powerpc/spapr: Support Dynamic DMA windows This adds create/remove window ioctls to create and remove DMA windows. sPAPR defines a Dynamic DMA windows capability which allows para-virtualized guests to create additional DMA windows on a PCI bus. The existing linux kernels use this new window to map the entire guest memory and switch to the direct DMA operations saving time on map/unmap requests which would normally happen in a big amounts. This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows. Up to 2 windows are supported now by the hardware and by this driver. This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional information such as a number of supported windows and maximum number levels of TCE tables. DDW is added as a capability, not as a SPAPR TCE IOMMU v2 unique feature as we still want to support v2 on platforms which cannot do DDW for the sake of TCE acceleration in KVM (coming soon). Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- include/uapi/linux/vfio.h | 61 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index fa84391a0d00..9fd7b5d8df2f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -444,6 +444,23 @@ struct vfio_iommu_type1_dma_unmap { /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ +/* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + /* * The SPAPR TCE info struct provides the information about the PCI bus * address ranges available for DMA, these values are programmed into @@ -454,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap { * addresses too so the window works as a filter rather than an offset * for IOVA addresses. * - * A flag will need to be added if other page sizes are supported, - * so as defined here, it is always 4k. + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. */ struct vfio_iommu_spapr_tce_info { __u32 argsz; - __u32 flags; /* reserved for future use */ + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ __u32 dma32_window_start; /* 32 bit window start (bytes) */ __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; }; #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) @@ -534,6 +554,41 @@ struct vfio_iommu_spapr_register_memory { */ #define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u64 window_size; + __u32 levels; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. + */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3