From 68cbbc3a9d1fc231810b2490bca73b3b444ef542 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 Mar 2015 16:42:09 +1100 Subject: drivers/vfio: Support EEH error injection The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR) to inject the specified EEH error, which is represented by (struct vfio_eeh_pe_err), to the indicated PE for testing purpose. Signed-off-by: Gavin Shan Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman --- include/uapi/linux/vfio.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index b57b750c222f..e4fa1995f613 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -470,12 +470,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; + union { + struct vfio_eeh_pe_err err; + }; }; #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ @@ -492,6 +503,7 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) -- cgit v1.2.3 From 2157e7b82f3b81f57bd80cd67cef09ef26e5f74c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:25 +1000 Subject: vfio: powerpc/spapr: Register memory and define IOMMU v2 The existing implementation accounts the whole DMA window in the locked_vm counter. This is going to be worse with multiple containers and huge DMA windows. Also, real-time accounting would requite additional tracking of accounted pages due to the page size difference - IOMMU uses 4K pages and system uses 4K or 64K pages. Another issue is that actual pages pinning/unpinning happens on every DMA map/unmap request. This does not affect the performance much now as we spend way too much time now on switching context between guest/userspace/host but this will start to matter when we add in-kernel DMA map/unmap acceleration. This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU. New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces 2 new ioctls to register/unregister DMA memory - VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - which receive user space address and size of a memory region which needs to be pinned/unpinned and counted in locked_vm. New IOMMU splits physical pages pinning and TCE table update into 2 different operations. It requires: 1) guest pages to be registered first 2) consequent map/unmap requests to work only with pre-registered memory. For the default single window case this means that the entire guest (instead of 2GB) needs to be pinned before using VFIO. When a huge DMA window is added, no additional pinning will be required, otherwise it would be guest RAM + 2GB. The new memory registration ioctls are not supported by VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration will require memory to be preregistered in order to work. The accounting is done per the user process. This advertises v2 SPAPR TCE IOMMU and restricts what the userspace can do with v1 or v2 IOMMUs. In order to support memory pre-registration, we need a way to track the use of every registered memory region and only allow unregistration if a region is not in use anymore. So we need a way to tell from what region the just cleared TCE was from. This adds a userspace view of the TCE table into iommu_table struct. It contains userspace address, one per TCE entry. The table is only allocated when the ownership over an IOMMU group is taken which means it is only used from outside of the powernv code (such as VFIO). As v2 IOMMU supports IODA2 and pre-IODA2 IOMMUs (which do not support DDW API), this creates a default DMA window for IODA2 for consistency. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- include/uapi/linux/vfio.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index e4fa1995f613..fa84391a0d00 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -36,6 +36,8 @@ /* Two-stage IOMMU */ #define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -507,6 +509,31 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. + */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From e633bc86a922468a82300eef5b9802e17be5e23d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:26 +1000 Subject: vfio: powerpc/spapr: Support Dynamic DMA windows This adds create/remove window ioctls to create and remove DMA windows. sPAPR defines a Dynamic DMA windows capability which allows para-virtualized guests to create additional DMA windows on a PCI bus. The existing linux kernels use this new window to map the entire guest memory and switch to the direct DMA operations saving time on map/unmap requests which would normally happen in a big amounts. This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows. Up to 2 windows are supported now by the hardware and by this driver. This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional information such as a number of supported windows and maximum number levels of TCE tables. DDW is added as a capability, not as a SPAPR TCE IOMMU v2 unique feature as we still want to support v2 on platforms which cannot do DDW for the sake of TCE acceleration in KVM (coming soon). Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- include/uapi/linux/vfio.h | 61 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index fa84391a0d00..9fd7b5d8df2f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -444,6 +444,23 @@ struct vfio_iommu_type1_dma_unmap { /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ +/* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + /* * The SPAPR TCE info struct provides the information about the PCI bus * address ranges available for DMA, these values are programmed into @@ -454,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap { * addresses too so the window works as a filter rather than an offset * for IOVA addresses. * - * A flag will need to be added if other page sizes are supported, - * so as defined here, it is always 4k. + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. */ struct vfio_iommu_spapr_tce_info { __u32 argsz; - __u32 flags; /* reserved for future use */ + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ __u32 dma32_window_start; /* 32 bit window start (bytes) */ __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; }; #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) @@ -534,6 +554,41 @@ struct vfio_iommu_spapr_register_memory { */ #define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u64 window_size; + __u32 levels; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. + */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3