diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-11 06:49:24 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-11 06:49:24 -0400 |
commit | 27a9716bc80448f7e98fb1fb316daba241a4c539 (patch) | |
tree | d9a82dc1e1eddd1ff2a27d558149ad8323975134 | |
parent | e98d6e7f7625ed60c7bc1d39aeb2375ed3918fd5 (diff) | |
parent | 93899a679fd6b2534b5c297d9316bae039ebcbe1 (diff) |
Merge tag 'vfio-v3.18-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:
 - Nested IOMMU extension to type1 (Will Deacon)
 - Restore MSIx message before enabling (Gavin Shan)
 - Fix remove path locking (Alex Williamson)

* tag 'vfio-v3.18-rc1' of git://github.com/awilliam/linux-vfio:
  vfio-pci: Fix remove path locking
  drivers/vfio: Export vfio_spapr_iommu_eeh_ioctl() with GPL
  vfio/pci: Restore MSIx message prior to enabling
  PCI: Export MSI message relevant functions
  vfio/iommu_type1: add new VFIO_TYPE1_NESTING_IOMMU IOMMU type
  iommu: introduce domain attribute for nesting IOMMUs

-rw-r--r-- | drivers/pci/msi.c | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 136 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 15 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 30 | ||||
-rw-r--r-- | drivers/vfio/vfio_spapr_eeh.c | 2 | ||||
-rw-r--r-- | include/linux/iommu.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/vfio.h | 3 |
7 files changed, 104 insertions, 85 deletions
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 2f7c92c4757a..9fab30af0e75 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -302,6 +302,7 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) __get_cached_msi_msg(entry, msg); } +EXPORT_SYMBOL_GPL(get_cached_msi_msg); void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { @@ -346,6 +347,7 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) __write_msi_msg(entry, msg); } +EXPORT_SYMBOL_GPL(write_msi_msg); static void free_msi_irqs(struct pci_dev *dev) { diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f7825332a325..9558da3f06a0 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -876,15 +876,11 @@ static void vfio_pci_remove(struct pci_dev *pdev) { struct vfio_pci_device *vdev; - mutex_lock(&driver_lock); - vdev = vfio_del_group_dev(&pdev->dev); if (vdev) { iommu_group_put(pdev->dev.iommu_group); kfree(vdev); } - - mutex_unlock(&driver_lock); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -927,108 +923,90 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; -/* - * Test whether a reset is necessary and possible. We mark devices as - * needs_reset when they are released, but don't have a function-local reset - * available. If any of these exist in the affected devices, we want to do - * a bus/slot reset. We also need all of the affected devices to be unused, - * so we abort if any device has a non-zero refcnt. driver_lock prevents a - * device from being opened during the scan or unbound from vfio-pci. 
- */ -static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data) -{ - bool *needs_reset = data; - struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); - int ret = -EBUSY; - - if (pci_drv == &vfio_pci_driver) { - struct vfio_device *device; - struct vfio_pci_device *vdev; - - device = vfio_device_get_from_dev(&pdev->dev); - if (!device) - return ret; - - vdev = vfio_device_data(device); - if (vdev) { - if (vdev->needs_reset) - *needs_reset = true; - - if (!vdev->refcnt) - ret = 0; - } - - vfio_device_put(device); - } - - /* - * TODO: vfio-core considers groups to be viable even if some devices - * are attached to known drivers, like pci-stub or pcieport. We can't - * freeze devices from being unbound to those drivers like we can - * here though, so it would be racy to test for them. We also can't - * use device_lock() to prevent changes as that would interfere with - * PCI-core taking device_lock during bus reset. For now, we require - * devices to be bound to vfio-pci to get a bus/slot reset on release. 
- */ - - return ret; -} +struct vfio_devices { + struct vfio_device **devices; + int cur_index; + int max_index; +}; -/* Clear needs_reset on all affected devices after successful bus/slot reset */ -static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data) +static int vfio_pci_get_devs(struct pci_dev *pdev, void *data) { + struct vfio_devices *devs = data; struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); - if (pci_drv == &vfio_pci_driver) { - struct vfio_device *device; - struct vfio_pci_device *vdev; + if (pci_drv != &vfio_pci_driver) + return -EBUSY; - device = vfio_device_get_from_dev(&pdev->dev); - if (!device) - return 0; + if (devs->cur_index == devs->max_index) + return -ENOSPC; - vdev = vfio_device_data(device); - if (vdev) - vdev->needs_reset = false; - - vfio_device_put(device); - } + devs->devices[devs->cur_index] = vfio_device_get_from_dev(&pdev->dev); + if (!devs->devices[devs->cur_index]) + return -EINVAL; + devs->cur_index++; return 0; } /* * Attempt to do a bus/slot reset if there are devices affected by a reset for * this device that are needs_reset and all of the affected devices are unused - * (!refcnt). Callers of this function are required to hold driver_lock such - * that devices can not be unbound from vfio-pci or opened by a user while we - * test for and perform a bus/slot reset. + * (!refcnt). Callers are required to hold driver_lock when calling this to + * prevent device opens and concurrent bus reset attempts. We prevent device + * unbinds by acquiring and holding a reference to the vfio_device. + * + * NB: vfio-core considers a group to be viable even if some devices are + * bound to drivers like pci-stub or pcieport. Here we require all devices + * to be bound to vfio_pci since that's the only way we can be sure they + * stay put. 
*/ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) { + struct vfio_devices devs = { .cur_index = 0 }; + int i = 0, ret = -EINVAL; bool needs_reset = false, slot = false; - int ret; + struct vfio_pci_device *tmp; if (!pci_probe_reset_slot(vdev->pdev->slot)) slot = true; else if (pci_probe_reset_bus(vdev->pdev->bus)) return; - if (vfio_pci_for_each_slot_or_bus(vdev->pdev, - vfio_pci_test_bus_reset, - &needs_reset, slot) || !needs_reset) + if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs, + &i, slot) || !i) return; - if (slot) - ret = pci_try_reset_slot(vdev->pdev->slot); - else - ret = pci_try_reset_bus(vdev->pdev->bus); - - if (ret) + devs.max_index = i; + devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL); + if (!devs.devices) return; - vfio_pci_for_each_slot_or_bus(vdev->pdev, - vfio_pci_clear_needs_reset, NULL, slot); + if (vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_get_devs, &devs, slot)) + goto put_devs; + + for (i = 0; i < devs.cur_index; i++) { + tmp = vfio_device_data(devs.devices[i]); + if (tmp->needs_reset) + needs_reset = true; + if (tmp->refcnt) + goto put_devs; + } + + if (needs_reset) + ret = slot ? 
pci_try_reset_slot(vdev->pdev->slot) : + pci_try_reset_bus(vdev->pdev->bus); + +put_devs: + for (i = 0; i < devs.cur_index; i++) { + if (!ret) { + tmp = vfio_device_data(devs.devices[i]); + tmp->needs_reset = false; + } + vfio_device_put(devs.devices[i]); + } + + kfree(devs.devices); } static void __exit vfio_pci_cleanup(void) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 9dd49c9839ac..553212f037c3 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -16,6 +16,7 @@ #include <linux/device.h> #include <linux/interrupt.h> #include <linux/eventfd.h> +#include <linux/msi.h> #include <linux/pci.h> #include <linux/file.h> #include <linux/poll.h> @@ -548,6 +549,20 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, return PTR_ERR(trigger); } + /* + * The MSIx vector table resides in device memory which may be cleared + * via backdoor resets. We don't allow direct access to the vector + * table so even if a userspace driver attempts to save/restore around + * such a reset it would be unsuccessful. To avoid this, restore the + * cached value of the message prior to enabling. 
+ */ + if (msix) { + struct msi_msg msg; + + get_cached_msi_msg(irq, &msg); + write_msi_msg(irq, &msg); + } + ret = request_irq(irq, vfio_msihandler, 0, vdev->ctx[vector].name, trigger); if (ret) { diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0734fbe5b651..583ccdb2c58f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -57,7 +57,8 @@ struct vfio_iommu { struct list_head domain_list; struct mutex lock; struct rb_root dma_list; - bool v2; + bool v2; + bool nesting; }; struct vfio_domain { @@ -705,6 +706,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_free; } + if (iommu->nesting) { + int attr = 1; + + ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING, + &attr); + if (ret) + goto out_domain; + } + ret = iommu_attach_group(domain->domain, iommu_group); if (ret) goto out_domain; @@ -819,17 +829,26 @@ static void *vfio_iommu_type1_open(unsigned long arg) { struct vfio_iommu *iommu; - if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU) - return ERR_PTR(-EINVAL); - iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) return ERR_PTR(-ENOMEM); + switch (arg) { + case VFIO_TYPE1_IOMMU: + break; + case VFIO_TYPE1_NESTING_IOMMU: + iommu->nesting = true; + case VFIO_TYPE1v2_IOMMU: + iommu->v2 = true; + break; + default: + kfree(iommu); + return ERR_PTR(-EINVAL); + } + INIT_LIST_HEAD(&iommu->domain_list); iommu->dma_list = RB_ROOT; mutex_init(&iommu->lock); - iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU); return iommu; } @@ -885,6 +904,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, switch (arg) { case VFIO_TYPE1_IOMMU: case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_NESTING_IOMMU: return 1; case VFIO_DMA_CC_IOMMU: if (!iommu) diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 86dfceb9201f..5fa42db769ee 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -92,7 +92,7 @@ long 
vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return ret; } -EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); +EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 20f9a527922a..7b02bcc85b9e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -80,6 +80,7 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMU_STASH, DOMAIN_ATTR_FSL_PAMU_ENABLE, DOMAIN_ATTR_FSL_PAMUV1, + DOMAIN_ATTR_NESTING, /* two stages of translation */ DOMAIN_ATTR_MAX, }; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 6612974c64bf..29715d27548f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -33,6 +33,9 @@ /* Check if EEH is supported */ #define VFIO_EEH 5 +/* Two-stage IOMMU */ +#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between |