diff options
author | Alexey Kardashevskiy <aik@ozlabs.ru> | 2015-06-05 16:35:09 +1000 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2015-06-11 15:16:15 +1000 |
commit | 0eaf4defc7c44ed5dd33a03cab12a5f88c9b4b86 (patch) | |
tree | b95470bf0da461090ac3f8aa840110cdb0abb1ec /arch/powerpc/platforms/powernv | |
parent | b348aa65297659c310943221ac1d3f4b4491ea44 (diff) |
powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group
So far one TCE table could only be used by one IOMMU group. However
IODA2 hardware allows programming the same TCE table address to
multiple PE allowing sharing tables.
This replaces a single pointer to a group in a iommu_table struct
with a linked list of groups which provides the way of invalidating
TCE cache for every PE when an actual TCE table is updated. This adds
pnv_pci_link_table_and_group() and pnv_pci_unlink_table_and_group()
helpers to manage the list. However without VFIO, it is still going
to be a single IOMMU group per iommu_table.
This changes iommu_add_device() to add a device to a first group
from the group list of a table as it is only called from the platform
init code or PCI bus notifier and at these moments there is only
one group per table.
This does not change TCE invalidation code to loop through all
attached groups in order to simplify this patch and because
it is not really needed in most cases. IODA2 is fixed in a later
patch.
This should cause no behavioural change.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for the vfio related changes]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 45 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-p5ioc2.c | 3 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.c | 75 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.h | 7 |
4 files changed, 113 insertions, 17 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 279dadf43d5a..3b4130697b05 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1288,7 +1288,6 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe struct iommu_table *tbl; unsigned long addr; int64_t rc; - struct iommu_table_group *table_group; bus = dev->bus; hose = pci_bus_to_host(bus); @@ -1308,14 +1307,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); - table_group = tbl->it_table_group; - if (table_group->group) { - iommu_group_put(table_group->group); - BUG_ON(table_group->group); + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + BUG_ON(pe->table_group.group); } iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); free_pages(addr, get_order(TCE32_TABLE_SIZE)); - pe->table_group.tables[0] = NULL; } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) @@ -1676,7 +1674,10 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : @@ -1754,7 +1755,10 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? @@ -1831,12 +1835,10 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->hose->node); - tbl->it_table_group = &pe->table_group; - pe->table_group.tables[0] = tbl; + tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* Grab a 32-bit TCE table */ pe->tce32_seg = base; @@ -1911,11 +1913,18 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, pe->tce32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); + if (tbl) { + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + iommu_free_table(tbl, "pnv"); + } } static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1970,12 +1979,10 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->hose->node); - tbl->it_table_group = &pe->table_group; - pe->table_group.tables[0] = tbl; + tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; @@ -2048,6 +2055,10 @@ fail: pe->tce32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(tce_table_size)); + if (tbl) { + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + iommu_free_table(tbl, "pnv"); + } } static void pnv_ioda_setup_dma(struct pnv_phb *phb) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 6e00104093d6..f80e5a1d6117 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -99,6 +99,9 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); iommu_register_group(&phb->p5ioc2.table_group, pci_domain_nr(phb->hose->bus), phb->opal_id); + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + pnv_pci_link_table_and_group(phb->hose->node, 0, + tbl, &phb->p5ioc2.table_group); } set_iommu_table_base(&pdev->dev, tbl); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f72fc6e7d63d..00f1abd34379 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -606,6 +606,81 @@ unsigned long pnv_tce_get(struct iommu_table *tbl, long index) return ((u64 *)tbl->it_base)[index - tbl->it_offset]; } +struct iommu_table *pnv_pci_table_alloc(int nid) +{ + struct iommu_table *tbl; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + + return tbl; +} + +long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + struct iommu_table_group_link *tgl = NULL; + + if (WARN_ON(!tbl || !table_group)) + return -EINVAL; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + return -ENOMEM; + + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[num] = tbl; + + return 0; +} + +static void pnv_iommu_table_group_link_free(struct rcu_head *head) +{ + struct iommu_table_group_link *tgl = container_of(head, + struct iommu_table_group_link, rcu); + + kfree(tgl); +} + +void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + long i; + bool found; + struct iommu_table_group_link *tgl; + + if (!tbl || !table_group) + return; + + /* Remove link to a group from table's list of attached groups */ + found = false; + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + if (tgl->table_group == table_group) { + list_del_rcu(&tgl->next); + call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); + found = true; + break; + } + } + if (WARN_ON(!found)) + return; + + /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ + found = false; + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] == tbl) { + table_group->tables[i] = NULL; + found = true; + break; + } + } + WARN_ON(!found); +} + void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d8ba34d7af1b..d37cb4da409f 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -210,6 +210,13 @@ int pnv_pci_cfg_read(struct pci_dn *pdn, int where, int size, u32 *val); int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); +extern struct iommu_table *pnv_pci_table_alloc(int nid); + +extern long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift); |