diff options
author | David S. Miller <davem@davemloft.net> | 2015-04-16 12:45:18 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-04-16 12:45:18 -0700 |
commit | a83f5d6a11ced6f43ec467a95fe8cc76e1c96cfd (patch) | |
tree | d3bc9387c3d86187c870e967e887513c61d156b4 | |
parent | 497a5df7bf6ffd136ae21c49d1a01292930d7ca2 (diff) | |
parent | 671d773297969bebb1732e1cdc1ec03aa53c6be2 (diff) |
Merge branch 'generic-iommu-allocator'
Sowmini Varadhan says:
====================
Generic IOMMU pooled allocator
Investigation of network performance on Sparc shows a high
degree of locking contention in the IOMMU allocator, and it
was noticed that the PowerPC code has a better locking model.
This patch series tries to extract the generic parts of the
PowerPC code so that it can be shared across multiple PCI
devices and architectures.
v10: resend patchv9 without RFC tag, and a new mail Message-Id,
(previous non-RFC attempt did not show up on the patchwork queue?)
Full revision history below:
v2 changes:
- incorporate David Miller editorial comments: sparc specific
fields moved from iommu-common into sparc's iommu_64.h
- make the npools value an input parameter, for the case when
the iommu map size is not very large
- cookie_to_index mapping, and optimizations for span-boundary
check, for use case such as LDC.
v3: eliminate iommu_sparc, rearrange the ->demap indirection to
be invoked under the pool lock.
v4: David Miller review changes:
- s/IOMMU_ERROR_CODE/DMA_ERROR_CODE
- page_table_map_base and page_table_shift are unsigned long, not u32.
v5: removed ->cookie_to_index and ->demap indirection from the
iommu_tbl_ops The caller needs to call these functions as needed,
before invoking the generic arena allocator functions.
Added the "skip_span_boundary" argument to iommu_tbl_pool_init() for
those callers like LDC which do no care about span boundary checks.
v6: removed iommu_tbl_ops, and instead pass the ->flush_all as
an indirection to iommu_tbl_pool_init(); only invoke ->flush_all
when there is no large_pool, based on the assumption that large-pool
usage is infrequently encountered
v7: moved pool_hash initialization to lib/iommu-common.c and cleaned up
code duplication from sun4v/sun4u/ldc.
v8: Addresses BenH comments with one exception: I've left the
IOMMU_POOL_HASH as is, so that powerpc can tailor it to their
convenience. Discard trylock for simple spin_lock to acquire pool
v9: Addresses latest BenH comments: need_flush checks, add support
for dma mask and align_order.
v10: resend without RFC tag, and new mail Message-Id.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc/include/asm/iommu_64.h | 7 | ||||
-rw-r--r-- | arch/sparc/kernel/iommu.c | 188 | ||||
-rw-r--r-- | arch/sparc/kernel/iommu_common.h | 8 | ||||
-rw-r--r-- | arch/sparc/kernel/ldc.c | 185 | ||||
-rw-r--r-- | arch/sparc/kernel/pci_sun4v.c | 193 | ||||
-rw-r--r-- | include/linux/iommu-common.h | 55 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/iommu-common.c | 220 |
8 files changed, 533 insertions, 325 deletions
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h index 2b9321ab064d..e3cd4493d81d 100644 --- a/arch/sparc/include/asm/iommu_64.h +++ b/arch/sparc/include/asm/iommu_64.h @@ -16,6 +16,7 @@ #define IOPTE_WRITE 0x0000000000000002UL #define IOMMU_NUM_CTXS 4096 +#include <linux/iommu-common.h> struct iommu_arena { unsigned long *map; @@ -24,11 +25,10 @@ struct iommu_arena { }; struct iommu { + struct iommu_table tbl; spinlock_t lock; - struct iommu_arena arena; - void (*flush_all)(struct iommu *); + u32 dma_addr_mask; iopte_t *page_table; - u32 page_table_map_base; unsigned long iommu_control; unsigned long iommu_tsbbase; unsigned long iommu_flush; @@ -40,7 +40,6 @@ struct iommu { unsigned long dummy_page_pa; unsigned long ctx_lowest_free; DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS); - u32 dma_addr_mask; }; struct strbuf { diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index bfa4d0c2df42..9b16b341b6ae 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -13,11 +13,15 @@ #include <linux/errno.h> #include <linux/iommu-helper.h> #include <linux/bitmap.h> +#include <linux/hash.h> +#include <linux/iommu-common.h> #ifdef CONFIG_PCI #include <linux/pci.h> #endif +static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); + #include <asm/iommu.h> #include "iommu_common.h" @@ -45,8 +49,9 @@ "i" (ASI_PHYS_BYPASS_EC_E)) /* Must be invoked under the IOMMU lock. */ -static void iommu_flushall(struct iommu *iommu) +static void iommu_flushall(struct iommu_table *iommu_table) { + struct iommu *iommu = container_of(iommu_table, struct iommu, tbl); if (iommu->iommu_flushinv) { iommu_write(iommu->iommu_flushinv, ~(u64)0); } else { @@ -87,94 +92,23 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } -/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle' - * facility it must all be done in one pass while under the iommu lock. - * - * On sun4u platforms, we only flush the IOMMU once every time we've passed - * over the entire page table doing allocations. Therefore we only ever advance - * the hint and cannot backtrack it. - */ -unsigned long iommu_range_alloc(struct device *dev, - struct iommu *iommu, - unsigned long npages, - unsigned long *handle) -{ - unsigned long n, end, start, limit, boundary_size; - struct iommu_arena *arena = &iommu->arena; - int pass = 0; - - /* This allocator was derived from x86_64's bit string search */ - - /* Sanity check */ - if (unlikely(npages == 0)) { - if (printk_ratelimit()) - WARN_ON(1); - return DMA_ERROR_CODE; - } - - if (handle && *handle) - start = *handle; - else - start = arena->hint; - - limit = arena->limit; - - /* The case below can happen if we have a small segment appended - * to a large, or when the previous alloc was at the very end of - * the available space. If so, go back to the beginning and flush. - */ - if (start >= limit) { - start = 0; - if (iommu->flush_all) - iommu->flush_all(iommu); - } - - again: - - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << IO_PAGE_SHIFT); - else - boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT); - - n = iommu_area_alloc(arena->map, limit, start, npages, - iommu->page_table_map_base >> IO_PAGE_SHIFT, - boundary_size >> IO_PAGE_SHIFT, 0); - if (n == -1) { - if (likely(pass < 1)) { - /* First failure, rescan from the beginning. */ - start = 0; - if (iommu->flush_all) - iommu->flush_all(iommu); - pass++; - goto again; - } else { - /* Second failure, give up */ - return DMA_ERROR_CODE; - } - } - - end = n + npages; - - arena->hint = end; - - /* Update handle for SG allocations */ - if (handle) - *handle = end; - - return n; -} +static struct iommu_tbl_ops iommu_sparc_ops = { + .reset = iommu_flushall +}; -void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages) +static void setup_iommu_pool_hash(void) { - struct iommu_arena *arena = &iommu->arena; - unsigned long entry; - - entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; + unsigned int i; + static bool do_once; - bitmap_clear(arena->map, entry, npages); + if (do_once) + return; + do_once = true; + for_each_possible_cpu(i) + per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS); } + int iommu_table_init(struct iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask, int numa_node) @@ -187,22 +121,22 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); iommu->ctx_lowest_free = 1; - iommu->page_table_map_base = dma_offset; + iommu->tbl.page_table_map_base = dma_offset; iommu->dma_addr_mask = dma_addr_mask; /* Allocate and initialize the free area map. */ sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node); - if (!iommu->arena.map) { - printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n"); + iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node); + if (!iommu->tbl.map) return -ENOMEM; - } - memset(iommu->arena.map, 0, sz); - iommu->arena.limit = num_tsb_entries; - + memset(iommu->tbl.map, 0, sz); if (tlb_type != hypervisor) - iommu->flush_all = iommu_flushall; + iommu_sparc_ops.reset = NULL; /* not needed on on sun4v */ + + setup_iommu_pool_hash(); + iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT, + &iommu_sparc_ops, false, 1); /* Allocate and initialize the dummy page which we * set inactive IO PTEs to point to. @@ -235,18 +169,20 @@ out_free_dummy_page: iommu->dummy_page = 0UL; out_free_map: - kfree(iommu->arena.map); - iommu->arena.map = NULL; + kfree(iommu->tbl.map); + iommu->tbl.map = NULL; return -ENOMEM; } -static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu, +static inline iopte_t *alloc_npages(struct device *dev, + struct iommu *iommu, unsigned long npages) { unsigned long entry; - entry = iommu_range_alloc(dev, iommu, npages, NULL); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + __this_cpu_read(iommu_pool_hash)); if (unlikely(entry == DMA_ERROR_CODE)) return NULL; @@ -284,7 +220,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp, struct dma_attrs *attrs) { - unsigned long flags, order, first_page; + unsigned long order, first_page; struct iommu *iommu; struct page *page; int npages, nid; @@ -306,16 +242,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size, iommu = dev->archdata.iommu; - spin_lock_irqsave(&iommu->lock, flags); iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT); - spin_unlock_irqrestore(&iommu->lock, flags); if (unlikely(iopte == NULL)) { free_pages(first_page, order); return NULL; } - *dma_addrp = (iommu->page_table_map_base + + *dma_addrp = (iommu->tbl.page_table_map_base + ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); ret = (void *) first_page; npages = size >> IO_PAGE_SHIFT; @@ -336,16 +270,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { struct iommu *iommu; - unsigned long flags, order, npages; + unsigned long order, npages; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; - spin_lock_irqsave(&iommu->lock, flags); - - iommu_range_free(iommu, dvma, npages); - - spin_unlock_irqrestore(&iommu->lock, flags); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, false, NULL); order = get_order(size); if (order < 10) @@ -375,8 +305,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; - spin_lock_irqsave(&iommu->lock, flags); base = alloc_npages(dev, iommu, npages); + spin_lock_irqsave(&iommu->lock, flags); ctx = 0; if (iommu->iommu_ctxflush) ctx = iommu_alloc_ctx(iommu); @@ -385,7 +315,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, if (unlikely(!base)) goto bad; - bus_addr = (iommu->page_table_map_base + + bus_addr = (iommu->tbl.page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); @@ -496,7 +426,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; base = iommu->page_table + - ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + ((bus_addr - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT); bus_addr &= IO_PAGE_MASK; spin_lock_irqsave(&iommu->lock, flags); @@ -515,11 +445,11 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, for (i = 0; i < npages; i++) iopte_make_dummy(iommu, base + i); - iommu_range_free(iommu, bus_addr, npages); - iommu_free_ctx(iommu, ctx); - spin_unlock_irqrestore(&iommu->lock, flags); + + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, + false, NULL); } static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, @@ -567,7 +497,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, max_seg_size = dma_get_max_seg_size(dev); seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, IO_PAGE_SIZE) >> IO_PAGE_SHIFT; - base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; + base_shift = iommu->tbl.page_table_map_base >> IO_PAGE_SHIFT; for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; iopte_t *base; @@ -581,7 +511,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, /* Allocate iommu entries for that segment */ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); - entry = iommu_range_alloc(dev, iommu, npages, &handle); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, &handle, + __this_cpu_read(iommu_pool_hash)); /* Handle failure */ if (unlikely(entry == DMA_ERROR_CODE)) { @@ -594,7 +525,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, base = iommu->page_table + entry; /* Convert entry to a dma_addr_t */ - dma_addr = iommu->page_table_map_base + + dma_addr = iommu->tbl.page_table_map_base + (entry << IO_PAGE_SHIFT); dma_addr |= (s->offset & ~IO_PAGE_MASK); @@ -654,15 +585,17 @@ iommu_map_failed: vaddr = s->dma_address & IO_PAGE_MASK; npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); - iommu_range_free(iommu, vaddr, npages); - entry = (vaddr - iommu->page_table_map_base) + entry = (vaddr - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT; base = iommu->page_table + entry; for (j = 0; j < npages; j++) iopte_make_dummy(iommu, base + j); + iommu_tbl_range_free(&iommu->tbl, vaddr, npages, + false, NULL); + s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; } @@ -677,17 +610,19 @@ iommu_map_failed: /* If contexts are being used, they are the same in all of the mappings * we make for a particular SG. */ -static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg) +static unsigned long fetch_sg_ctx(struct iommu *iommu, + struct scatterlist *sg) { unsigned long ctx = 0; if (iommu->iommu_ctxflush) { iopte_t *base; u32 bus_addr; + struct iommu_table *tbl = &iommu->tbl; bus_addr = sg->dma_address & IO_PAGE_MASK; base = iommu->page_table + - ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + ((bus_addr - tbl->page_table_map_base) >> IO_PAGE_SHIFT); ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; } @@ -723,9 +658,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, if (!len) break; npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); - iommu_range_free(iommu, dma_handle, npages); - entry = ((dma_handle - iommu->page_table_map_base) + entry = ((dma_handle - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT); base = iommu->page_table + entry; @@ -737,6 +671,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, for (i = 0; i < npages; i++) iopte_make_dummy(iommu, base + i); + iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, false, + NULL); sg = sg_next(sg); } @@ -770,9 +706,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev, if (iommu->iommu_ctxflush && strbuf->strbuf_ctxflush) { iopte_t *iopte; + struct iommu_table *tbl = &iommu->tbl; iopte = iommu->page_table + - ((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT); + ((bus_addr - tbl->page_table_map_base)>>IO_PAGE_SHIFT); ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; } @@ -805,9 +742,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, if (iommu->iommu_ctxflush && strbuf->strbuf_ctxflush) { iopte_t *iopte; + struct iommu_table *tbl = &iommu->tbl; - iopte = iommu->page_table + - ((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + iopte = iommu->page_table + ((sglist[0].dma_address - + tbl->page_table_map_base) >> IO_PAGE_SHIFT); ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; } diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h index 1ec0de4156e7..f4be0d724fc6 100644 --- a/arch/sparc/kernel/iommu_common.h +++ b/arch/sparc/kernel/iommu_common.h @@ -48,12 +48,4 @@ static inline int is_span_boundary(unsigned long entry, return iommu_is_span_boundary(entry, nr, shift, boundary_size); } -unsigned long iommu_range_alloc(struct device *dev, - struct iommu *iommu, - unsigned long npages, - unsigned long *handle); -void iommu_range_free(struct iommu *iommu, - dma_addr_t dma_addr, - unsigned long npages); - #endif /* _IOMMU_COMMON_H */ diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 274a9f59d95c..d485697c37c0 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -15,6 +15,8 @@ #include <linux/list.h> #include <linux/init.h> #include <linux/bitmap.h> +#include <linux/hash.h> +#include <linux/iommu-common.h> #include <asm/hypervisor.h> #include <asm/iommu.h> @@ -27,6 +29,11 @@ #define DRV_MODULE_VERSION "1.1" #define DRV_MODULE_RELDATE "July 22, 2008" +#define COOKIE_PGSZ_CODE 0xf000000000000000ULL +#define COOKIE_PGSZ_CODE_SHIFT 60ULL + +static DEFINE_PER_CPU(unsigned int, ldc_pool_hash); + static char version[] = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; #define LDC_PACKET_SIZE 64 @@ -98,10 +105,10 @@ static const struct ldc_mode_ops stream_ops; int ldom_domaining_enabled; struct ldc_iommu { - /* Protects arena alloc/free. */ + /* Protects ldc_unmap. */ spinlock_t lock; - struct iommu_arena arena; struct ldc_mtable_entry *page_table; + struct iommu_table iommu_table; }; struct ldc_channel { @@ -998,31 +1005,85 @@ static void free_queue(unsigned long num_entries, struct ldc_packet *q) free_pages((unsigned long)q, order); } +static unsigned long ldc_cookie_to_index(u64 cookie, void *arg) +{ + u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT; + /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */ + + cookie &= ~COOKIE_PGSZ_CODE; + + return (cookie >> (13ULL + (szcode * 3ULL))); +} + +struct ldc_demap_arg { + struct ldc_iommu *ldc_iommu; + u64 cookie; + unsigned long id; +}; + +static void ldc_demap(void *arg, unsigned long entry, unsigned long npages) +{ + struct ldc_demap_arg *ldc_demap_arg = arg; + struct ldc_iommu *iommu = ldc_demap_arg->ldc_iommu; + unsigned long id = ldc_demap_arg->id; + u64 cookie = ldc_demap_arg->cookie; + struct ldc_mtable_entry *base; + unsigned long i, shift; + + shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3; + base = iommu->page_table + entry; + for (i = 0; i < npages; i++) { + if (base->cookie) + sun4v_ldc_revoke(id, cookie + (i << shift), + base->cookie); + base->mte = 0; + } +} + /* XXX Make this configurable... XXX */ #define LDC_IOTABLE_SIZE (8 * 1024) -static int ldc_iommu_init(struct ldc_channel *lp) +struct iommu_tbl_ops ldc_iommu_ops = { + .cookie_to_index = ldc_cookie_to_index, + .demap = ldc_demap, +}; + +static void setup_ldc_pool_hash(void) +{ + unsigned int i; + static bool do_once; + + if (do_once) + return; + do_once = true; + for_each_possible_cpu(i) + per_cpu(ldc_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS); +} + + +static int ldc_iommu_init(const char *name, struct ldc_channel *lp) { unsigned long sz, num_tsb_entries, tsbsize, order; - struct ldc_iommu *iommu = &lp->iommu; + struct ldc_iommu *ldc_iommu = &lp->iommu; + struct iommu_table *iommu = &ldc_iommu->iommu_table; struct ldc_mtable_entry *table; unsigned long hv_err; int err; num_tsb_entries = LDC_IOTABLE_SIZE; tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry); - - spin_lock_init(&iommu->lock); + setup_ldc_pool_hash(); + spin_lock_init(&ldc_iommu->lock); sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kzalloc(sz, GFP_KERNEL); - if (!iommu->arena.map) { + iommu->map = kzalloc(sz, GFP_KERNEL); + if (!iommu->map) { printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz); return -ENOMEM; } - - iommu->arena.limit = num_tsb_entries; + iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT, + &ldc_iommu_ops, false, 1); order = get_order(tsbsize); @@ -1037,7 +1098,7 @@ static int ldc_iommu_init(struct ldc_channel *lp) memset(table, 0, PAGE_SIZE << order); - iommu->page_table = table; + ldc_iommu->page_table = table; hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table), num_tsb_entries); @@ -1049,31 +1110,32 @@ static int ldc_iommu_init(struct ldc_channel *lp) out_free_table: free_pages((unsigned long) table, order); - iommu->page_table = NULL; + ldc_iommu->page_table = NULL; out_free_map: - kfree(iommu->arena.map); - iommu->arena.map = NULL; + kfree(iommu->map); + iommu->map = NULL; return err; } static void ldc_iommu_release(struct ldc_channel *lp) { - struct ldc_iommu *iommu = &lp->iommu; + struct ldc_iommu *ldc_iommu = &lp->iommu; + struct iommu_table *iommu = &ldc_iommu->iommu_table; unsigned long num_tsb_entries, tsbsize, order; (void) sun4v_ldc_set_map_table(lp->id, 0, 0); - num_tsb_entries = iommu->arena.limit; + num_tsb_entries = iommu->poolsize * iommu->nr_pools; tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry); order = get_order(tsbsize); - free_pages((unsigned long) iommu->page_table, order); - iommu->page_table = NULL; + free_pages((unsigned long) ldc_iommu->page_table, order); + ldc_iommu->page_table = NULL; - kfree(iommu->arena.map); - iommu->arena.map = NULL; + kfree(iommu->map); + iommu->map = NULL; } struct ldc_channel *ldc_alloc(unsigned long id, @@ -1140,7 +1202,7 @@ struct ldc_channel *ldc_alloc(unsigned long id, lp->id = id; - err = ldc_iommu_init(lp); + err = ldc_iommu_init(name, lp); if (err) goto out_free_ldc; @@ -1885,40 +1947,6 @@ int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size) } EXPORT_SYMBOL(ldc_read); -static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages) -{ - struct iommu_arena *arena = &iommu->arena; - unsigned long n, start, end, limit; - int pass; - - limit = arena->limit; - start = arena->hint; - pass = 0; - -again: - n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0); - end = n + npages; - if (unlikely(end >= limit)) { - if (likely(pass < 1)) { - limit = start; - start = 0; - pass++; - goto again; - } else { - /* Scanned the whole thing, give up. */ - return -1; - } - } - bitmap_set(arena->map, n, npages); - - arena->hint = end; - - return n; -} - -#define COOKIE_PGSZ_CODE 0xf000000000000000ULL -#define COOKIE_PGSZ_CODE_SHIFT 60ULL - static u64 pagesize_code(void) { switch (PAGE_SIZE) { @@ -1945,23 +1973,14 @@ static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset) page_offset); } -static u64 cookie_to_index(u64 cookie, unsigned long *shift) -{ - u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT; - - cookie &= ~COOKIE_PGSZ_CODE; - - *shift = szcode * 3; - - return (cookie >> (13ULL + (szcode * 3ULL))); -} static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu, unsigned long npages) { long entry; - entry = arena_alloc(iommu, npages); + entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_table, npages, + NULL, __this_cpu_read(ldc_pool_hash)); if (unlikely(entry < 0)) return NULL; @@ -2090,7 +2109,7 @@ int ldc_map_sg(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, int ncookies, unsigned int map_perm) { - unsigned long i, npages, flags; + unsigned long i, npages; struct ldc_mtable_entry *base; struct cookie_state state; struct ldc_iommu *iommu; @@ -2109,9 +2128,7 @@ int ldc_map_sg(struct ldc_channel *lp, iommu = &lp->iommu; - spin_lock_irqsave(&iommu->lock, flags); base = alloc_npages(iommu, npages); - spin_unlock_irqrestore(&iommu->lock, flags); if (!base) return -ENOMEM; @@ -2136,7 +2153,7 @@ int ldc_map_single(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, int ncookies, unsigned int map_perm) { - unsigned long npages, pa, flags; + unsigned long npages, pa; struct ldc_mtable_entry *base; struct cookie_state state; struct ldc_iommu *iommu; @@ -2152,9 +2169,7 @@ int ldc_map_single(struct ldc_channel *lp, iommu = &lp->iommu; - spin_lock_irqsave(&iommu->lock, flags); base = alloc_npages(iommu, npages); - spin_unlock_irqrestore(&iommu->lock, flags); if (!base) return -ENOMEM; @@ -2172,35 +2187,29 @@ int ldc_map_single(struct ldc_channel *lp, } EXPORT_SYMBOL(ldc_map_single); + static void free_npages(unsigned long id, struct ldc_iommu *iommu, u64 cookie, u64 size) { - struct iommu_arena *arena = &iommu->arena; - unsigned long i, shift, index, npages; - struct ldc_mtable_entry *base; + unsigned long npages; + struct ldc_demap_arg demap_arg; - npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT; - index = cookie_to_index(cookie, &shift); - base = iommu->page_table + index; + demap_arg.ldc_iommu = iommu; + demap_arg.cookie = cookie; + demap_arg.id = id; - BUG_ON(index > arena->limit || - (index + npages) > arena->limit); + npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT; + iommu_tbl_range_free(&iommu->iommu_table, cookie, npages, true, + &demap_arg); - for (i = 0; i < npages; i++) { - if (base->cookie) - sun4v_ldc_revoke(id, cookie + (i << shift), - base->cookie); - base->mte = 0; - __clear_bit(index + i, arena->map); - } } void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, int ncookies) { struct ldc_iommu *iommu = &lp->iommu; - unsigned long flags; int i; + unsigned long flags; spin_lock_irqsave(&iommu->lock, flags); for (i = 0; i < ncookies; i++) { diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 47ddbd496a1e..9b76b9d639e1 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -15,6 +15,8 @@ #include <linux/export.h> #include <linux/log2.h> #include <linux/of_device.h> +#include <linux/hash.h> +#include <linux/iommu-common.h> #include <asm/iommu.h> #include <asm/irq.h> @@ -28,6 +30,7 @@ #define DRIVER_NAME "pci_sun4v" #define PFX DRIVER_NAME ": " +static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); static unsigned long vpci_major = 1; static unsigned long vpci_minor = 1; @@ -155,14 +158,13 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, iommu = dev->archdata.iommu; - spin_lock_irqsave(&iommu->lock, flags); - entry = iommu_range_alloc(dev, iommu, npages, NULL); - spin_unlock_irqrestore(&iommu->lock, flags); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + __this_cpu_read(iommu_pool_hash)); if (unlikely(entry == DMA_ERROR_CODE)) goto range_alloc_fail; - *dma_addrp = (iommu->page_table_map_base + + *dma_addrp = (iommu->tbl.page_table_map_base + (entry << IO_PAGE_SHIFT)); ret = (void *) first_page; first_page = __pa(first_page); @@ -188,45 +190,46 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, return ret; iommu_map_fail: - /* Interrupts are disabled. */ - spin_lock(&iommu->lock); - iommu_range_free(iommu, *dma_addrp, npages); - spin_unlock_irqrestore(&iommu->lock, flags); + iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, false, NULL); range_alloc_fail: free_pages(first_page, order); return NULL; } +static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry, + unsigned long npages) +{ + u32 devhandle = *(u32 *)demap_arg; + unsigned long num, flags; + + local_irq_save(flags); + do { + num = pci_sun4v_iommu_demap(devhandle, + HV_PCI_TSBID(0, entry), + npages); + + entry += num; + npages -= num; + } while (npages != 0); + local_irq_restore(flags); +} + static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, dma_addr_t dvma, struct dma_attrs *attrs) { struct pci_pbm_info *pbm; struct iommu *iommu; - unsigned long flags, order, npages, entry; + unsigned long order, npages, entry; u32 devhandle; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; devhandle = pbm->devhandle; - entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); - - spin_lock_irqsave(&iommu->lock, flags); - - iommu_range_free(iommu, dvma, npages); - - do { - unsigned long num; - - num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), - npages); - entry += num; - npages -= num; - } while (npages != 0); - - spin_unlock_irqrestore(&iommu->lock, flags); - + entry = ((dvma - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT); + dma_4v_iommu_demap(&devhandle, entry, npages); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, false, NULL); order = get_order(size); if (order < 10) free_pages((unsigned long)cpu, order); @@ -253,14 +256,13 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; - spin_lock_irqsave(&iommu->lock, flags); - entry = iommu_range_alloc(dev, iommu, npages, NULL); - spin_unlock_irqrestore(&iommu->lock, flags); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + __this_cpu_read(iommu_pool_hash)); if (unlikely(entry == DMA_ERROR_CODE)) goto bad; - bus_addr = (iommu->page_table_map_base + + bus_addr = (iommu->tbl.page_table_map_base + (entry << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); @@ -290,11 +292,7 @@ bad: return DMA_ERROR_CODE; iommu_map_fail: - /* Interrupts are disabled. */ - spin_lock(&iommu->lock); - iommu_range_free(iommu, bus_addr, npages); - spin_unlock_irqrestore(&iommu->lock, flags); - + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, false, NULL); return DMA_ERROR_CODE; } @@ -304,7 +302,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, { struct pci_pbm_info *pbm; struct iommu *iommu; - unsigned long flags, npages; + unsigned long npages; long entry; u32 devhandle; @@ -321,22 +319,9 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; bus_addr &= IO_PAGE_MASK; - - spin_lock_irqsave(&iommu->lock, flags); - - iommu_range_free(iommu, bus_addr, npages); - - entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; - do { - unsigned long num; - - num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), - npages); - entry += num; - npages -= num; - } while (npages != 0); - - spin_unlock_irqrestore(&iommu->lock, flags); + entry = (bus_addr - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT; + dma_4v_iommu_demap(&devhandle, entry, npages); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, false, NULL); } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, @@ -371,14 +356,14 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, /* Init first segment length for backout at failure */ outs->dma_length = 0; - spin_lock_irqsave(&iommu->lock, flags); + local_irq_save(flags); iommu_batch_start(dev, prot, ~0UL); max_seg_size = dma_get_max_seg_size(dev); seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, IO_PAGE_SIZE) >> IO_PAGE_SHIFT; - base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; + base_shift = iommu->tbl.page_table_map_base >> IO_PAGE_SHIFT; for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; @@ -391,7 +376,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, /* Allocate iommu entries for that segment */ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); - entry = iommu_range_alloc(dev, iommu, npages, &handle); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, &handle, + __this_cpu_read(iommu_pool_hash)); /* Handle failure */ if (unlikely(entry == DMA_ERROR_CODE)) { @@ -404,7 +390,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, iommu_batch_new_entry(entry); /* Convert entry to a dma_addr_t */ - dma_addr = iommu->page_table_map_base + + dma_addr = iommu->tbl.page_table_map_base + (entry << IO_PAGE_SHIFT); dma_addr |= (s->offset & ~IO_PAGE_MASK); @@ -451,7 +437,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, if (unlikely(err < 0L)) goto iommu_map_failed; - spin_unlock_irqrestore(&iommu->lock, flags); + local_irq_restore(flags); if (outcount < incount) { outs = sg_next(outs); @@ -469,7 +455,8 @@ iommu_map_failed: vaddr = s->dma_address & IO_PAGE_MASK; npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); - iommu_range_free(iommu, vaddr, npages); + iommu_tbl_range_free(&iommu->tbl, vaddr, npages, + false, NULL); /* XXX demap? XXX */ s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -477,7 +464,7 @@ iommu_map_failed: if (s == outs) break; } - spin_unlock_irqrestore(&iommu->lock, flags); + local_irq_restore(flags); return 0; } @@ -489,7 +476,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, struct pci_pbm_info *pbm; struct scatterlist *sg; struct iommu *iommu; - unsigned long flags; + unsigned long flags, entry; u32 devhandle; BUG_ON(direction == DMA_NONE); @@ -498,33 +485,27 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, pbm = dev->archdata.host_controller; devhandle = pbm->devhandle; - spin_lock_irqsave(&iommu->lock, flags); + local_irq_save(flags); sg = sglist; while (nelems--) { dma_addr_t dma_handle = sg->dma_address; unsigned int len = sg->dma_length; - unsigned long npages, entry; + unsigned long npages; + struct iommu_table *tbl = &iommu->tbl; + unsigned long shift = IO_PAGE_SHIFT; if (!len) break; npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); - iommu_range_free(iommu, dma_handle, npages); - - entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT); - while (npages) { - unsigned long num; - - num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), - npages); - entry += num; - npages -= num; - } - + entry = ((dma_handle - tbl->page_table_map_base) >> shift); + dma_4v_iommu_demap(&devhandle, entry, npages); + iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, + false, NULL); sg = sg_next(sg); } - spin_unlock_irqrestore(&iommu->lock, flags); + local_irq_restore(flags); } static struct dma_map_ops sun4v_dma_ops = { @@ -536,6 +517,8 @@ static struct dma_map_ops sun4v_dma_ops = { .unmap_sg = dma_4v_unmap_sg, }; +static struct iommu_tbl_ops dma_4v_iommu_ops; + static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent) { struct property *prop; @@ -550,30 +533,33 @@ static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent) } static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, - struct iommu *iommu) + struct iommu_table *iommu) { - struct iommu_arena *arena = &iommu->arena; - unsigned long i, cnt = 0; + struct iommu_pool *pool; + unsigned long i, pool_nr, cnt = 0; u32 devhandle; devhandle = pbm->devhandle; - for (i = 0; i < arena->limit; i++) { - unsigned long ret, io_attrs, ra; - - ret = pci_sun4v_iommu_getmap(devhandle, - HV_PCI_TSBID(0, i), - &io_attrs, &ra); - if (ret == HV_EOK) { - if (page_in_phys_avail(ra)) { - pci_sun4v_iommu_demap(devhandle, - HV_PCI_TSBID(0, i), 1); - } else { - cnt++; - __set_bit(i, arena->map); + for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) { + pool = &(iommu->arena_pool[pool_nr]); + for (i = pool->start; i <= pool->end; i++) { + unsigned long ret, io_attrs, ra; + + ret = pci_sun4v_iommu_getmap(devhandle, + HV_PCI_TSBID(0, i), + &io_attrs, &ra); + if (ret == HV_EOK) { + if (page_in_phys_avail(ra)) { + pci_sun4v_iommu_demap(devhandle, + HV_PCI_TSBID(0, + i), 1); + } else { + cnt++; + __set_bit(i, iommu->map); + } } } } - return cnt; } @@ -601,22 +587,22 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm) dma_offset = vdma[0]; /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); iommu->ctx_lowest_free = 1; - iommu->page_table_map_base = dma_offset; + iommu->tbl.page_table_map_base = dma_offset; iommu->dma_addr_mask = dma_mask; /* Allocate and initialize the free area map. */ sz = (num_tsb_entries + 7) / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kzalloc(sz, GFP_KERNEL); - if (!iommu->arena.map) { + iommu->tbl.map = kzalloc(sz, GFP_KERNEL); + if (!iommu->tbl.map) { printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n"); return -ENOMEM; } - iommu->arena.limit = num_tsb_entries; - - sz = probe_existing_entries(pbm, iommu); + iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT, + &dma_4v_iommu_ops, false /* no large_pool */, + 0 /* default npools */); + sz = probe_existing_entries(pbm, &iommu->tbl); if (sz) printk("%s: Imported %lu TSB entries from OBP\n", pbm->name, sz); @@ -1015,8 +1001,17 @@ static struct platform_driver pci_sun4v_driver = { .probe = pci_sun4v_probe, }; +static void setup_iommu_pool_hash(void) +{ + unsigned int i; + + for_each_possible_cpu(i) + per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS); +} + static int __init pci_sun4v_init(void) { + setup_iommu_pool_hash(); return platform_driver_register(&pci_sun4v_driver); } diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h new file mode 100644 index 000000000000..6be5c863f329 --- /dev/null +++ b/include/linux/iommu-common.h @@ -0,0 +1,55 @@ +#ifndef _LINUX_IOMMU_COMMON_H +#define _LINUX_IOMMU_COMMON_H + +#include <linux/spinlock_types.h> +#include <linux/device.h> +#include <asm/page.h> + +#define IOMMU_POOL_HASHBITS 4 +#define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) + +struct iommu_pool { + unsigned long start; + unsigned long end; + unsigned long hint; + spinlock_t lock; +}; + +struct iommu_table; + +struct iommu_tbl_ops { + unsigned long (*cookie_to_index)(u64, void *); + void (*demap)(void *, unsigned long, unsigned long); + void (*reset)(struct iommu_table *); +}; + +struct iommu_table { + unsigned long page_table_map_base; + unsigned long page_table_shift; + unsigned long nr_pools; + const struct iommu_tbl_ops *iommu_tbl_ops; + unsigned long poolsize; + struct iommu_pool arena_pool[IOMMU_NR_POOLS]; + u32 flags; +#define IOMMU_HAS_LARGE_POOL 0x00000001 + struct iommu_pool large_pool; + unsigned long *map; +}; + +extern void iommu_tbl_pool_init(struct iommu_table *iommu, + unsigned long num_entries, + u32 page_table_shift, + const struct iommu_tbl_ops *iommu_tbl_ops, + bool large_pool, u32 npools); + +extern unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned int pool_hash); + +extern void iommu_tbl_range_free(struct iommu_table *iommu, + u64 dma_addr, unsigned long npages, + bool do_demap, void *demap_arg); + +#endif diff --git a/lib/Makefile b/lib/Makefile index 58f74d2dd396..60c22e65b793 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o diff --git a/lib/iommu-common.c b/lib/iommu-common.c new file mode 100644 index 000000000000..7583f9b7846b --- /dev/null +++ b/lib/iommu-common.c @@ -0,0 +1,220 @@ +/* + * IOMMU mmap management and range allocation functions. + * Based almost entirely upon the powerpc iommu allocator. + */ + +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/bug.h> +#include <linux/iommu-helper.h> +#include <linux/iommu-common.h> +#include <linux/dma-mapping.h> + +#define IOMMU_LARGE_ALLOC 15 + +/* + * Initialize iommu_pool entries for the iommu_table. `num_entries' + * is the number of table entries. If `large_pool' is set to true, + * the top 1/4 of the table will be set aside for pool allocations + * of more than IOMMU_LARGE_ALLOC pages. + */ +extern void iommu_tbl_pool_init(struct iommu_table *iommu, + unsigned long num_entries, + u32 page_table_shift, + const struct iommu_tbl_ops *iommu_tbl_ops, + bool large_pool, u32 npools) +{ + unsigned int start, i; + struct iommu_pool *p = &(iommu->large_pool); + + if (npools == 0) + iommu->nr_pools = IOMMU_NR_POOLS; + else + iommu->nr_pools = npools; + BUG_ON(npools > IOMMU_NR_POOLS); + + iommu->page_table_shift = page_table_shift; + iommu->iommu_tbl_ops = iommu_tbl_ops; + start = 0; + if (large_pool) + iommu->flags |= IOMMU_HAS_LARGE_POOL; + + if (!large_pool) + iommu->poolsize = num_entries/iommu->nr_pools; + else + iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools; + for (i = 0; i < iommu->nr_pools; i++) { + spin_lock_init(&(iommu->arena_pool[i].lock)); + iommu->arena_pool[i].start = start; + iommu->arena_pool[i].hint = start; + start += iommu->poolsize; /* start for next pool */ + iommu->arena_pool[i].end = start - 1; + } + if (!large_pool) + return; + /* initialize large_pool */ + spin_lock_init(&(p->lock)); + p->start = start; + p->hint = p->start; + p->end = num_entries; +} +EXPORT_SYMBOL(iommu_tbl_pool_init); + +unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned int pool_hash) +{ + unsigned long n, end, start, limit, boundary_size; + struct iommu_pool *arena; + int pass = 0; + unsigned int pool_nr; + unsigned int npools = iommu->nr_pools; + unsigned long flags; + bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0); + bool largealloc = (large_pool && npages > IOMMU_LARGE_ALLOC); + unsigned long shift; + + /* Sanity check */ + if (unlikely(npages == 0)) { + printk_ratelimited("npages == 0\n"); + return DMA_ERROR_CODE; + } + + if (largealloc) { + arena = &(iommu->large_pool); + spin_lock_irqsave(&arena->lock, flags); + pool_nr = 0; /* to keep compiler happy */ + } else { + /* pick out pool_nr */ + pool_nr = pool_hash & (npools - 1); + arena = &(iommu->arena_pool[pool_nr]); + + /* find first available unlocked pool */ + while (!spin_trylock_irqsave(&(arena->lock), flags)) { + pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); + arena = &(iommu->arena_pool[pool_nr]); + } + } + + again: + if (pass == 0 && handle && *handle && + (*handle >= arena->start) && (*handle < arena->end)) + start = *handle; + else + start = arena->hint; + + limit = arena->end; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the beginning and flush. + */ + if (start >= limit) { + start = arena->start; + if (iommu->iommu_tbl_ops->reset != NULL) + iommu->iommu_tbl_ops->reset(iommu); + } + + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << iommu->page_table_shift); + else + boundary_size = ALIGN(1UL << 32, 1 << iommu->page_table_shift); + + shift = iommu->page_table_map_base >> iommu->page_table_shift; + boundary_size = boundary_size >> iommu->page_table_shift; + /* + * if the iommu has a non-trivial cookie <-> index mapping, we set + * things up so that iommu_is_span_boundary() merely checks if the + * (index + npages) < num_tsb_entries + */ + if (iommu->iommu_tbl_ops->cookie_to_index != NULL) { + shift = 0; + boundary_size = iommu->poolsize * iommu->nr_pools; + } + n = iommu_area_alloc(iommu->map, limit, start, npages, shift, + boundary_size, 0); + if (n == -1) { + if (likely(pass == 0)) { + /* First failure, rescan from the beginning. */ + arena->hint = arena->start; + if (iommu->iommu_tbl_ops->reset != NULL) + iommu->iommu_tbl_ops->reset(iommu); + pass++; + goto again; + } else if (!largealloc && pass <= iommu->nr_pools) { + spin_unlock(&(arena->lock)); + pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); + arena = &(iommu->arena_pool[pool_nr]); + while (!spin_trylock(&(arena->lock))) { + pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); + arena = &(iommu->arena_pool[pool_nr]); + } + arena->hint = arena->start; + pass++; + goto again; + } else { + /* give up */ + spin_unlock_irqrestore(&(arena->lock), flags); + return DMA_ERROR_CODE; + } + } + + end = n + npages; + + arena->hint = end; + + /* Update handle for SG allocations */ + if (handle) + *handle = end; + spin_unlock_irqrestore(&(arena->lock), flags); + + return n; +} +EXPORT_SYMBOL(iommu_tbl_range_alloc); + +static struct iommu_pool *get_pool(struct iommu_table *tbl, + unsigned long entry) +{ + struct iommu_pool *p; + unsigned long largepool_start = tbl->large_pool.start; + bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0); + + /* The large pool is the last pool at the top of the table */ + if (large_pool && entry >= largepool_start) { + p = &tbl->large_pool; + } else { + unsigned int pool_nr = entry / tbl->poolsize; + + BUG_ON(pool_nr >= tbl->nr_pools); + p = &tbl->arena_pool[pool_nr]; + } + return p; +} + +void iommu_tbl_range_free(struct iommu_table *iommu, u64 dma_addr, + unsigned long npages, bool do_demap, void *demap_arg) +{ + unsigned long entry; + struct iommu_pool *pool; + unsigned long flags; + unsigned long shift = iommu->page_table_shift; + + if (iommu->iommu_tbl_ops->cookie_to_index != NULL) { + entry = (*iommu->iommu_tbl_ops->cookie_to_index)(dma_addr, + demap_arg); + } else { + entry = (dma_addr - iommu->page_table_map_base) >> shift; + } + pool = get_pool(iommu, entry); + + spin_lock_irqsave(&(pool->lock), flags); + if (do_demap && iommu->iommu_tbl_ops->demap != NULL) + (*iommu->iommu_tbl_ops->demap)(demap_arg, entry, npages); + + bitmap_clear(iommu->map, entry, npages); + spin_unlock_irqrestore(&(pool->lock), flags); +} +EXPORT_SYMBOL(iommu_tbl_range_free); |