From 06e6295bcecefea9dc29fc84b5fd6848061365a0 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 15 Oct 2013 15:47:14 +0000 Subject: arm: make SWIOTLB available IOMMU_HELPER is needed because SWIOTLB calls iommu_is_span_boundary, provided by lib/iommu_helper.c. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk CC: will.deacon@arm.com CC: linux@arm.linux.org.uk Changes in v8: - use __phys_to_pfn and __pfn_to_phys. Changes in v7: - dma_mark_clean: empty implementation; - in dma_capable use coherent_dma_mask if dma_mask hasn't been allocated. Changes in v6: - check for dev->dma_mask being NULL in dma_capable. Changes in v5: - implement dma_mark_clean using dmac_flush_range. Changes in v3: - dma_capable: do not treat dma_mask as a limit; - remove SWIOTLB dependency on NEED_SG_DMA_LENGTH. --- arch/arm/Kconfig | 6 ++++++ arch/arm/include/asm/dma-mapping.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1ad6fb6c094d..b08374f8fe3b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1872,6 +1872,12 @@ config CC_STACKPROTECTOR neutralized via a kernel panic. This feature requires gcc version 4.2 or above. +config SWIOTLB + def_bool y + +config IOMMU_HELPER + def_bool SWIOTLB + config XEN_DOM0 def_bool y depends on XEN diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 5b579b951503..01b5a3d92197 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -10,6 +10,7 @@ #include #include +#include #define DMA_ERROR_CODE (~0) extern struct dma_map_ops arm_dma_ops; @@ -86,6 +87,42 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) } #endif +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + unsigned int offset = paddr & ~PAGE_MASK; + return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +{ + unsigned int offset = dev_addr & ~PAGE_MASK; + return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + u64 limit, mask; + + if (dev->dma_mask) + mask = *dev->dma_mask; + else + mask = dev->coherent_dma_mask; + + if (mask == 0) + return 0; + + limit = (mask + 1) & ~mask; + if (limit && size > limit) + return 0; + + if ((addr | (addr + size - 1)) & ~mask) + return 0; + + return 1; +} + +static inline void dma_mark_clean(void *addr, size_t size) { } + /* * DMA errors are defined by all-bits-set in the DMA address. */ -- cgit v1.2.3 From 25b719d7b45947a79d298414cdfb5ec8fadf0ec8 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 15 Oct 2013 15:49:03 +0000 Subject: arm64: define DMA_ERROR_CODE Signed-off-by: Stefano Stabellini Acked-by: Catalin Marinas CC: will.deacon@arm.com Changes in v8: - cast to dma_addr_t before returning. --- arch/arm64/include/asm/dma-mapping.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 8d1810001aef..59206b178494 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -25,6 +25,7 @@ #define ARCH_HAS_DMA_GET_REQUIRED_MASK +#define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) -- cgit v1.2.3 From 4a19138c6505e224d9f4cc2fe9ada9188d7100ea Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 17 Oct 2013 16:22:27 +0000 Subject: arm/xen,arm64/xen: introduce p2m Introduce physical to machine and machine to physical tracking mechanisms based on rbtrees for arm/xen and arm64/xen. We need it because any guests on ARM are an autotranslate guests, therefore a physical address is potentially different from a machine address. When programming a device to do DMA, we need to be extra-careful to use machine addresses rather than physical addresses to program the device. Therefore we need to know the physical to machine mappings. For the moment we assume that dom0 starts with a 1:1 physical to machine mapping, in other words physical addresses correspond to machine addresses. However when mapping a foreign grant reference, obviously the 1:1 model doesn't work anymore. So at the very least we need to be able to track grant mappings. We need locking to protect accesses to the two trees. Signed-off-by: Stefano Stabellini Changes in v8: - move pfn_to_mfn and mfn_to_pfn to page.h as static inline functions; - no need to walk the tree if phys_to_mach.rb_node is NULL; - correctly handle multipage p2m entries; - substitute the spin_lock with a rwlock. --- arch/arm/include/asm/xen/page.h | 49 ++++++++-- arch/arm/xen/Makefile | 2 +- arch/arm/xen/p2m.c | 208 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/xen/Makefile | 2 +- 4 files changed, 252 insertions(+), 9 deletions(-) create mode 100644 arch/arm/xen/p2m.c diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 359a7b50b158..d1b5dd566472 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -7,11 +7,10 @@ #include #include +#include #include -#define pfn_to_mfn(pfn) (pfn) #define phys_to_machine_mapping_valid(pfn) (1) -#define mfn_to_pfn(mfn) (mfn) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) #define pte_mfn pte_pfn @@ -32,6 +31,44 @@ typedef struct xpaddr { #define INVALID_P2M_ENTRY (~0UL) +unsigned long __pfn_to_mfn(unsigned long pfn); +unsigned long __mfn_to_pfn(unsigned long mfn); +extern struct rb_root phys_to_mach; + +static inline unsigned long pfn_to_mfn(unsigned long pfn) +{ + unsigned long mfn; + + if (phys_to_mach.rb_node != NULL) { + mfn = __pfn_to_mfn(pfn); + if (mfn != INVALID_P2M_ENTRY) + return mfn; + } + + if (xen_initial_domain()) + return pfn; + else + return INVALID_P2M_ENTRY; +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (phys_to_mach.rb_node != NULL) { + pfn = __mfn_to_pfn(mfn); + if (pfn != INVALID_P2M_ENTRY) + return pfn; + } + + if (xen_initial_domain()) + return mfn; + else + return INVALID_P2M_ENTRY; +} + +#define mfn_to_local_pfn(mfn) mfn_to_pfn(mfn) + static inline xmaddr_t phys_to_machine(xpaddr_t phys) { unsigned offset = phys.paddr & ~PAGE_MASK; @@ -76,11 +113,9 @@ static inline int m2p_remove_override(struct page *page, bool clear_pte) return 0; } -static inline bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return true; -} +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +bool __set_phys_to_machine_multi(unsigned long pfn, unsigned long mfn, + unsigned long nr_pages); static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 43841033afd3..21e6ff503178 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1 +1 @@ -obj-y := enlighten.o hypercall.o grant-table.o +obj-y := enlighten.o hypercall.o grant-table.o p2m.o diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c new file mode 100644 index 000000000000..5df4a9afb8c6 --- /dev/null +++ b/arch/arm/xen/p2m.c @@ -0,0 +1,208 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +struct xen_p2m_entry { + unsigned long pfn; + unsigned long mfn; + unsigned long nr_pages; + struct rb_node rbnode_mach; + struct rb_node rbnode_phys; +}; + +rwlock_t p2m_lock; +struct rb_root phys_to_mach = RB_ROOT; +static struct rb_root mach_to_phys = RB_ROOT; + +static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new) +{ + struct rb_node **link = &phys_to_mach.rb_node; + struct rb_node *parent = NULL; + struct xen_p2m_entry *entry; + int rc = 0; + + while (*link) { + parent = *link; + entry = rb_entry(parent, struct xen_p2m_entry, rbnode_phys); + + if (new->mfn == entry->mfn) + goto err_out; + if (new->pfn == entry->pfn) + goto err_out; + + if (new->pfn < entry->pfn) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + rb_link_node(&new->rbnode_phys, parent, link); + rb_insert_color(&new->rbnode_phys, &phys_to_mach); + goto out; + +err_out: + rc = -EINVAL; + pr_warn("%s: cannot add pfn=%pa -> mfn=%pa: pfn=%pa -> mfn=%pa already exists\n", + __func__, &new->pfn, &new->mfn, &entry->pfn, &entry->mfn); +out: + return rc; +} + +unsigned long __pfn_to_mfn(unsigned long pfn) +{ + struct rb_node *n = phys_to_mach.rb_node; + struct xen_p2m_entry *entry; + unsigned long irqflags; + + read_lock_irqsave(&p2m_lock, irqflags); + while (n) { + entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); + if (entry->pfn <= pfn && + entry->pfn + entry->nr_pages > pfn) { + read_unlock_irqrestore(&p2m_lock, irqflags); + return entry->mfn + (pfn - entry->pfn); + } + if (pfn < entry->pfn) + n = n->rb_left; + else + n = n->rb_right; + } + read_unlock_irqrestore(&p2m_lock, irqflags); + + return INVALID_P2M_ENTRY; +} +EXPORT_SYMBOL_GPL(__pfn_to_mfn); + +static int xen_add_mach_to_phys_entry(struct xen_p2m_entry *new) +{ + struct rb_node **link = &mach_to_phys.rb_node; + struct rb_node *parent = NULL; + struct xen_p2m_entry *entry; + int rc = 0; + + while (*link) { + parent = *link; + entry = rb_entry(parent, struct xen_p2m_entry, rbnode_mach); + + if (new->mfn == entry->mfn) + goto err_out; + if (new->pfn == entry->pfn) + goto err_out; + + if (new->mfn < entry->mfn) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + rb_link_node(&new->rbnode_mach, parent, link); + rb_insert_color(&new->rbnode_mach, &mach_to_phys); + goto out; + +err_out: + rc = -EINVAL; + pr_warn("%s: cannot add pfn=%pa -> mfn=%pa: pfn=%pa -> mfn=%pa already exists\n", + __func__, &new->pfn, &new->mfn, &entry->pfn, &entry->mfn); +out: + return rc; +} + +unsigned long __mfn_to_pfn(unsigned long mfn) +{ + struct rb_node *n = mach_to_phys.rb_node; + struct xen_p2m_entry *entry; + unsigned long irqflags; + + read_lock_irqsave(&p2m_lock, irqflags); + while (n) { + entry = rb_entry(n, struct xen_p2m_entry, rbnode_mach); + if (entry->mfn <= mfn && + entry->mfn + entry->nr_pages > mfn) { + read_unlock_irqrestore(&p2m_lock, irqflags); + return entry->pfn + (mfn - entry->mfn); + } + if (mfn < entry->mfn) + n = n->rb_left; + else + n = n->rb_right; + } + read_unlock_irqrestore(&p2m_lock, irqflags); + + return INVALID_P2M_ENTRY; +} +EXPORT_SYMBOL_GPL(__mfn_to_pfn); + +bool __set_phys_to_machine_multi(unsigned long pfn, + unsigned long mfn, unsigned long nr_pages) +{ + int rc; + unsigned long irqflags; + struct xen_p2m_entry *p2m_entry; + struct rb_node *n = phys_to_mach.rb_node; + + if (mfn == INVALID_P2M_ENTRY) { + write_lock_irqsave(&p2m_lock, irqflags); + while (n) { + p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); + if (p2m_entry->pfn <= pfn && + p2m_entry->pfn + p2m_entry->nr_pages > pfn) { + rb_erase(&p2m_entry->rbnode_mach, &mach_to_phys); + rb_erase(&p2m_entry->rbnode_phys, &phys_to_mach); + write_unlock_irqrestore(&p2m_lock, irqflags); + kfree(p2m_entry); + return true; + } + if (pfn < p2m_entry->pfn) + n = n->rb_left; + else + n = n->rb_right; + } + write_unlock_irqrestore(&p2m_lock, irqflags); + return true; + } + + p2m_entry = kzalloc(sizeof(struct xen_p2m_entry), GFP_NOWAIT); + if (!p2m_entry) { + pr_warn("cannot allocate xen_p2m_entry\n"); + return false; + } + p2m_entry->pfn = pfn; + p2m_entry->nr_pages = nr_pages; + p2m_entry->mfn = mfn; + + write_lock_irqsave(&p2m_lock, irqflags); + if ((rc = xen_add_phys_to_mach_entry(p2m_entry) < 0) || + (rc = xen_add_mach_to_phys_entry(p2m_entry) < 0)) { + write_unlock_irqrestore(&p2m_lock, irqflags); + return false; + } + write_unlock_irqrestore(&p2m_lock, irqflags); + return true; +} +EXPORT_SYMBOL_GPL(__set_phys_to_machine_multi); + +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + return __set_phys_to_machine_multi(pfn, mfn, 1); +} +EXPORT_SYMBOL_GPL(__set_phys_to_machine); + +int p2m_init(void) +{ + rwlock_init(&p2m_lock); + return 0; +} +arch_initcall(p2m_init); diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index be240404ba96..cd866b0c619b 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,2 +1,2 @@ -xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o) +xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o) obj-y := xen-arm.o hypercall.o -- cgit v1.2.3 From 2f558d40911c1b8f929b8a382833ae1da5df3293 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 9 Oct 2013 20:39:01 +0000 Subject: xen/x86: allow __set_phys_to_machine for autotranslate guests Allow __set_phys_to_machine to be called for autotranslate guests. It can be used to keep track of phys_to_machine changes, however we don't do anything with the information at the moment. Signed-off-by: Stefano Stabellini --- arch/x86/xen/p2m.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index a61c7d5811be..2ae8699e8767 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -799,10 +799,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { unsigned topidx, mididx, idx; - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + /* don't track P2M changes in autotranslate guests */ + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) return true; - } + if (unlikely(pfn >= MAX_P2M_PFN)) { BUG_ON(mfn != INVALID_P2M_ENTRY); return true; -- cgit v1.2.3 From 69908907b02efee31377af0cefbcd5a3ba66334a Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 9 Oct 2013 16:56:32 +0000 Subject: xen: make xen_create_contiguous_region return the dma address Modify xen_create_contiguous_region to return the dma address of the newly contiguous buffer. Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Reviewed-by: David Vrabel Changes in v4: - use virt_to_machine instead of virt_to_bus. --- arch/x86/xen/mmu.c | 4 +++- drivers/xen/swiotlb-xen.c | 6 +++--- include/xen/xen-ops.h | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index fdc3ba28ca38..6c34d7c03d5b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2329,7 +2329,8 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in, } int xen_create_contiguous_region(unsigned long vstart, unsigned int order, - unsigned int address_bits) + unsigned int address_bits, + dma_addr_t *dma_handle) { unsigned long *in_frames = discontig_frames, out_frame; unsigned long flags; @@ -2368,6 +2369,7 @@ int xen_create_contiguous_region(unsigned long vstart, unsigned int order, spin_unlock_irqrestore(&xen_reservation_lock, flags); + *dma_handle = virt_to_machine(vstart).maddr; return success ? 0 : -ENOMEM; } EXPORT_SYMBOL_GPL(xen_create_contiguous_region); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 1b2277c311d2..b72f31c1e018 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -126,6 +126,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) { int i, rc; int dma_bits; + dma_addr_t dma_handle; dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; @@ -137,7 +138,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) rc = xen_create_contiguous_region( (unsigned long)buf + (i << IO_TLB_SHIFT), get_order(slabs << IO_TLB_SHIFT), - dma_bits); + dma_bits, &dma_handle); } while (rc && dma_bits++ < max_dma_bits); if (rc) return rc; @@ -294,11 +295,10 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, *dma_handle = dev_addr; else { if (xen_create_contiguous_region(vstart, order, - fls64(dma_mask)) != 0) { + fls64(dma_mask), dma_handle) != 0) { free_pages(vstart, order); return NULL; } - *dma_handle = virt_to_machine(ret).maddr; } memset(ret, 0, size); return ret; diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index d6fe062cad6b..9ef704d3a9dd 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -20,7 +20,8 @@ int xen_setup_shutdown_event(void); extern unsigned long *xen_contiguous_bitmap; int xen_create_contiguous_region(unsigned long vstart, unsigned int order, - unsigned int address_bits); + unsigned int address_bits, + dma_addr_t *dma_handle); void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); -- cgit v1.2.3 From 83862ccfc0a03212fde43b4ac29c28381828768b Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 10 Oct 2013 13:40:44 +0000 Subject: xen/arm,arm64: enable SWIOTLB_XEN Xen on arm and arm64 needs SWIOTLB_XEN: when running on Xen we need to program the hardware with mfns rather than pfns for dma addresses. Remove SWIOTLB_XEN dependency on X86 and PCI and make XEN select SWIOTLB_XEN on arm and arm64. At the moment always rely on swiotlb-xen, but when Xen starts supporting hardware IOMMUs we'll be able to avoid it conditionally on the presence of an IOMMU on the platform. Implement xen_create_contiguous_region on arm and arm64: for the moment we assume that dom0 has been mapped 1:1 (physical addresses == machine addresses) therefore we don't need to call XENMEM_exchange. Simply return the physical address as dma address. Initialize the xen-swiotlb from xen_early_init (before the native dma_ops are initialized), set xen_dma_ops to &xen_swiotlb_dma_ops. Signed-off-by: Stefano Stabellini Changes in v8: - assume dom0 is mapped 1:1, no need to call XENMEM_exchange. Changes in v7: - call __set_phys_to_machine_multi from xen_create_contiguous_region and xen_destroy_contiguous_region to update the P2M; - don't call XENMEM_unpin, it has been removed; - call XENMEM_exchange instead of XENMEM_exchange_and_pin; - set nr_exchanged to 0 before calling the hypercall. Changes in v6: - introduce and export xen_dma_ops; - call xen_mm_init from as arch_initcall. Changes in v4: - remove redefinition of DMA_ERROR_CODE; - update the code to use XENMEM_exchange_and_pin and XENMEM_unpin; - add a note about hardware IOMMU in the commit message. Changes in v3: - code style changes; - warn on XENMEM_put_dma_buf failures. --- arch/arm/Kconfig | 1 + arch/arm/include/asm/xen/hypervisor.h | 2 ++ arch/arm/include/asm/xen/page.h | 1 + arch/arm/xen/Makefile | 2 +- arch/arm/xen/mm.c | 64 +++++++++++++++++++++++++++++++++++ arch/arm64/Kconfig | 1 + arch/arm64/xen/Makefile | 2 +- drivers/xen/Kconfig | 1 - drivers/xen/swiotlb-xen.c | 15 ++++++++ 9 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 arch/arm/xen/mm.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b08374f8fe3b..01f7013c85c7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1888,6 +1888,7 @@ config XEN depends on CPU_V7 && !CPU_V6 depends on !GENERIC_ATOMIC64 select ARM_PSCI + select SWIOTLB_XEN help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h index d7ab99a0c9eb..1317ee40f4df 100644 --- a/arch/arm/include/asm/xen/hypervisor.h +++ b/arch/arm/include/asm/xen/hypervisor.h @@ -16,4 +16,6 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return PARAVIRT_LAZY_NONE; } +extern struct dma_map_ops *xen_dma_ops; + #endif /* _ASM_ARM_XEN_HYPERVISOR_H */ diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index d1b5dd566472..5d0e4c5dc711 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 21e6ff503178..12969523414c 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1 +1 @@ -obj-y := enlighten.o hypercall.o grant-table.o p2m.o +obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c new file mode 100644 index 000000000000..d56b8c6e4fbb --- /dev/null +++ b/arch/arm/xen/mm.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +int xen_create_contiguous_region(unsigned long vstart, unsigned int order, + unsigned int address_bits, + dma_addr_t *dma_handle) +{ + if (!xen_initial_domain()) + return -EINVAL; + + /* we assume that dom0 is mapped 1:1 for now */ + *dma_handle = virt_to_phys(pstart); + return 0; +} +EXPORT_SYMBOL_GPL(xen_create_contiguous_region); + +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) +{ + return; +} +EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); + +struct dma_map_ops *xen_dma_ops; +EXPORT_SYMBOL_GPL(xen_dma_ops); + +static struct dma_map_ops xen_swiotlb_dma_ops = { + .mapping_error = xen_swiotlb_dma_mapping_error, + .alloc = xen_swiotlb_alloc_coherent, + .free = xen_swiotlb_free_coherent, + .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, + .sync_single_for_device = xen_swiotlb_sync_single_for_device, + .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = xen_swiotlb_sync_sg_for_device, + .map_sg = xen_swiotlb_map_sg_attrs, + .unmap_sg = xen_swiotlb_unmap_sg_attrs, + .map_page = xen_swiotlb_map_page, + .unmap_page = xen_swiotlb_unmap_page, + .dma_supported = xen_swiotlb_dma_supported, +}; + +int __init xen_mm_init(void) +{ + if (!xen_initial_domain()) + return 0; + xen_swiotlb_init(1, false); + xen_dma_ops = &xen_swiotlb_dma_ops; + return 0; +} +arch_initcall(xen_mm_init); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c04454876bcb..04ffafb6fbe9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -211,6 +211,7 @@ config XEN_DOM0 config XEN bool "Xen guest support on ARM64 (EXPERIMENTAL)" depends on ARM64 && OF + select SWIOTLB_XEN help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index cd866b0c619b..74a8d87e542b 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,2 +1,2 @@ -xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o) +xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 23eae5cb69c2..c794ea182140 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -140,7 +140,6 @@ config XEN_GRANT_DEV_ALLOC config SWIOTLB_XEN def_bool y - depends on PCI && X86 select SWIOTLB config XEN_TMEM diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index b72f31c1e018..f0fc1a4f565a 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -42,12 +42,27 @@ #include #include #include +#include /* * Used to do a quick range check in swiotlb_tbl_unmap_single and * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this * API. */ +#ifndef CONFIG_X86 +static unsigned long dma_alloc_coherent_mask(struct device *dev, + gfp_t gfp) +{ + unsigned long dma_mask = 0; + + dma_mask = dev->coherent_dma_mask; + if (!dma_mask) + dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32); + + return dma_mask; +} +#endif + static char *xen_io_tlb_start, *xen_io_tlb_end; static unsigned long xen_io_tlb_nslabs; /* -- cgit v1.2.3 From eb1ddc00b81cb8cb23891b6c7c5fbfaea29f3c73 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 9 Oct 2013 16:56:33 +0000 Subject: swiotlb-xen: introduce xen_swiotlb_set_dma_mask Implement xen_swiotlb_set_dma_mask, use it for set_dma_mask on arm. Signed-off-by: Stefano Stabellini --- arch/arm/xen/mm.c | 1 + drivers/xen/swiotlb-xen.c | 12 ++++++++++++ include/xen/swiotlb-xen.h | 2 ++ 3 files changed, 15 insertions(+) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index d56b8c6e4fbb..0d69b874d249 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -51,6 +51,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .map_page = xen_swiotlb_map_page, .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, + .set_dma_mask = xen_swiotlb_set_dma_mask, }; int __init xen_mm_init(void) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f0fc1a4f565a..d8ef0bf577d2 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -608,3 +608,15 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask) return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask; } EXPORT_SYMBOL_GPL(xen_swiotlb_dma_supported); + +int +xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !xen_swiotlb_dma_supported(dev, dma_mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + + return 0; +} +EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask); diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index de8bcc641c49..7b644650d968 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -55,4 +55,6 @@ xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern int xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); +extern int +xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask); #endif /* __LINUX_SWIOTLB_XEN_H */ -- cgit v1.2.3 From c7e9bc548325f19635e7ac7cd5f3ec587228952e Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 18 Oct 2013 16:01:26 +0000 Subject: arm/xen: get_dma_ops: return xen_dma_ops if we are running as xen_initial_domain We can't simply override arm_dma_ops with xen_dma_ops because devices are allowed to have their own dma_ops and they take precedence over arm_dma_ops. When running on Xen as initial domain, we always want xen_dma_ops to be the one in use. We introduce __generic_dma_ops to allow xen_dma_ops functions to call back to the native implementation. Signed-off-by: Stefano Stabellini Suggested-by: Catalin Marinas Acked-by: Russell King CC: will.deacon@arm.com CC: linux@arm.linux.org.uk Changes in v7: - return xen_dma_ops only if we are the initial domain; - rename __get_dma_ops to __generic_dma_ops. --- arch/arm/include/asm/dma-mapping.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 01b5a3d92197..f5945d4e4e9f 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -12,17 +12,28 @@ #include #include +#include +#include + #define DMA_ERROR_CODE (~0) extern struct dma_map_ops arm_dma_ops; extern struct dma_map_ops arm_coherent_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; return &arm_dma_ops; } +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (xen_initial_domain()) + return xen_dma_ops; + else + return __generic_dma_ops(dev); +} + static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) { BUG_ON(!dev); -- cgit v1.2.3 From 58c8b26909f287c72a7a39d26f800ffe61e01bf3 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 18 Oct 2013 16:01:32 +0000 Subject: arm64/xen: get_dma_ops: return xen_dma_ops if we are running as xen_initial_domain We can't simply override arm_dma_ops with xen_dma_ops because devices are allowed to have their own dma_ops and they take precedence over arm_dma_ops. When running on Xen as initial domain, we always want xen_dma_ops to be the one in use. We introduce __generic_dma_ops to allow xen_dma_ops functions to call back to the native implementation. Signed-off-by: Stefano Stabellini Acked-by: Catalin Marinas CC: will.deacon@arm.com Changes in v7: - return xen_dma_ops only if we are the initial domain; - rename __get_dma_ops to __generic_dma_ops. --- arch/arm64/include/asm/dma-mapping.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 59206b178494..fd0c0c0e447a 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -23,12 +23,15 @@ #include +#include +#include + #define ARCH_HAS_DMA_GET_REQUIRED_MASK #define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { if (unlikely(!dev) || !dev->archdata.dma_ops) return dma_ops; @@ -36,6 +39,14 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return dev->archdata.dma_ops; } +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (xen_initial_domain()) + return xen_dma_ops; + else + return __generic_dma_ops(dev); +} + #include static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -- cgit v1.2.3 From d6fe76c58c358498b91d21f0ca8054f6aa6e672d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 9 Oct 2013 17:18:14 +0000 Subject: xen: introduce xen_alloc/free_coherent_pages xen_swiotlb_alloc_coherent needs to allocate a coherent buffer for cpu and devices. On native x86 is sufficient to call __get_free_pages in order to get a coherent buffer, while on ARM (and potentially ARM64) we need to call the native dma_ops->alloc implementation. Introduce xen_alloc_coherent_pages to abstract the arch specific buffer allocation. Similarly introduce xen_free_coherent_pages to free a coherent buffer: on x86 is simply a call to free_pages while on ARM and ARM64 is arm_dma_ops.free. Signed-off-by: Stefano Stabellini Changes in v7: - rename __get_dma_ops to __generic_dma_ops; - call __generic_dma_ops(hwdev)->alloc/free on arm64 too. Changes in v6: - call __get_dma_ops to get the native dma_ops pointer on arm. --- arch/arm/include/asm/xen/page-coherent.h | 22 ++++++++++++++++++++++ arch/arm64/include/asm/xen/page-coherent.h | 22 ++++++++++++++++++++++ arch/ia64/include/asm/xen/page-coherent.h | 24 ++++++++++++++++++++++++ arch/x86/include/asm/xen/page-coherent.h | 24 ++++++++++++++++++++++++ 4 files changed, 92 insertions(+) create mode 100644 arch/arm/include/asm/xen/page-coherent.h create mode 100644 arch/arm64/include/asm/xen/page-coherent.h create mode 100644 arch/ia64/include/asm/xen/page-coherent.h create mode 100644 arch/x86/include/asm/xen/page-coherent.h diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h new file mode 100644 index 000000000000..c4d843c300c7 --- /dev/null +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -0,0 +1,22 @@ +#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H +#define _ASM_ARM_XEN_PAGE_COHERENT_H + +#include +#include +#include + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + __generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); +} + +#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h new file mode 100644 index 000000000000..3b4f029b6660 --- /dev/null +++ b/arch/arm64/include/asm/xen/page-coherent.h @@ -0,0 +1,22 @@ +#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H +#define _ASM_ARM64_XEN_PAGE_COHERENT_H + +#include +#include +#include + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + __generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); +} + +#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */ diff --git a/arch/ia64/include/asm/xen/page-coherent.h b/arch/ia64/include/asm/xen/page-coherent.h new file mode 100644 index 000000000000..37b929c117bf --- /dev/null +++ b/arch/ia64/include/asm/xen/page-coherent.h @@ -0,0 +1,24 @@ +#ifndef _ASM_IA64_XEN_PAGE_COHERENT_H +#define _ASM_IA64_XEN_PAGE_COHERENT_H + +#include +#include +#include + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + void *vstart = (void*)__get_free_pages(flags, get_order(size)); + *dma_handle = virt_to_phys(vstart); + return vstart; +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + free_pages((unsigned long) cpu_addr, get_order(size)); +} + +#endif /* _ASM_IA64_XEN_PAGE_COHERENT_H */ diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h new file mode 100644 index 000000000000..31de2e07918d --- /dev/null +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -0,0 +1,24 @@ +#ifndef _ASM_X86_XEN_PAGE_COHERENT_H +#define _ASM_X86_XEN_PAGE_COHERENT_H + +#include +#include +#include + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + void *vstart = (void*)__get_free_pages(flags, get_order(size)); + *dma_handle = virt_to_phys(vstart); + return vstart; +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + free_pages((unsigned long) cpu_addr, get_order(size)); +} + +#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ -- cgit v1.2.3 From 1b65c4e5a9af1a1c61e792e2d0ed481e0c1f21a9 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 10 Oct 2013 13:41:10 +0000 Subject: swiotlb-xen: use xen_alloc/free_coherent_pages Use xen_alloc_coherent_pages and xen_free_coherent_pages to allocate or free coherent pages. We need to be careful handling the pointer returned by xen_alloc_coherent_pages, because on ARM the pointer is not equal to phys_to_virt(*dma_handle). In fact virt_to_phys only works for kernel direct mapped RAM memory. In ARM case the pointer could be an ioremap address, therefore passing it to virt_to_phys would give you another physical address that doesn't correspond to it. Make xen_create_contiguous_region take a phys_addr_t as start parameter to avoid the virt_to_phys calls which would be incorrect. Changes in v6: - remove extra spaces. Signed-off-by: Stefano Stabellini Reviewed-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 6 +++--- arch/x86/xen/mmu.c | 7 +++++-- drivers/xen/swiotlb-xen.c | 31 +++++++++++++++++++++---------- include/xen/xen-ops.h | 4 ++-- 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 0d69b874d249..b0e77de99148 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -16,7 +16,7 @@ #include #include -int xen_create_contiguous_region(unsigned long vstart, unsigned int order, +int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle) { @@ -24,12 +24,12 @@ int xen_create_contiguous_region(unsigned long vstart, unsigned int order, return -EINVAL; /* we assume that dom0 is mapped 1:1 for now */ - *dma_handle = virt_to_phys(pstart); + *dma_handle = pstart; return 0; } EXPORT_SYMBOL_GPL(xen_create_contiguous_region); -void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) +void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { return; } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6c34d7c03d5b..883088368ff0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2328,13 +2328,14 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in, return success; } -int xen_create_contiguous_region(unsigned long vstart, unsigned int order, +int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle) { unsigned long *in_frames = discontig_frames, out_frame; unsigned long flags; int success; + unsigned long vstart = (unsigned long)phys_to_virt(pstart); /* * Currently an auto-translated guest will not perform I/O, nor will @@ -2374,11 +2375,12 @@ int xen_create_contiguous_region(unsigned long vstart, unsigned int order, } EXPORT_SYMBOL_GPL(xen_create_contiguous_region); -void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) +void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { unsigned long *out_frames = discontig_frames, in_frame; unsigned long flags; int success; + unsigned long vstart; if (xen_feature(XENFEAT_auto_translated_physmap)) return; @@ -2386,6 +2388,7 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) if (unlikely(order > MAX_CONTIG_ORDER)) return; + vstart = (unsigned long)phys_to_virt(pstart); memset((void *) vstart, 0, PAGE_SIZE << order); spin_lock_irqsave(&xen_reservation_lock, flags); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index d8ef0bf577d2..189b8db5c983 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -43,6 +43,7 @@ #include #include #include +#include /* * Used to do a quick range check in swiotlb_tbl_unmap_single and * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this @@ -142,6 +143,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) int i, rc; int dma_bits; dma_addr_t dma_handle; + phys_addr_t p = virt_to_phys(buf); dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; @@ -151,7 +153,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) do { rc = xen_create_contiguous_region( - (unsigned long)buf + (i << IO_TLB_SHIFT), + p + (i << IO_TLB_SHIFT), get_order(slabs << IO_TLB_SHIFT), dma_bits, &dma_handle); } while (rc && dma_bits++ < max_dma_bits); @@ -279,7 +281,6 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, void *ret; int order = get_order(size); u64 dma_mask = DMA_BIT_MASK(32); - unsigned long vstart; phys_addr_t phys; dma_addr_t dev_addr; @@ -294,8 +295,12 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret)) return ret; - vstart = __get_free_pages(flags, order); - ret = (void *)vstart; + /* On ARM this function returns an ioremap'ped virtual address for + * which virt_to_phys doesn't return the corresponding physical + * address. In fact on ARM virt_to_phys only works for kernel direct + * mapped RAM memory. Also see comment below. + */ + ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs); if (!ret) return ret; @@ -303,15 +308,19 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, if (hwdev && hwdev->coherent_dma_mask) dma_mask = dma_alloc_coherent_mask(hwdev, flags); - phys = virt_to_phys(ret); + /* At this point dma_handle is the physical address, next we are + * going to set it to the machine address. + * Do not use virt_to_phys(ret) because on ARM it doesn't correspond + * to *dma_handle. */ + phys = *dma_handle; dev_addr = xen_phys_to_bus(phys); if (((dev_addr + size - 1 <= dma_mask)) && !range_straddles_page_boundary(phys, size)) *dma_handle = dev_addr; else { - if (xen_create_contiguous_region(vstart, order, + if (xen_create_contiguous_region(phys, order, fls64(dma_mask), dma_handle) != 0) { - free_pages(vstart, order); + xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs); return NULL; } } @@ -334,13 +343,15 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, if (hwdev && hwdev->coherent_dma_mask) dma_mask = hwdev->coherent_dma_mask; - phys = virt_to_phys(vaddr); + /* do not use virt_to_phys because on ARM it doesn't return you the + * physical address */ + phys = xen_bus_to_phys(dev_addr); if (((dev_addr + size - 1 > dma_mask)) || range_straddles_page_boundary(phys, size)) - xen_destroy_contiguous_region((unsigned long)vaddr, order); + xen_destroy_contiguous_region(phys, order); - free_pages((unsigned long)vaddr, order); + xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs); } EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 9ef704d3a9dd..fb2ea8f26552 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -19,11 +19,11 @@ void xen_arch_resume(void); int xen_setup_shutdown_event(void); extern unsigned long *xen_contiguous_bitmap; -int xen_create_contiguous_region(unsigned long vstart, unsigned int order, +int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); -void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); +void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); struct vm_area_struct; int xen_remap_domain_mfn_range(struct vm_area_struct *vma, -- cgit v1.2.3 From 7100b077ab4ff5fb0ba7760ce54465f623a0a763 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 25 Oct 2013 10:39:49 +0000 Subject: xen: introduce xen_dma_map/unmap_page and xen_dma_sync_single_for_cpu/device Introduce xen_dma_map_page, xen_dma_unmap_page, xen_dma_sync_single_for_cpu and xen_dma_sync_single_for_device. They have empty implementations on x86 and ia64 but they call the corresponding platform dma_ops function on arm and arm64. Signed-off-by: Stefano Stabellini Changes in v9: - xen_dma_map_page return void, avoid page_to_phys. --- arch/arm/include/asm/xen/page-coherent.h | 28 ++++++++++++++++++++++++++++ arch/arm64/include/asm/xen/page-coherent.h | 25 +++++++++++++++++++++++++ arch/ia64/include/asm/xen/page-coherent.h | 14 ++++++++++++++ arch/x86/include/asm/xen/page-coherent.h | 14 ++++++++++++++ 4 files changed, 81 insertions(+) diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index c4d843c300c7..1109017499e5 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -19,4 +19,32 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, __generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); } +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); +} + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (__generic_dma_ops(hwdev)->unmap_page) + __generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); +} + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (__generic_dma_ops(hwdev)->sync_single_for_cpu) + __generic_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); +} + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (__generic_dma_ops(hwdev)->sync_single_for_device) + __generic_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); +} #endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h index 3b4f029b6660..2820f1a6eebe 100644 --- a/arch/arm64/include/asm/xen/page-coherent.h +++ b/arch/arm64/include/asm/xen/page-coherent.h @@ -19,4 +19,29 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, __generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); } +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); +} + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + __generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); +} + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + __generic_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); +} + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + __generic_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); +} #endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */ diff --git a/arch/ia64/include/asm/xen/page-coherent.h b/arch/ia64/include/asm/xen/page-coherent.h index 37b929c117bf..96e42f97fa1f 100644 --- a/arch/ia64/include/asm/xen/page-coherent.h +++ b/arch/ia64/include/asm/xen/page-coherent.h @@ -21,4 +21,18 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, free_pages((unsigned long) cpu_addr, get_order(size)); } +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { } + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { } + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) { } + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) { } + #endif /* _ASM_IA64_XEN_PAGE_COHERENT_H */ diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h index 31de2e07918d..7f02fe4e2c7b 100644 --- a/arch/x86/include/asm/xen/page-coherent.h +++ b/arch/x86/include/asm/xen/page-coherent.h @@ -21,4 +21,18 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, free_pages((unsigned long) cpu_addr, get_order(size)); } +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { } + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { } + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) { } + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) { } + #endif /* _ASM_X86_XEN_PAGE_COHERENT_H */ -- cgit v1.2.3 From 6cf054636261ca5c88f3c2984058d51f927b8a2e Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 25 Oct 2013 10:33:25 +0000 Subject: swiotlb-xen: use xen_dma_map/unmap_page, xen_dma_sync_single_for_cpu/device Call xen_dma_map_page, xen_dma_unmap_page, xen_dma_sync_single_for_cpu, xen_dma_sync_single_for_device from swiotlb-xen to ensure cpu/device coherency of the pages used for DMA, including the ones belonging to the swiotlb buffer. Signed-off-by: Stefano Stabellini --- drivers/xen/swiotlb-xen.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 189b8db5c983..4221cb52387d 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -378,8 +378,13 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * buffering it. */ if (dma_capable(dev, dev_addr, size) && - !range_straddles_page_boundary(phys, size) && !swiotlb_force) + !range_straddles_page_boundary(phys, size) && !swiotlb_force) { + /* we are not interested in the dma_addr returned by + * xen_dma_map_page, only in the potential cache flushes executed + * by the function. */ + xen_dma_map_page(dev, page, offset, size, dir, attrs); return dev_addr; + } /* * Oh well, have to allocate and map a bounce buffer. @@ -388,6 +393,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (map == SWIOTLB_MAP_ERROR) return DMA_ERROR_CODE; + xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT), + map & ~PAGE_MASK, size, dir, attrs); dev_addr = xen_phys_to_bus(map); /* @@ -410,12 +417,15 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_map_page); * whatever the device wrote there. */ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { phys_addr_t paddr = xen_bus_to_phys(dev_addr); BUG_ON(dir == DMA_NONE); + xen_dma_unmap_page(hwdev, paddr, size, dir, attrs); + /* NOTE: We use dev_addr here, not paddr! */ if (is_xen_swiotlb_buffer(dev_addr)) { swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); @@ -438,7 +448,7 @@ void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - xen_unmap_single(hwdev, dev_addr, size, dir); + xen_unmap_single(hwdev, dev_addr, size, dir, attrs); } EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_page); @@ -461,11 +471,15 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); + if (target == SYNC_FOR_CPU) + xen_dma_sync_single_for_cpu(hwdev, paddr, size, dir); + /* NOTE: We use dev_addr here, not paddr! */ - if (is_xen_swiotlb_buffer(dev_addr)) { + if (is_xen_swiotlb_buffer(dev_addr)) swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); - return; - } + + if (target == SYNC_FOR_DEVICE) + xen_dma_sync_single_for_cpu(hwdev, paddr, size, dir); if (dir != DMA_FROM_DEVICE) return; @@ -536,8 +550,17 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, return DMA_ERROR_CODE; } sg->dma_address = xen_phys_to_bus(map); - } else + } else { + /* we are not interested in the dma_addr returned by + * xen_dma_map_page, only in the potential cache flushes executed + * by the function. */ + xen_dma_map_page(hwdev, pfn_to_page(paddr >> PAGE_SHIFT), + paddr & ~PAGE_MASK, + sg->length, + dir, + attrs); sg->dma_address = dev_addr; + } sg_dma_len(sg) = sg->length; } return nelems; @@ -559,7 +582,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); + xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); } EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); -- cgit v1.2.3 From 783d0281043b9a1111d81d11ed0610b83d8857ed Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 25 Oct 2013 10:33:26 +0000 Subject: swiotlb: print a warning when the swiotlb is full Signed-off-by: Stefano Stabellini Changes in v7: - use dev_warn instead of pr_warn. --- drivers/xen/swiotlb-xen.c | 1 + lib/swiotlb.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 4221cb52387d..4d50058d9630 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -542,6 +542,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, sg->length, dir); if (map == SWIOTLB_MAP_ERROR) { + dev_warn(hwdev, "swiotlb buffer is full\n"); /* Don't panic here, we expect map_sg users to do proper error handling. */ xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 4e8686c7e5a4..cdc051eaf667 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -502,6 +502,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); + dev_warn(hwdev, "swiotlb buffer is full\n"); return SWIOTLB_MAP_ERROR; found: spin_unlock_irqrestore(&io_tlb_lock, flags); -- cgit v1.2.3 From 3d1975b57097820c131c692d2e0d075641370369 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 25 Oct 2013 10:33:26 +0000 Subject: arm,arm64: do not always merge biovec if we are running on Xen This is similar to what it is done on X86: biovecs are prevented from merging otherwise every dma requests would be forced to bounce on the swiotlb buffer. Signed-off-by: Stefano Stabellini Acked-by: Catalin Marinas Changes in v7: - remove the extra autotranslate check in biomerge.c. --- arch/arm/include/asm/io.h | 8 ++++++++ arch/arm64/include/asm/io.h | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index d070741b2b37..c45effc18312 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -24,9 +24,11 @@ #ifdef __KERNEL__ #include +#include #include #include #include +#include /* * ISA I/O bus memory addresses are 1:1 with the physical address. @@ -372,6 +374,12 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); #define BIOVEC_MERGEABLE(vec1, vec2) \ ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) +extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2); +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) + #ifdef CONFIG_MMU #define ARCH_HAS_VALID_PHYS_ADDR_RANGE extern int valid_phys_addr_range(phys_addr_t addr, size_t size); diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 1d12f89140ba..c163287b9871 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -22,11 +22,14 @@ #ifdef __KERNEL__ #include +#include #include #include #include +#include + /* * Generic IO read/write. These perform native-endian accesses. */ @@ -263,5 +266,11 @@ extern int devmem_is_allowed(unsigned long pfn); */ #define xlate_dev_kmem_ptr(p) p +extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, + const struct bio_vec *vec2); +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \ + (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))) + #endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ -- cgit v1.2.3 From 3d24bbd7dddbea54358a9795abaf051b0f18973c Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 25 Oct 2013 10:41:44 +0000 Subject: grant-table: call set_phys_to_machine after mapping grant refs When mapping/unmapping grant refs, call set_phys_to_machine to update the P2M with the new mappings for autotranslate guests. This is (almost) a nop on x86. Signed-off-by: Stefano Stabellini Changes in v9: - add in-code comments. --- drivers/xen/grant-table.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index c4d2298893b1..62ccf5424ba8 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -898,8 +899,16 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, &map_ops[i].status, __func__); - if (xen_feature(XENFEAT_auto_translated_physmap)) + /* this is basically a nop on x86 */ + if (xen_feature(XENFEAT_auto_translated_physmap)) { + for (i = 0; i < count; i++) { + if (map_ops[i].status) + continue; + set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT, + map_ops[i].dev_bus_addr >> PAGE_SHIFT); + } return ret; + } if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); @@ -942,8 +951,14 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, if (ret) return ret; - if (xen_feature(XENFEAT_auto_translated_physmap)) + /* this is basically a nop on x86 */ + if (xen_feature(XENFEAT_auto_translated_physmap)) { + for (i = 0; i < count; i++) { + set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT, + INVALID_P2M_ENTRY); + } return ret; + } if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); -- cgit v1.2.3 From 6b42a7eafb25ae9fda1708c501661384d1f60482 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 25 Oct 2013 10:33:27 +0000 Subject: swiotlb-xen: static inline xen_phys_to_bus, xen_bus_to_phys, xen_virt_to_bus and range_straddles_page_boundary This functions are small and called frequently. Static inline them. Signed-off-by: Stefano Stabellini --- drivers/xen/swiotlb-xen.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 4d50058d9630..100962dcc7bb 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -72,17 +72,17 @@ static unsigned long xen_io_tlb_nslabs; static u64 start_dma_addr; -static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) +static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr) { return phys_to_machine(XPADDR(paddr)).maddr; } -static phys_addr_t xen_bus_to_phys(dma_addr_t baddr) +static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr) { return machine_to_phys(XMADDR(baddr)).paddr; } -static dma_addr_t xen_virt_to_bus(void *address) +static inline dma_addr_t xen_virt_to_bus(void *address) { return xen_phys_to_bus(virt_to_phys(address)); } @@ -105,7 +105,7 @@ static int check_pages_physically_contiguous(unsigned long pfn, return 1; } -static int range_straddles_page_boundary(phys_addr_t p, size_t size) +static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) { unsigned long pfn = PFN_DOWN(p); unsigned int offset = p & ~PAGE_MASK; -- cgit v1.2.3 From 15177608c703e7b4aa29aa7c93b31001effe504c Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 29 Oct 2013 00:37:37 +0000 Subject: swiotlb-xen: fix error code returned by xen_swiotlb_map_sg_attrs map_sg returns the number of elements mapped, not a dma_addr_t. In case of error return 0, not DMA_ERROR_CODE. Signed-off-by: Stefano Stabellini --- drivers/xen/swiotlb-xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 100962dcc7bb..8af68629260b 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -548,7 +548,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, attrs); sg_dma_len(sgl) = 0; - return DMA_ERROR_CODE; + return 0; } sg->dma_address = xen_phys_to_bus(map); } else { -- cgit v1.2.3