 arch/arm64/Kconfig             |  1 +
 arch/arm64/include/asm/cache.h |  5 +++++
 arch/arm64/mm/dma-mapping.c    |  4 ++--
 drivers/iommu/dma-iommu.c      | 35 +++++++++++++++++++++++++++++++--------
 drivers/xen/swiotlb-xen.c      | 24 ++++++++++++++++--------
 include/linux/dma-map-ops.h    |  6 ++++++
 kernel/dma/Kconfig             |  3 +++
 kernel/dma/direct.c            |  6 +++++-
 kernel/dma/direct.h            |  9 +++++++--
 kernel/dma/swiotlb.c           |  7 ++++++-
 10 files changed, 78 insertions(+), 22 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 38dba5f7e4d2..ceafaac6532c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_BATCHED_DMA_SYNC
 	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_ZONE_DMA_SET if EXPERT
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index dd2c8586a725..10a7ffadee3d 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -87,6 +87,11 @@ int cache_line_size(void);
 
 #define dma_get_cache_alignment	cache_line_size
 
+static inline void arch_sync_dma_flush(void)
+{
+	dsb(sy);
+}
+
 /* Compress a u64 MPIDR value into 32 bits. */
 static inline u64 arch_compact_of_hwid(u64 id)
 {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index b2b5792b2caa..ae1ae0280eef 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -17,7 +17,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 {
 	unsigned long start = (unsigned long)phys_to_virt(paddr);
 
-	dcache_clean_poc(start, start + size);
+	dcache_clean_poc_nosync(start, start + size);
 }
 
 void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
@@ -28,7 +28,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 	if (dir == DMA_TO_DEVICE)
 		return;
 
-	dcache_inval_poc(start, start + size);
+	dcache_inval_poc_nosync(start, start + size);
 }
 
 void arch_dma_prep_coherent(struct page *page, size_t size)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5dac64be61bb..66fc25bae85b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1095,8 +1095,10 @@ void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-	if (!dev_is_dma_coherent(dev))
+	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_cpu(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 
 	swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
@@ -1112,8 +1114,10 @@ void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
 	swiotlb_sync_single_for_device(dev, phys, size, dir);
 
-	if (!dev_is_dma_coherent(dev))
+	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_device(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 }
 
 void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
@@ -1122,13 +1126,15 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
 	struct scatterlist *sg;
 	int i;
 
-	if (sg_dma_is_swiotlb(sgl))
+	if (sg_dma_is_swiotlb(sgl)) {
 		for_each_sg(sgl, sg, nelems, i)
 			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
 						      sg->length, dir);
-	else if (!dev_is_dma_coherent(dev))
+	} else if (!dev_is_dma_coherent(dev)) {
 		for_each_sg(sgl, sg, nelems, i)
 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
+		arch_sync_dma_flush();
+	}
 }
 
 void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
@@ -1137,14 +1143,16 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
 	struct scatterlist *sg;
 	int i;
 
-	if (sg_dma_is_swiotlb(sgl))
+	if (sg_dma_is_swiotlb(sgl)) {
 		for_each_sg(sgl, sg, nelems, i)
 			iommu_dma_sync_single_for_device(dev,
 							 sg_dma_address(sg),
 							 sg->length, dir);
-	else if (!dev_is_dma_coherent(dev))
+	} else if (!dev_is_dma_coherent(dev)) {
 		for_each_sg(sgl, sg, nelems, i)
 			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
+		arch_sync_dma_flush();
+	}
 }
 
 static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
@@ -1219,8 +1227,10 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 		return DMA_MAPPING_ERROR;
 	}
 
-	if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+	if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
 		arch_sync_dma_for_device(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 
 	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
 	if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
@@ -1242,8 +1252,10 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
 	if (WARN_ON(!phys))
 		return;
 
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_cpu(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 
 	__iommu_dma_unmap(dev, dma_handle, size);
 
@@ -1980,6 +1992,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
 	dma_addr_t addr = state->addr + offset;
 	size_t iova_start_pad = iova_offset(iovad, addr);
 
+	if (!dev_is_dma_coherent(dev))
+		arch_sync_dma_flush();
 	return iommu_sync_map(domain, addr - iova_start_pad,
 			iova_align(iovad, size + iova_start_pad));
 }
@@ -1993,6 +2007,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	size_t iova_start_pad = iova_offset(iovad, addr);
+	bool need_sync_dma = !dev_is_dma_coherent(dev) &&
+		!(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO));
 	dma_addr_t end = addr + size;
 
 	do {
@@ -2016,6 +2032,9 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
 		addr += len;
 		iova_start_pad = 0;
 	} while (addr < end);
+
+	if (need_sync_dma)
+		arch_sync_dma_flush();
 }
 
 static void __iommu_dma_iova_unlink(struct device *dev,
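The scatterlist hunks above are the case the new hook is built for: on arm64, arch_sync_dma_for_cpu() and arch_sync_dma_for_device() now perform cache maintenance without their trailing barrier, so one arch_sync_dma_flush() can complete the work for a whole list. A minimal before/after sketch of that pattern (illustrative only, not code from the patch):

	/* Before: each per-segment sync op ended in its own dsb(sy). */
	for_each_sg(sgl, sg, nelems, i)
		arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);	/* N barriers */

	/* After: per-segment maintenance is issued without a barrier and
	 * completed once for the whole scatterlist. */
	for_each_sg(sgl, sg, nelems, i)
		arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);	/* no barrier */
	arch_sync_dma_flush();						/* one dsb(sy) */
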
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ccf25027bec1..b79917e785a5 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -262,10 +262,12 @@ static dma_addr_t xen_swiotlb_map_phys(struct device *dev, phys_addr_t phys,
 done:
 	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
+		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) {
 			arch_sync_dma_for_device(phys, size, dir);
-		else
+			arch_sync_dma_flush();
+		} else {
 			xen_dma_sync_for_device(dev, dev_addr, size, dir);
+		}
 	}
 	return dev_addr;
 }
@@ -287,10 +289,12 @@ static void xen_swiotlb_unmap_phys(struct device *hwdev, dma_addr_t dev_addr,
 	BUG_ON(dir == DMA_NONE);
 
 	if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
+		if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) {
 			arch_sync_dma_for_cpu(paddr, size, dir);
-		else
+			arch_sync_dma_flush();
+		} else {
 			xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
+		}
 	}
 
 	/* NOTE: We use dev_addr here, not paddr! */
@@ -308,10 +312,12 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
 	struct io_tlb_pool *pool;
 
 	if (!dev_is_dma_coherent(dev)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
 			arch_sync_dma_for_cpu(paddr, size, dir);
-		else
+			arch_sync_dma_flush();
+		} else {
 			xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
+		}
 	}
 
 	pool = xen_swiotlb_find_pool(dev, dma_addr);
@@ -331,10 +337,12 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
 		__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
 
 	if (!dev_is_dma_coherent(dev)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
 			arch_sync_dma_for_device(paddr, size, dir);
-		else
+			arch_sync_dma_flush();
+		} else {
 			xen_dma_sync_for_device(dev, dma_addr, size, dir);
+		}
 	}
 }
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 60b63756df82..8a07df5a9ef6 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -361,6 +361,12 @@ static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 }
 #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
 
+#ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC
+static inline void arch_sync_dma_flush(void)
+{
+}
+#endif
+
 #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
 void arch_sync_dma_for_cpu_all(void);
 #else
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 159900736f25..bfef21b4a9ae 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -72,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT
 config ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	bool
 
+config ARCH_HAS_BATCHED_DMA_SYNC
+	bool
+
 #
 # Select this option if the architecture assumes DMA devices are coherent
 # by default.
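The two hunks above form the opt-in contract: generic DMA code may call arch_sync_dma_flush() unconditionally, because architectures that do not select ARCH_HAS_BATCHED_DMA_SYNC see only the empty inline stub, which compiles away. Another architecture would opt in by selecting the symbol and supplying its own flush primitive; a hypothetical sketch (the architecture name and barrier choice are placeholders, only arm64 is converted by this series):

	/* arch/<arch>/Kconfig (hypothetical) */
	select ARCH_HAS_BATCHED_DMA_SYNC

	/* arch header (hypothetical), mirroring arm64's asm/cache.h hook */
	static inline void arch_sync_dma_flush(void)
	{
		mb();	/* placeholder: whatever completes pending cache ops */
	}
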
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8f43a930716d..c7666e5d5e7c 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -406,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 			arch_sync_dma_for_device(paddr, sg->length, dir);
 	}
 
+	if (!dev_is_dma_coherent(dev))
+		arch_sync_dma_flush();
 }
 #endif
@@ -427,8 +429,10 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 		swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
 	}
 
-	if (!dev_is_dma_coherent(dev))
+	if (!dev_is_dma_coherent(dev)) {
+		arch_sync_dma_flush();
 		arch_sync_dma_for_cpu_all();
+	}
 }
 
 /*
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index f476c63b668c..f925a7e8b000 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -60,8 +60,10 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
 
 	swiotlb_sync_single_for_device(dev, paddr, size, dir);
 
-	if (!dev_is_dma_coherent(dev))
+	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_device(paddr, size, dir);
+		arch_sync_dma_flush();
+	}
 }
 
 static inline void dma_direct_sync_single_for_cpu(struct device *dev,
@@ -71,6 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
 
 	if (!dev_is_dma_coherent(dev)) {
 		arch_sync_dma_for_cpu(paddr, size, dir);
+		arch_sync_dma_flush();
 		arch_sync_dma_for_cpu_all();
 	}
 
@@ -106,8 +109,10 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
 	}
 
 	if (!dev_is_dma_coherent(dev) &&
-	    !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+	    !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
 		arch_sync_dma_for_device(phys, size, dir);
+		arch_sync_dma_flush();
+	}
 
 	return dma_addr;
 err_overflow:
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..1105db1689d5 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -867,6 +867,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
 
+	if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev))
+		arch_sync_dma_flush();
+
 	/*
 	 * It's valid for tlb_offset to be negative. This can happen when the
 	 * "offset" returned by swiotlb_align_offset() is non-zero, and the
@@ -1595,8 +1598,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
 		return DMA_MAPPING_ERROR;
 	}
 
-	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
 		arch_sync_dma_for_device(swiotlb_addr, size, dir);
+		arch_sync_dma_flush();
+	}
 
 	return dma_addr;
 }
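The driver-visible DMA API is unchanged by all of the above; the difference is only in how many barriers a sync executes on a non-coherent device. A sketch of an ordinary caller (standard DMA API usage, not taken from the patch):

	int nents = dma_map_sg(dev, sgl, count, DMA_FROM_DEVICE);

	if (nents <= 0)
		return -EIO;

	/* ... device writes into the buffers ... */

	/* On non-coherent arm64 this now invalidates every segment and
	 * then issues a single dsb(sy) instead of one per segment. */
	dma_sync_sg_for_cpu(dev, sgl, nents, DMA_FROM_DEVICE);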
