summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- arch/arm64/Kconfig 1
-rw-r--r-- arch/arm64/include/asm/cache.h 5
-rw-r--r-- arch/arm64/mm/dma-mapping.c 4
-rw-r--r-- drivers/iommu/dma-iommu.c 35
-rw-r--r-- drivers/xen/swiotlb-xen.c 24
-rw-r--r-- include/linux/dma-map-ops.h 6
-rw-r--r-- kernel/dma/Kconfig 3
-rw-r--r-- kernel/dma/direct.c 6
-rw-r--r-- kernel/dma/direct.h 9
-rw-r--r-- kernel/dma/swiotlb.c 7
10 files changed, 78 insertions, 22 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 38dba5f7e4d2..ceafaac6532c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@ config ARM64
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select ARCH_HAS_BATCHED_DMA_SYNC
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_ZONE_DMA_SET if EXPERT
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index dd2c8586a725..10a7ffadee3d 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -87,6 +87,11 @@ int cache_line_size(void);
#define dma_get_cache_alignment cache_line_size
+static inline void arch_sync_dma_flush(void)
+{
+ dsb(sy);
+}
+
/* Compress a u64 MPIDR value into 32 bits. */
static inline u64 arch_compact_of_hwid(u64 id)
{
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index b2b5792b2caa..ae1ae0280eef 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -17,7 +17,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
{
unsigned long start = (unsigned long)phys_to_virt(paddr);
- dcache_clean_poc(start, start + size);
+ dcache_clean_poc_nosync(start, start + size);
}
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
@@ -28,7 +28,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
if (dir == DMA_TO_DEVICE)
return;
- dcache_inval_poc(start, start + size);
+ dcache_inval_poc_nosync(start, start + size);
}
void arch_dma_prep_coherent(struct page *page, size_t size)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5dac64be61bb..66fc25bae85b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1095,8 +1095,10 @@ void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(phys, size, dir);
+ arch_sync_dma_flush();
+ }
swiotlb_sync_single_for_cpu(dev, phys, size, dir);
}
@@ -1112,8 +1114,10 @@ void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
swiotlb_sync_single_for_device(dev, phys, size, dir);
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_device(phys, size, dir);
+ arch_sync_dma_flush();
+ }
}
void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
@@ -1122,13 +1126,15 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
struct scatterlist *sg;
int i;
- if (sg_dma_is_swiotlb(sgl))
+ if (sg_dma_is_swiotlb(sgl)) {
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
sg->length, dir);
- else if (!dev_is_dma_coherent(dev))
+ } else if (!dev_is_dma_coherent(dev)) {
for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
+ arch_sync_dma_flush();
+ }
}
void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
@@ -1137,14 +1143,16 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
struct scatterlist *sg;
int i;
- if (sg_dma_is_swiotlb(sgl))
+ if (sg_dma_is_swiotlb(sgl)) {
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
sg_dma_address(sg),
sg->length, dir);
- else if (!dev_is_dma_coherent(dev))
+ } else if (!dev_is_dma_coherent(dev)) {
for_each_sg(sgl, sg, nelems, i)
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
+ arch_sync_dma_flush();
+ }
}
static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
@@ -1219,8 +1227,10 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
return DMA_MAPPING_ERROR;
}
- if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+ if (!coherent && !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
arch_sync_dma_for_device(phys, size, dir);
+ arch_sync_dma_flush();
+ }
iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
@@ -1242,8 +1252,10 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
if (WARN_ON(!phys))
return;
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(phys, size, dir);
+ arch_sync_dma_flush();
+ }
__iommu_dma_unmap(dev, dma_handle, size);
@@ -1980,6 +1992,8 @@ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
dma_addr_t addr = state->addr + offset;
size_t iova_start_pad = iova_offset(iovad, addr);
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
return iommu_sync_map(domain, addr - iova_start_pad,
iova_align(iovad, size + iova_start_pad));
}
@@ -1993,6 +2007,8 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
size_t iova_start_pad = iova_offset(iovad, addr);
+ bool need_sync_dma = !dev_is_dma_coherent(dev) &&
+ !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO));
dma_addr_t end = addr + size;
do {
@@ -2016,6 +2032,9 @@ static void iommu_dma_iova_unlink_range_slow(struct device *dev,
addr += len;
iova_start_pad = 0;
} while (addr < end);
+
+ if (need_sync_dma)
+ arch_sync_dma_flush();
}
static void __iommu_dma_iova_unlink(struct device *dev,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ccf25027bec1..b79917e785a5 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -262,10 +262,12 @@ static dma_addr_t xen_swiotlb_map_phys(struct device *dev, phys_addr_t phys,
done:
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
- if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr)))) {
arch_sync_dma_for_device(phys, size, dir);
- else
+ arch_sync_dma_flush();
+ } else {
xen_dma_sync_for_device(dev, dev_addr, size, dir);
+ }
}
return dev_addr;
}
@@ -287,10 +289,12 @@ static void xen_swiotlb_unmap_phys(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
- if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
+ if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr)))) {
arch_sync_dma_for_cpu(paddr, size, dir);
- else
+ arch_sync_dma_flush();
+ } else {
xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
+ }
}
/* NOTE: We use dev_addr here, not paddr! */
@@ -308,10 +312,12 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
struct io_tlb_pool *pool;
if (!dev_is_dma_coherent(dev)) {
- if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
arch_sync_dma_for_cpu(paddr, size, dir);
- else
+ arch_sync_dma_flush();
+ } else {
xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
+ }
}
pool = xen_swiotlb_find_pool(dev, dma_addr);
@@ -331,10 +337,12 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
if (!dev_is_dma_coherent(dev)) {
- if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+ if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr)))) {
arch_sync_dma_for_device(paddr, size, dir);
- else
+ arch_sync_dma_flush();
+ } else {
xen_dma_sync_for_device(dev, dma_addr, size, dir);
+ }
}
}
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 60b63756df82..8a07df5a9ef6 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -361,6 +361,12 @@ static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
}
#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
+#ifndef CONFIG_ARCH_HAS_BATCHED_DMA_SYNC
+static inline void arch_sync_dma_flush(void)
+{
+}
+#endif
+
#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
void arch_sync_dma_for_cpu_all(void);
#else
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 159900736f25..bfef21b4a9ae 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -72,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
+config ARCH_HAS_BATCHED_DMA_SYNC
+ bool
+
#
# Select this option if the architecture assumes DMA devices are coherent
# by default.
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8f43a930716d..c7666e5d5e7c 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -406,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev,
arch_sync_dma_for_device(paddr, sg->length,
dir);
}
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
}
#endif
@@ -427,8 +429,10 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
}
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
+ }
}
/*
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index f476c63b668c..f925a7e8b000 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -60,8 +60,10 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
swiotlb_sync_single_for_device(dev, paddr, size, dir);
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_device(paddr, size, dir);
+ arch_sync_dma_flush();
+ }
}
static inline void dma_direct_sync_single_for_cpu(struct device *dev,
@@ -71,6 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(paddr, size, dir);
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
}
@@ -106,8 +109,10 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
}
if (!dev_is_dma_coherent(dev) &&
- !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+ !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
arch_sync_dma_for_device(phys, size, dir);
+ arch_sync_dma_flush();
+ }
return dma_addr;
err_overflow:
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..1105db1689d5 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -867,6 +867,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
if (orig_addr == INVALID_PHYS_ADDR)
return;
+ if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
+
/*
* It's valid for tlb_offset to be negative. This can happen when the
* "offset" returned by swiotlb_align_offset() is non-zero, and the
@@ -1595,8 +1598,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
return DMA_MAPPING_ERROR;
}
- if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
arch_sync_dma_for_device(swiotlb_addr, size, dir);
+ arch_sync_dma_flush();
+ }
return dma_addr;
}