From 3d6f126b15d9fd8435455fffc912d976973a7a09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 27 Jul 2023 14:25:42 +0200 Subject: dma-mapping: move arch_dma_set_mask() declaration to header This function has a __weak definition and an override that is only used on freescale powerpc chips. The powerpc definition however does not see the declaration that is in a .c file: arch/powerpc/kernel/dma-mask.c:7:6: error: no previous prototype for 'arch_dma_set_mask' [-Werror=missing-prototypes] Move it into the linux/dma-map-ops.h header where the other arch_dma_* functions are declared. Signed-off-by: Arnd Bergmann Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 9bf19b5bf755..bb5e06fd359d 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -343,6 +343,12 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK +void arch_dma_set_mask(struct device *dev, u64 mask); +#else +#define arch_dma_set_mask(dev, mask) do { } while (0) +#endif + #ifdef CONFIG_MMU /* * Page protection so that devices that can't snoop CPU caches can use the -- cgit v1.2.3 From 22e4a348f87c59df2c02f1efb7ba9a56b622c7b8 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Fri, 12 May 2023 17:42:10 +0800 Subject: dma-contiguous: support per-numa CMA for all architectures In the commit b7176c261cdb ("dma-contiguous: provide the ability to reserve per-numa CMA"), Barry adds DMA_PERNUMA_CMA for ARM64. But this feature is architecture independent, so support per-numa CMA for all architectures, and enable it by default if NUMA. Signed-off-by: Yajun Deng Tested-by: Yicong Yang Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index bb5e06fd359d..f2fc203fb8a1 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -169,12 +169,6 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, } #endif /* CONFIG_DMA_CMA*/ -#ifdef CONFIG_DMA_PERNUMA_CMA -void dma_pernuma_cma_reserve(void); -#else -static inline void dma_pernuma_cma_reserve(void) { } -#endif /* CONFIG_DMA_PERNUMA_CMA */ - #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); -- cgit v1.2.3 From 05ee774122bd4a2f298668d6d5fc9e7b685a5e31 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:23:57 +0200 Subject: swiotlb: make io_tlb_default_mem local to swiotlb.c SWIOTLB implementation details should not be exposed to the rest of the kernel. This will allow to make changes to the implementation without modifying non-swiotlb code. To avoid breaking existing users, provide helper functions for the few required fields. As a bonus, using a helper function to initialize struct device allows to get rid of an #ifdef in driver core. Signed-off-by: Petr Tesarik Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 4e52cd5e0bdc..2d453b3e7771 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -110,7 +110,6 @@ struct io_tlb_mem { atomic_long_t used_hiwater; #endif }; -extern struct io_tlb_mem io_tlb_default_mem; static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { @@ -128,13 +127,22 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) void swiotlb_init(bool addressing_limited, unsigned int flags); void __init swiotlb_exit(void); +void swiotlb_dev_init(struct device *dev); size_t swiotlb_max_mapping_size(struct device *dev); +bool is_swiotlb_allocated(void); bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); +phys_addr_t default_swiotlb_base(void); +phys_addr_t default_swiotlb_limit(void); #else static inline void swiotlb_init(bool addressing_limited, unsigned int flags) { } + +static inline void swiotlb_dev_init(struct device *dev) +{ +} + static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { return false; @@ -151,6 +159,11 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev) return SIZE_MAX; } +static inline bool is_swiotlb_allocated(void) +{ + return false; +} + static inline bool is_swiotlb_active(struct device *dev) { return false; @@ -159,6 +172,16 @@ static inline bool is_swiotlb_active(struct device *dev) static inline void swiotlb_adjust_size(unsigned long size) { } + +static inline phys_addr_t default_swiotlb_base(void) +{ + return 0; +} + +static inline phys_addr_t default_swiotlb_limit(void) +{ + return 0; +} #endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); -- cgit v1.2.3 From fea18777a78e845c6031128b40aaa2cf2cb46874 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:23:58 +0200 Subject: swiotlb: add documentation and rename swiotlb_do_find_slots() Add some kernel-doc comments and move the existing documentation of struct io_tlb_slot to its correct location. The latter was forgotten in commit 942a8186eb445 ("swiotlb: move struct io_tlb_slot to swiotlb.c"). Use the opportunity to give swiotlb_do_find_slots() a more descriptive name and make it clear how it differs from swiotlb_find_slots(). Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 2d453b3e7771..31625ae507ea 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -76,10 +76,6 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and * @end. For default swiotlb, this is command line adjustable via * setup_io_tlb_npages. - * @list: The free list describing the number of free entries available - * from each index. - * @orig_addr: The original address corresponding to a mapped entry. - * @alloc_size: Size of the allocated buffer. * @debugfs: The dentry to debugfs. * @late_alloc: %true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced @@ -111,6 +107,17 @@ struct io_tlb_mem { #endif }; +/** + * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb + * @dev: Device which has mapped the buffer. + * @paddr: Physical address within the DMA buffer. + * + * Check if @paddr points into a bounce buffer. + * + * Return: + * * %true if @paddr points into a bounce buffer + * * %false otherwise + */ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; -- cgit v1.2.3 From 158dbe9c9a3d36da824139e5304169326550c6c9 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:23:59 +0200 Subject: swiotlb: separate memory pool data from other allocator data Carve out memory pool specific fields from struct io_tlb_mem. The original struct now contains shared data for the whole allocator, while the new struct io_tlb_pool contains data that is specific to one memory pool of (potentially) many. Signed-off-by: Petr Tesarik Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- include/linux/device.h | 2 +- include/linux/swiotlb.h | 45 ++++++++++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index bbaeabd04b0d..d9754a68ba95 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -625,7 +625,7 @@ struct device_physical_location { * @dma_pools: Dma pools (if dma'ble device). * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations - * @dma_io_tlb_mem: Pointer to the swiotlb pool used. Not for driver use. + * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 31625ae507ea..247f0ab8795a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -62,8 +62,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, #ifdef CONFIG_SWIOTLB /** - * struct io_tlb_mem - IO TLB Memory Pool Descriptor - * + * struct io_tlb_pool - IO TLB memory pool descriptor * @start: The start address of the swiotlb memory pool. Used to do a quick * range check to see if the memory was in fact allocated by this * API. @@ -73,15 +72,34 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @vaddr: The vaddr of the swiotlb memory pool. The swiotlb memory pool * may be remapped in the memory encrypted case and store virtual * address for bounce buffer operation. - * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and - * @end. For default swiotlb, this is command line adjustable via - * setup_io_tlb_npages. + * @nslabs: The number of IO TLB slots between @start and @end. For the + * default swiotlb, this can be adjusted with a boot parameter, + * see setup_io_tlb_npages(). + * @late_alloc: %true if allocated using the page allocator. + * @nareas: Number of areas in the pool. + * @area_nslabs: Number of slots in each area. + * @areas: Array of memory area descriptors. + * @slots: Array of slot descriptors. + */ +struct io_tlb_pool { + phys_addr_t start; + phys_addr_t end; + void *vaddr; + unsigned long nslabs; + bool late_alloc; + unsigned int nareas; + unsigned int area_nslabs; + struct io_tlb_area *areas; + struct io_tlb_slot *slots; +}; + +/** + * struct io_tlb_mem - Software IO TLB allocator + * @defpool: Default (initial) IO TLB memory pool descriptor. + * @nslabs: Total number of IO TLB slabs in all pools. * @debugfs: The dentry to debugfs. - * @late_alloc: %true if allocated using the page allocator * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation - * @nareas: The area number in the pool. - * @area_nslabs: The slot number in the area. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -89,18 +107,11 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * in debugfs. */ struct io_tlb_mem { - phys_addr_t start; - phys_addr_t end; - void *vaddr; + struct io_tlb_pool defpool; unsigned long nslabs; struct dentry *debugfs; - bool late_alloc; bool force_bounce; bool for_alloc; - unsigned int nareas; - unsigned int area_nslabs; - struct io_tlb_area *areas; - struct io_tlb_slot *slots; #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; @@ -122,7 +133,7 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - return mem && paddr >= mem->start && paddr < mem->end; + return mem && paddr >= mem->defpool.start && paddr < mem->defpool.end; } static inline bool is_swiotlb_force_bounce(struct device *dev) -- cgit v1.2.3 From 62708b2ba4055cad43a95754e939566b56dde5b6 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:00 +0200 Subject: swiotlb: add a flag whether SWIOTLB is allowed to grow Add a config option (CONFIG_SWIOTLB_DYNAMIC) to enable or disable dynamic allocation of additional bounce buffers. If this option is set, mark the default SWIOTLB as able to grow and restricted DMA pools as unable. However, if the address of the default memory pool is explicitly queried, make the default SWIOTLB also unable to grow. This is currently used to set up PCI BAR movable regions on some Octeon MIPS boards which may not be able to use a SWIOTLB pool elsewhere in physical memory. See octeon_pci_setup() for more details. If a remap function is specified, it must be also called on any dynamically allocated pools, but there are some issues: - The remap function may block, so it should not be called from an atomic context. - There is no corresponding unremap() function if the memory pool is freed. - The only in-tree implementation (xen_swiotlb_fixup) requires that the number of slots in the memory pool is a multiple of SWIOTLB_SEGSIZE. Keep it simple for now and disable growing the SWIOTLB if a remap function was specified. Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 247f0ab8795a..57be2a0a9fbf 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -100,6 +100,7 @@ struct io_tlb_pool { * @debugfs: The dentry to debugfs. * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation + * @can_grow: %true if more pools can be allocated dynamically. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -112,6 +113,9 @@ struct io_tlb_mem { struct dentry *debugfs; bool force_bounce; bool for_alloc; +#ifdef CONFIG_SWIOTLB_DYNAMIC + bool can_grow; +#endif #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; -- cgit v1.2.3 From 79636caad3618e2b38457f6e298c9b31ba82b489 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:01 +0200 Subject: swiotlb: if swiotlb is full, fall back to a transient memory pool Try to allocate a transient memory pool if no suitable slots can be found and the respective SWIOTLB is allowed to grow. The transient pool is just enough big for this one bounce buffer. It is inserted into a per-device list of transient memory pools, and it is freed again when the bounce buffer is unmapped. Transient memory pools are kept in an RCU list. A memory barrier is required after adding a new entry, because any address within a transient buffer must be immediately recognized as belonging to the SWIOTLB, even if it is passed to another CPU. Deletion does not require any synchronization beyond RCU ordering guarantees. After a buffer is unmapped, its physical addresses may no longer be passed to the DMA API, so the memory range of the corresponding stale entry in the RCU list never matches. If the memory range gets allocated again, then it happens only after a RCU quiescent state. Since bounce buffers can now be allocated from different pools, add a parameter to swiotlb_alloc_pool() to let the caller know which memory pool is used. Add swiotlb_find_pool() to find the memory pool corresponding to an address. This function is now also used by is_swiotlb_buffer(), because a simple boundary check is no longer sufficient. The logic in swiotlb_alloc_tlb() is taken from __dma_direct_alloc_pages(), simplified and enhanced to use coherent memory pools if needed. Note that this is not the most efficient way to provide a bounce buffer, but when a DMA buffer can't be mapped, something may (and will) actually break. At that point it is better to make an allocation, even if it may be an expensive operation. Signed-off-by: Petr Tesarik Reviewed-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- include/linux/device.h | 6 ++++++ include/linux/dma-mapping.h | 2 ++ include/linux/swiotlb.h | 29 ++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index d9754a68ba95..5fd89c9d005c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -626,6 +626,8 @@ struct device_physical_location { * @dma_mem: Internal for coherent mem override. * @cma_area: Contiguous memory area for dma allocations * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. + * @dma_io_tlb_pools: List of transient swiotlb memory pools. + * @dma_io_tlb_lock: Protects changes to the list of active pools. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -731,6 +733,10 @@ struct device { #endif #ifdef CONFIG_SWIOTLB struct io_tlb_mem *dma_io_tlb_mem; +#endif +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct list_head dma_io_tlb_pools; + spinlock_t dma_io_tlb_lock; #endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e13050eb9777..f0ccca16a0ac 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -418,6 +418,8 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); + static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 57be2a0a9fbf..66867d2188ba 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -80,6 +80,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, * @area_nslabs: Number of slots in each area. * @areas: Array of memory area descriptors. * @slots: Array of slot descriptors. + * @node: Member of the IO TLB memory pool list. + * @rcu: RCU head for swiotlb_dyn_free(). + * @transient: %true if transient memory pool. */ struct io_tlb_pool { phys_addr_t start; @@ -91,6 +94,11 @@ struct io_tlb_pool { unsigned int area_nslabs; struct io_tlb_area *areas; struct io_tlb_slot *slots; +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct list_head node; + struct rcu_head rcu; + bool transient; +#endif }; /** @@ -122,6 +130,20 @@ struct io_tlb_mem { #endif }; +#ifdef CONFIG_SWIOTLB_DYNAMIC + +struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr); + +#else + +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) +{ + return &dev->dma_io_tlb_mem->defpool; +} + +#endif + /** * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb * @dev: Device which has mapped the buffer. @@ -137,7 +159,12 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - return mem && paddr >= mem->defpool.start && paddr < mem->defpool.end; + if (!mem) + return false; + + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) + return swiotlb_find_pool(dev, paddr); + return paddr >= mem->defpool.start && paddr < mem->defpool.end; } static inline bool is_swiotlb_force_bounce(struct device *dev) -- cgit v1.2.3 From ad96ce3252dbab773cb343220662df3d84dd8e80 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:02 +0200 Subject: swiotlb: determine potential physical address limit The value returned by default_swiotlb_limit() should be constant, because it is used to decide whether DMA can be used. To allow allocating memory pools on the fly, use the maximum possible physical address rather than the highest address used by the default pool. For swiotlb_init_remap(), this is either an arch-specific limit used by memblock_alloc_low(), or the highest directly mapped physical address if the initialization flags include SWIOTLB_ANY. For swiotlb_init_late(), the highest address is determined by the GFP flags. Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 66867d2188ba..9825fa14abe4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -109,6 +109,7 @@ struct io_tlb_pool { * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation * @can_grow: %true if more pools can be allocated dynamically. + * @phys_limit: Maximum allowed physical address. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -123,6 +124,7 @@ struct io_tlb_mem { bool for_alloc; #ifdef CONFIG_SWIOTLB_DYNAMIC bool can_grow; + u64 phys_limit; #endif #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; -- cgit v1.2.3 From 1aaa736815eb04f4dae3f0b3e977b2a0677a4cfb Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:03 +0200 Subject: swiotlb: allocate a new memory pool when existing pools are full When swiotlb_find_slots() cannot find suitable slots, schedule the allocation of a new memory pool. It is not possible to allocate the pool immediately, because this code may run in interrupt context, which is not suitable for large memory allocations. This means that the memory pool will be available too late for the currently requested mapping, but the stress on the software IO TLB allocator is likely to continue, and subsequent allocations will benefit from the additional pool eventually. Keep all memory pools for an allocator in an RCU list to avoid locking on the read side. For modifications, add a new spinlock to struct io_tlb_mem. The spinlock also protects updates to the total number of slabs (nslabs in struct io_tlb_mem), but not reads of the value. Readers may therefore encounter a stale value, but this is not an issue: - swiotlb_tbl_map_single() and is_swiotlb_active() only check for non-zero value. This is ensured by the existence of the default memory pool, allocated at boot. - The exact value is used only for non-critical purposes (debugfs, kernel messages). Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 9825fa14abe4..8371c92a0271 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -8,6 +8,7 @@ #include #include #include +#include struct device; struct page; @@ -104,12 +105,16 @@ struct io_tlb_pool { /** * struct io_tlb_mem - Software IO TLB allocator * @defpool: Default (initial) IO TLB memory pool descriptor. + * @pool: IO TLB memory pool descriptor (if not dynamic). * @nslabs: Total number of IO TLB slabs in all pools. * @debugfs: The dentry to debugfs. * @force_bounce: %true if swiotlb bouncing is forced * @for_alloc: %true if the pool is used for memory allocation * @can_grow: %true if more pools can be allocated dynamically. * @phys_limit: Maximum allowed physical address. + * @lock: Lock to synchronize changes to the list. + * @pools: List of IO TLB memory pool descriptors (if dynamic). + * @dyn_alloc: Dynamic IO TLB pool allocation work. * @total_used: The total number of slots in the pool that are currently used * across all areas. Used only for calculating used_hiwater in * debugfs. @@ -125,6 +130,9 @@ struct io_tlb_mem { #ifdef CONFIG_SWIOTLB_DYNAMIC bool can_grow; u64 phys_limit; + spinlock_t lock; + struct list_head pools; + struct work_struct dyn_alloc; #endif #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; -- cgit v1.2.3 From 1395706a14904f2593debecf20f827e72d7392a7 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 1 Aug 2023 08:24:04 +0200 Subject: swiotlb: search the software IO TLB only if the device makes use of it Skip searching the software IO TLB if a device has never used it, making sure these devices are not affected by the introduction of multiple IO TLB memory pools. Additional memory barrier is required to ensure that the new value of the flag is visible to other CPUs after mapping a new bounce buffer. For efficiency, the flag check should be inlined, and then the memory barrier must be moved to is_swiotlb_buffer(). However, it can replace the existing barrier in swiotlb_find_pool(), because all callers use is_swiotlb_buffer() first to verify that the buffer address belongs to the software IO TLB. Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/device.h | 2 ++ include/linux/swiotlb.h | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 5fd89c9d005c..6fc808d22bfd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -628,6 +628,7 @@ struct device_physical_location { * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. * @dma_io_tlb_pools: List of transient swiotlb memory pools. * @dma_io_tlb_lock: Protects changes to the list of active pools. + * @dma_uses_io_tlb: %true if device has used the software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -737,6 +738,7 @@ struct device { #ifdef CONFIG_SWIOTLB_DYNAMIC struct list_head dma_io_tlb_pools; spinlock_t dma_io_tlb_lock; + bool dma_uses_io_tlb; #endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 8371c92a0271..b4536626f8ff 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) if (!mem) return false; - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { + /* Pairs with smp_wmb() in swiotlb_find_slots() and + * swiotlb_dyn_alloc(), which modify the RCU lists. + */ + smp_rmb(); return swiotlb_find_pool(dev, paddr); + } return paddr >= mem->defpool.start && paddr < mem->defpool.end; } -- cgit v1.2.3