From f2ae67941138a1e53cb1bc6a1b5878a8bdc74d26 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Tue, 12 Jun 2018 18:16:19 +0200
Subject: alpha: Remove custom dec_and_lock() implementation

Alpha provides a custom implementation of dec_and_lock(). The function
is split into two parts:
- atomic_add_unless() + return 0 (fast path in assembly)
- remaining part including locking (slow path in C)

Comparing the result of the alpha implementation with the generic
implementation compiled by gcc, it looks like the fast path is optimized
by avoiding a stack frame (and the reload of the GP) and register
stores; all of that happens only in the slow path.

After marking the slow path (atomic_dec_and_lock_1()) as "noinline" and
doing the slow path in C (the atomic_add_unless(atomic, -1, 1) part) I
noticed differences in the resulting assembly:
- the GP is still reloaded
- atomic_add_unless() adds more memory barriers compared to the custom
  assembly
- the custom assembly here does "load, sub, beq" while
  atomic_add_unless() does "load, cmpeq, add, bne". This is okay because
  it compares against zero after the subtraction, while the generic code
  compares against 1 before it.

I'm not sure if avoiding the stack frame (and the GP reload) brings a
lot in terms of performance. Regarding the different barriers, Peter
Zijlstra says:

|refcount decrement needs to be a RELEASE operation, such that all the
|load/stores to the object happen before we decrement the refcount.
|
|Otherwise things like:
|
|	obj->foo = 5;
|	refcnt_dec(&obj->ref);
|
|can be re-ordered, which then allows fun scenarios like:
|
|	CPU0				CPU1
|
|	refcnt_dec(&obj->ref);
|					if (dec_and_test(&obj->ref))
|						free(obj);
|	obj->foo = 5; // oops UaF
|
|
|This means (for alpha) that there should be a memory barrier _before_
|the decrement, however the dec_and_lock asm thing only has one _after_,
|which, per the above, is too late.
|
|The generic version using add_unless will result in memory barrier
|before and after (because that is the rule for atomic ops with a return
|value) which is strictly too many barriers for the refcount story, but
|who knows what other ordering requirements code has.

Remove the custom alpha implementation of dec_and_lock(). If this turns
out to be a performance issue, the fast path could still be inlined.
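
[ For reference, the generic lib/dec_and_lock.c code that alpha now
  falls back to follows the fast-path/slow-path split described above.
  A sketch; the exact upstream source may differ slightly:

	int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
	{
		/* Fast path: subtract 1 unless that would drop the
		 * counter to 0 (ie. it was 1); no lock is taken. */
		if (atomic_add_unless(atomic, -1, 1))
			return 0;

		/* Slow path: take the lock and re-check under it. */
		spin_lock(lock);
		if (atomic_dec_and_test(atomic))
			return 1;
		spin_unlock(lock);
		return 0;
	}

  The next patch in this series extends the same pattern with an
  irqsave variant. ]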
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Cc: Richard Henderson
Cc: Ivan Kokshaysky
Cc: Matt Turner
Cc: linux-alpha@vger.kernel.org
Link: https://lkml.kernel.org/r/20180606115918.GG12198@hirez.programming.kicks-ass.net
Link: https://lkml.kernel.org/r20180612161621.22645-2-bigeasy@linutronix.de
---
 lib/Makefile | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 84c6dcb31fbb..8b59f4a7c0e2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,7 +23,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
	 sha1.o chacha20.o irq_regs.o argv_split.o \
	 flex_proportions.o ratelimit.o show_mem.o \
	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o seq_buf.o siphash.o \
+	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
	 nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
@@ -98,10 +98,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
-ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
-  lib-y += dec_and_lock.o
-endif
-
 obj-$(CONFIG_BITREVERSE) += bitrev.o
 obj-$(CONFIG_RATIONAL) += rational.o
 obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
--
cgit v1.2.3

From ccfbb5bed407053b27492a9adc06064d949a9aa6 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner
Date: Tue, 12 Jun 2018 18:16:20 +0200
Subject: atomic: Add irqsave variant of atomic_dec_and_lock()

There are in-tree users of atomic_dec_and_lock() which must acquire the
spin lock with interrupts disabled. To work around the lack of an
irqsave variant of atomic_dec_and_lock() they use local_irq_save() at
the call site. This causes extra code and, in some places, unnecessarily
long interrupt-disabled sections. These places also need extra treatment
for PREEMPT_RT because the irq disabling there is disconnected from the
lock function.

Implement the missing irqsave variant of the function.

Signed-off-by: Anna-Maria Gleixner
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r20180612161621.22645-3-bigeasy@linutronix.de
---
 lib/dec_and_lock.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'lib')

diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index 347fa7ac2e8a..9555b68bb774 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(_atomic_dec_and_lock);
+
+int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
+				 unsigned long *flags)
+{
+	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+	if (atomic_add_unless(atomic, -1, 1))
+		return 0;
+
+	/* Otherwise do it the slow way */
+	spin_lock_irqsave(lock, *flags);
+	if (atomic_dec_and_test(atomic))
+		return 1;
+	spin_unlock_irqrestore(lock, *flags);
+	return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);
--
cgit v1.2.3
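
[ Callers that previously wrapped atomic_dec_and_lock() in
  local_irq_save()/local_irq_restore() can now let the helper manage the
  flags. A hypothetical before/after sketch; struct foo, its fields and
  the teardown are invented for illustration, and the call goes through
  the exported C function directly since the header-side wrapper is not
  part of this lib/-limited diff:

	#include <linux/atomic.h>
	#include <linux/list.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct foo {			/* hypothetical object */
		atomic_t refcnt;
		spinlock_t lock;	/* protects node */
		struct list_head node;
	};

	/* Before: interrupts are disabled by hand around the whole
	 * operation, including the fast path that never takes the lock. */
	static void foo_put_before(struct foo *f)
	{
		unsigned long flags;

		local_irq_save(flags);
		if (atomic_dec_and_lock(&f->refcnt, &f->lock)) {
			list_del(&f->node);
			spin_unlock(&f->lock);
			local_irq_restore(flags);
			kfree(f);
			return;
		}
		local_irq_restore(flags);
	}

	/* After: interrupts are disabled only on the slow path; on
	 * success the lock is held and the old irq state is in flags. */
	static void foo_put_after(struct foo *f)
	{
		unsigned long flags;

		if (_atomic_dec_and_lock_irqsave(&f->refcnt, &f->lock,
						 &flags)) {
			list_del(&f->node);
			spin_unlock_irqrestore(&f->lock, flags);
			kfree(f);
		}
	} ]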
From 7ea959c45769612aa92557fb6464679f5fec7d9e Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner
Date: Tue, 12 Jun 2018 18:16:21 +0200
Subject: locking/refcounts: Implement refcount_dec_and_lock_irqsave()

There are in-tree users of refcount_dec_and_lock() which must acquire
the spin lock with interrupts disabled. To work around the lack of an
irqsave variant of refcount_dec_and_lock() they use local_irq_save() at
the call site. This causes extra code and, in some places, unnecessarily
long interrupt-disabled sections. These places also need extra treatment
for PREEMPT_RT because the irq disabling there is disconnected from the
lock function.

Implement the missing irqsave variant of the function.

Signed-off-by: Anna-Maria Gleixner
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r20180612161621.22645-4-bigeasy@linutronix.de
[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g]
---
 lib/refcount.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'lib')

diff --git a/lib/refcount.c b/lib/refcount.c
index 0eb48353abe3..d3b81cefce91 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(refcount_dec_and_lock);
+
+/**
+ * refcount_dec_and_lock_irqsave - return holding spinlock with disabled
+ *                                 interrupts if able to decrement refcount to 0
+ * @r: the refcount
+ * @lock: the spinlock to be locked
+ * @flags: saved IRQ-flags if the lock is acquired
+ *
+ * Same as refcount_dec_and_lock() above except that the spinlock is acquired
+ * with disabled interrupts.
+ *
+ * Return: true and hold spinlock if able to decrement refcount to 0, false
+ * otherwise
+ */
+bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
+				   unsigned long *flags)
+{
+	if (refcount_dec_not_one(r))
+		return false;
+
+	spin_lock_irqsave(lock, *flags);
+	if (!refcount_dec_and_test(r)) {
+		spin_unlock_irqrestore(lock, *flags);
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);
--
cgit v1.2.3
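
[ Usage mirrors refcount_dec_and_lock(); a hypothetical release path,
  with struct bar and its fields invented for illustration:

	#include <linux/list.h>
	#include <linux/refcount.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct bar {			/* hypothetical refcounted object */
		refcount_t ref;
		spinlock_t lock;	/* protects node */
		struct list_head node;
	};

	static void bar_put(struct bar *b)
	{
		unsigned long flags;

		/* true only when the count hit zero; the lock is then
		 * held with interrupts disabled and the previous irq
		 * state saved in flags */
		if (!refcount_dec_and_lock_irqsave(&b->ref, &b->lock,
						   &flags))
			return;

		list_del(&b->node);
		spin_unlock_irqrestore(&b->lock, flags);
		kfree(b);
	} ]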
From e37460c1ca08cf9d3b82eb3b6f205888d8d01182 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 14 Jun 2018 08:49:34 +0200
Subject: dma-mapping: use obj-y instead of lib-y for generic dma ops

We already have exact config symbols to select the direct, non-coherent,
or virt dma ops, so use the normal obj- scheme to select them.

Signed-off-by: Christoph Hellwig
---
 lib/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 956b320292fe..5e0e160c9242 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -29,9 +29,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 lib-$(CONFIG_PRINTK) += dump_stack.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
-lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
-lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
-lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
+obj-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
+obj-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
+obj-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y += kobject.o klist.o
 obj-y += lockref.o
--
cgit v1.2.3

From cf65a0f6f6ff7631ba0ac0513a14ca5b65320d80 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 12 Jun 2018 19:01:45 +0200
Subject: dma-mapping: move all DMA mapping code to kernel/dma

Currently the code is split over various files with dma- prefixes in the
lib/ and drivers/base directories, and the number of files keeps
growing. Move them into a single directory to keep the code together and
remove the file name prefixes. To match the irq infrastructure, this
directory is placed under the kernel/ directory.

Signed-off-by: Christoph Hellwig
---
 lib/Kconfig           |   47 +-
 lib/Makefile          |    6 -
 lib/dma-debug.c       | 1773 -------------------------------------------------
 lib/dma-direct.c      |  204 ------
 lib/dma-noncoherent.c |  102 ---
 lib/dma-virt.c        |   61 --
 lib/swiotlb.c         | 1087 ------------------------------
 7 files changed, 1 insertion(+), 3279 deletions(-)
 delete mode 100644 lib/dma-debug.c
 delete mode 100644 lib/dma-direct.c
 delete mode 100644 lib/dma-noncoherent.c
 delete mode 100644 lib/dma-virt.c
 delete mode 100644 lib/swiotlb.c

(limited to 'lib')

diff --git a/lib/Kconfig b/lib/Kconfig
index 809fdd155739..803fcbced729 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -420,60 +420,15 @@ config HAS_IOPORT_MAP
 	depends on HAS_IOMEM && !NO_IOPORT_MAP
 	default y
 
-config HAS_DMA
-	bool
-	depends on !NO_DMA
-	default y
+source "kernel/dma/Kconfig"
 
 config SGL_ALLOC
 	bool
 	default n
 
-config NEED_SG_DMA_LENGTH
-	bool
-
-config NEED_DMA_MAP_STATE
-	bool
-
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool 64BIT || PHYS_ADDR_T_64BIT
-
 config IOMMU_HELPER
 	bool
 
-config ARCH_HAS_SYNC_DMA_FOR_DEVICE
-	bool
-
-config ARCH_HAS_SYNC_DMA_FOR_CPU
-	bool
-	select NEED_DMA_MAP_STATE
-
-config DMA_DIRECT_OPS
-	bool
-	depends on HAS_DMA
-
-config DMA_NONCOHERENT_OPS
-	bool
-	depends on HAS_DMA
-	select DMA_DIRECT_OPS
-
-config DMA_NONCOHERENT_MMAP
-	bool
-	depends on DMA_NONCOHERENT_OPS
-
-config DMA_NONCOHERENT_CACHE_SYNC
-	bool
-	depends on DMA_NONCOHERENT_OPS
-
-config DMA_VIRT_OPS
-	bool
-	depends on HAS_DMA
-
-config SWIOTLB
-	bool
-	select DMA_DIRECT_OPS
-	select NEED_DMA_MAP_STATE
-
 config CHECK_SIGNATURE
 	bool

diff --git a/lib/Makefile b/lib/Makefile
index 5e0e160c9242..8153fdab287f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -29,9 +29,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 lib-$(CONFIG_PRINTK) += dump_stack.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
-obj-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
-obj-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
-obj-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y += kobject.o klist.o
 obj-y += lockref.o
@@ -148,7 +145,6 @@ obj-$(CONFIG_SMP) += percpu_counter.o
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
-obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
@@ -169,8 +165,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o
 
 obj-$(CONFIG_LRU_CACHE) += lru_cache.o
 
-obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
-
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
deleted file mode 100644
index c007d25bee09..000000000000
--- a/lib/dma-debug.c
+++ /dev/null
@@ -1,1773 +0,0 @@
-/*
- * Copyright (C) 2008 Advanced Micro Devices, Inc.
- *
- * Author: Joerg Roedel
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define HASH_SIZE 1024ULL -#define HASH_FN_SHIFT 13 -#define HASH_FN_MASK (HASH_SIZE - 1) - -/* allow architectures to override this if absolutely required */ -#ifndef PREALLOC_DMA_DEBUG_ENTRIES -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) -#endif - -enum { - dma_debug_single, - dma_debug_page, - dma_debug_sg, - dma_debug_coherent, - dma_debug_resource, -}; - -enum map_err_types { - MAP_ERR_CHECK_NOT_APPLICABLE, - MAP_ERR_NOT_CHECKED, - MAP_ERR_CHECKED, -}; - -#define DMA_DEBUG_STACKTRACE_ENTRIES 5 - -/** - * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping - * @list: node on pre-allocated free_entries list - * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent - * @type: single, page, sg, coherent - * @pfn: page frame of the start address - * @offset: offset of mapping relative to pfn - * @size: length of the mapping - * @direction: enum dma_data_direction - * @sg_call_ents: 'nents' from dma_map_sg - * @sg_mapped_ents: 'mapped_ents' from dma_map_sg - * @map_err_type: track whether dma_mapping_error() was checked - * @stacktrace: support backtraces when a violation is detected - */ -struct dma_debug_entry { - struct list_head list; - struct device *dev; - int type; - unsigned long pfn; - size_t offset; - u64 dev_addr; - u64 size; - int direction; - int sg_call_ents; - int sg_mapped_ents; - enum map_err_types map_err_type; -#ifdef CONFIG_STACKTRACE - struct stack_trace stacktrace; - unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; -#endif -}; - -typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *); - -struct hash_bucket { - struct list_head list; - spinlock_t lock; -} ____cacheline_aligned_in_smp; - -/* Hash list to save the allocated dma addresses */ -static struct hash_bucket dma_entry_hash[HASH_SIZE]; -/* List of pre-allocated dma_debug_entry's */ -static LIST_HEAD(free_entries); -/* Lock for the list above */ -static DEFINE_SPINLOCK(free_entries_lock); - -/* Global disable flag - will be set in case of an error */ -static bool global_disable __read_mostly; - -/* Early initialization disable flag, set at the end of dma_debug_init */ -static bool dma_debug_initialized __read_mostly; - -static inline bool dma_debug_disabled(void) -{ - return global_disable || !dma_debug_initialized; -} - -/* Global error count */ -static u32 error_count; - -/* Global error show enable*/ -static u32 show_all_errors __read_mostly; -/* Number of errors to show */ -static u32 show_num_errors = 1; - -static u32 num_free_entries; -static u32 min_free_entries; -static u32 nr_total_entries; - -/* number of preallocated entries requested by kernel cmdline */ -static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - -/* debugfs dentry's for the stuff above */ -static struct dentry *dma_debug_dent __read_mostly; -static struct dentry *global_disable_dent __read_mostly; -static struct dentry *error_count_dent __read_mostly; -static struct dentry *show_all_errors_dent __read_mostly; -static struct dentry *show_num_errors_dent __read_mostly; -static struct dentry *num_free_entries_dent __read_mostly; -static struct dentry 
*min_free_entries_dent __read_mostly; -static struct dentry *filter_dent __read_mostly; - -/* per-driver filter related state */ - -#define NAME_MAX_LEN 64 - -static char current_driver_name[NAME_MAX_LEN] __read_mostly; -static struct device_driver *current_driver __read_mostly; - -static DEFINE_RWLOCK(driver_name_lock); - -static const char *const maperr2str[] = { - [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", - [MAP_ERR_NOT_CHECKED] = "dma map error not checked", - [MAP_ERR_CHECKED] = "dma map error checked", -}; - -static const char *type2name[5] = { "single", "page", - "scather-gather", "coherent", - "resource" }; - -static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", - "DMA_FROM_DEVICE", "DMA_NONE" }; - -/* - * The access to some variables in this macro is racy. We can't use atomic_t - * here because all these variables are exported to debugfs. Some of them even - * writeable. This is also the reason why a lock won't help much. But anyway, - * the races are no big deal. Here is why: - * - * error_count: the addition is racy, but the worst thing that can happen is - * that we don't count some errors - * show_num_errors: the subtraction is racy. Also no big deal because in - * worst case this will result in one warning more in the - * system log than the user configured. This variable is - * writeable via debugfs. - */ -static inline void dump_entry_trace(struct dma_debug_entry *entry) -{ -#ifdef CONFIG_STACKTRACE - if (entry) { - pr_warning("Mapped at:\n"); - print_stack_trace(&entry->stacktrace, 0); - } -#endif -} - -static bool driver_filter(struct device *dev) -{ - struct device_driver *drv; - unsigned long flags; - bool ret; - - /* driver filter off */ - if (likely(!current_driver_name[0])) - return true; - - /* driver filter on and initialized */ - if (current_driver && dev && dev->driver == current_driver) - return true; - - /* driver filter on, but we can't filter on a NULL device... */ - if (!dev) - return false; - - if (current_driver || !current_driver_name[0]) - return false; - - /* driver filter on but not yet initialized */ - drv = dev->driver; - if (!drv) - return false; - - /* lock to protect against change of current_driver_name */ - read_lock_irqsave(&driver_name_lock, flags); - - ret = false; - if (drv->name && - strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) { - current_driver = drv; - ret = true; - } - - read_unlock_irqrestore(&driver_name_lock, flags); - - return ret; -} - -#define err_printk(dev, entry, format, arg...) do { \ - error_count += 1; \ - if (driver_filter(dev) && \ - (show_all_errors || show_num_errors > 0)) { \ - WARN(1, "%s %s: " format, \ - dev ? dev_driver_string(dev) : "NULL", \ - dev ? dev_name(dev) : "NULL", ## arg); \ - dump_entry_trace(entry); \ - } \ - if (!show_all_errors && show_num_errors > 0) \ - show_num_errors -= 1; \ - } while (0); - -/* - * Hash related functions - * - * Every DMA-API request is saved into a struct dma_debug_entry. To - * have quick access to these structs they are stored into a hash. - */ -static int hash_fn(struct dma_debug_entry *entry) -{ - /* - * Hash function is based on the dma address. - * We use bits 20-27 here as the index into the hash - */ - return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; -} - -/* - * Request exclusive access to a hash bucket for a given dma_debug_entry. 
- */ -static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, - unsigned long *flags) - __acquires(&dma_entry_hash[idx].lock) -{ - int idx = hash_fn(entry); - unsigned long __flags; - - spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); - *flags = __flags; - return &dma_entry_hash[idx]; -} - -/* - * Give up exclusive access to the hash bucket - */ -static void put_hash_bucket(struct hash_bucket *bucket, - unsigned long *flags) - __releases(&bucket->lock) -{ - unsigned long __flags = *flags; - - spin_unlock_irqrestore(&bucket->lock, __flags); -} - -static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b) -{ - return ((a->dev_addr == b->dev_addr) && - (a->dev == b->dev)) ? true : false; -} - -static bool containing_match(struct dma_debug_entry *a, - struct dma_debug_entry *b) -{ - if (a->dev != b->dev) - return false; - - if ((b->dev_addr <= a->dev_addr) && - ((b->dev_addr + b->size) >= (a->dev_addr + a->size))) - return true; - - return false; -} - -/* - * Search a given entry in the hash bucket list - */ -static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket, - struct dma_debug_entry *ref, - match_fn match) -{ - struct dma_debug_entry *entry, *ret = NULL; - int matches = 0, match_lvl, last_lvl = -1; - - list_for_each_entry(entry, &bucket->list, list) { - if (!match(ref, entry)) - continue; - - /* - * Some drivers map the same physical address multiple - * times. Without a hardware IOMMU this results in the - * same device addresses being put into the dma-debug - * hash multiple times too. This can result in false - * positives being reported. Therefore we implement a - * best-fit algorithm here which returns the entry from - * the hash which fits best to the reference value - * instead of the first-fit. - */ - matches += 1; - match_lvl = 0; - entry->size == ref->size ? ++match_lvl : 0; - entry->type == ref->type ? ++match_lvl : 0; - entry->direction == ref->direction ? ++match_lvl : 0; - entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0; - - if (match_lvl == 4) { - /* perfect-fit - return the result */ - return entry; - } else if (match_lvl > last_lvl) { - /* - * We found an entry that fits better then the - * previous one or it is the 1st match. - */ - last_lvl = match_lvl; - ret = entry; - } - } - - /* - * If we have multiple matches but no perfect-fit, just return - * NULL. - */ - ret = (matches == 1) ? 
ret : NULL; - - return ret; -} - -static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket, - struct dma_debug_entry *ref) -{ - return __hash_bucket_find(bucket, ref, exact_match); -} - -static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, - struct dma_debug_entry *ref, - unsigned long *flags) -{ - - unsigned int max_range = dma_get_max_seg_size(ref->dev); - struct dma_debug_entry *entry, index = *ref; - unsigned int range = 0; - - while (range <= max_range) { - entry = __hash_bucket_find(*bucket, ref, containing_match); - - if (entry) - return entry; - - /* - * Nothing found, go back a hash bucket - */ - put_hash_bucket(*bucket, flags); - range += (1 << HASH_FN_SHIFT); - index.dev_addr -= (1 << HASH_FN_SHIFT); - *bucket = get_hash_bucket(&index, flags); - } - - return NULL; -} - -/* - * Add an entry to a hash bucket - */ -static void hash_bucket_add(struct hash_bucket *bucket, - struct dma_debug_entry *entry) -{ - list_add_tail(&entry->list, &bucket->list); -} - -/* - * Remove entry from a hash bucket list - */ -static void hash_bucket_del(struct dma_debug_entry *entry) -{ - list_del(&entry->list); -} - -static unsigned long long phys_addr(struct dma_debug_entry *entry) -{ - if (entry->type == dma_debug_resource) - return __pfn_to_phys(entry->pfn) + entry->offset; - - return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; -} - -/* - * Dump mapping entries for debugging purposes - */ -void debug_dma_dump_mappings(struct device *dev) -{ - int idx; - - for (idx = 0; idx < HASH_SIZE; idx++) { - struct hash_bucket *bucket = &dma_entry_hash[idx]; - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&bucket->lock, flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!dev || dev == entry->dev) { - dev_info(entry->dev, - "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", - type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - dir2name[entry->direction], - maperr2str[entry->map_err_type]); - } - } - - spin_unlock_irqrestore(&bucket->lock, flags); - } -} - -/* - * For each mapping (initial cacheline in the case of - * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a - * scatterlist, or the cacheline specified in dma_map_single) insert - * into this tree using the cacheline as the key. At - * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If - * the entry already exists at insertion time add a tag as a reference - * count for the overlapping mappings. For now, the overlap tracking - * just ensures that 'unmaps' balance 'maps' before marking the - * cacheline idle, but we should also be flagging overlaps as an API - * violation. - * - * Memory usage is mostly constrained by the maximum number of available - * dma-debug entries in that we need a free dma_debug_entry before - * inserting into the tree. In the case of dma_map_page and - * dma_alloc_coherent there is only one dma_debug_entry and one - * dma_active_cacheline entry to track per event. dma_map_sg(), on the - * other hand, consumes a single dma_debug_entry, but inserts 'nents' - * entries into the tree. - * - * At any time debug_dma_assert_idle() can be called to trigger a - * warning if any cachelines in the given page are in the active set. 
- */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); -static DEFINE_SPINLOCK(radix_lock); -#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) -#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) -#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) - -static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) -{ - return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + - (entry->offset >> L1_CACHE_SHIFT); -} - -static int active_cacheline_read_overlap(phys_addr_t cln) -{ - int overlap = 0, i; - - for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) - overlap |= 1 << i; - return overlap; -} - -static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) -{ - int i; - - if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) - return overlap; - - for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (overlap & 1 << i) - radix_tree_tag_set(&dma_active_cacheline, cln, i); - else - radix_tree_tag_clear(&dma_active_cacheline, cln, i); - - return overlap; -} - -static void active_cacheline_inc_overlap(phys_addr_t cln) -{ - int overlap = active_cacheline_read_overlap(cln); - - overlap = active_cacheline_set_overlap(cln, ++overlap); - - /* If we overflowed the overlap counter then we're potentially - * leaking dma-mappings. Otherwise, if maps and unmaps are - * balanced then this overflow may cause false negatives in - * debug_dma_assert_idle() as the cacheline may be marked idle - * prematurely. - */ - WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, - "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", - ACTIVE_CACHELINE_MAX_OVERLAP, &cln); -} - -static int active_cacheline_dec_overlap(phys_addr_t cln) -{ - int overlap = active_cacheline_read_overlap(cln); - - return active_cacheline_set_overlap(cln, --overlap); -} - -static int active_cacheline_insert(struct dma_debug_entry *entry) -{ - phys_addr_t cln = to_cacheline_number(entry); - unsigned long flags; - int rc; - - /* If the device is not writing memory then we don't have any - * concerns about the cpu consuming stale data. This mitigates - * legitimate usages of overlapping mappings. - */ - if (entry->direction == DMA_TO_DEVICE) - return 0; - - spin_lock_irqsave(&radix_lock, flags); - rc = radix_tree_insert(&dma_active_cacheline, cln, entry); - if (rc == -EEXIST) - active_cacheline_inc_overlap(cln); - spin_unlock_irqrestore(&radix_lock, flags); - - return rc; -} - -static void active_cacheline_remove(struct dma_debug_entry *entry) -{ - phys_addr_t cln = to_cacheline_number(entry); - unsigned long flags; - - /* ...mirror the insert case */ - if (entry->direction == DMA_TO_DEVICE) - return; - - spin_lock_irqsave(&radix_lock, flags); - /* since we are counting overlaps the final put of the - * cacheline will occur when the overlap count is 0. - * active_cacheline_dec_overlap() returns -1 in that case - */ - if (active_cacheline_dec_overlap(cln) < 0) - radix_tree_delete(&dma_active_cacheline, cln); - spin_unlock_irqrestore(&radix_lock, flags); -} - -/** - * debug_dma_assert_idle() - assert that a page is not undergoing dma - * @page: page to lookup in the dma_active_cacheline tree - * - * Place a call to this routine in cases where the cpu touching the page - * before the dma completes (page is dma_unmapped) will lead to data - * corruption. 
- */ -void debug_dma_assert_idle(struct page *page) -{ - static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; - struct dma_debug_entry *entry = NULL; - void **results = (void **) &ents; - unsigned int nents, i; - unsigned long flags; - phys_addr_t cln; - - if (dma_debug_disabled()) - return; - - if (!page) - return; - - cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; - spin_lock_irqsave(&radix_lock, flags); - nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, - CACHELINES_PER_PAGE); - for (i = 0; i < nents; i++) { - phys_addr_t ent_cln = to_cacheline_number(ents[i]); - - if (ent_cln == cln) { - entry = ents[i]; - break; - } else if (ent_cln >= cln + CACHELINES_PER_PAGE) - break; - } - spin_unlock_irqrestore(&radix_lock, flags); - - if (!entry) - return; - - cln = to_cacheline_number(entry); - err_printk(entry->dev, entry, - "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", - &cln); -} - -/* - * Wrapper function for adding an entry to the hash. - * This function takes care of locking itself. - */ -static void add_dma_entry(struct dma_debug_entry *entry) -{ - struct hash_bucket *bucket; - unsigned long flags; - int rc; - - bucket = get_hash_bucket(entry, &flags); - hash_bucket_add(bucket, entry); - put_hash_bucket(bucket, &flags); - - rc = active_cacheline_insert(entry); - if (rc == -ENOMEM) { - pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); - global_disable = true; - } - - /* TODO: report -EEXIST errors here as overlapping mappings are - * not supported by the DMA API - */ -} - -static struct dma_debug_entry *__dma_entry_alloc(void) -{ - struct dma_debug_entry *entry; - - entry = list_entry(free_entries.next, struct dma_debug_entry, list); - list_del(&entry->list); - memset(entry, 0, sizeof(*entry)); - - num_free_entries -= 1; - if (num_free_entries < min_free_entries) - min_free_entries = num_free_entries; - - return entry; -} - -/* struct dma_entry allocator - * - * The next two functions implement the allocator for - * struct dma_debug_entries. - */ -static struct dma_debug_entry *dma_entry_alloc(void) -{ - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&free_entries_lock, flags); - - if (list_empty(&free_entries)) { - global_disable = true; - spin_unlock_irqrestore(&free_entries_lock, flags); - pr_err("DMA-API: debugging out of memory - disabling\n"); - return NULL; - } - - entry = __dma_entry_alloc(); - - spin_unlock_irqrestore(&free_entries_lock, flags); - -#ifdef CONFIG_STACKTRACE - entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; - entry->stacktrace.entries = entry->st_entries; - entry->stacktrace.skip = 2; - save_stack_trace(&entry->stacktrace); -#endif - - return entry; -} - -static void dma_entry_free(struct dma_debug_entry *entry) -{ - unsigned long flags; - - active_cacheline_remove(entry); - - /* - * add to beginning of the list - this way the entries are - * more likely cache hot when they are reallocated. 
- */ - spin_lock_irqsave(&free_entries_lock, flags); - list_add(&entry->list, &free_entries); - num_free_entries += 1; - spin_unlock_irqrestore(&free_entries_lock, flags); -} - -int dma_debug_resize_entries(u32 num_entries) -{ - int i, delta, ret = 0; - unsigned long flags; - struct dma_debug_entry *entry; - LIST_HEAD(tmp); - - spin_lock_irqsave(&free_entries_lock, flags); - - if (nr_total_entries < num_entries) { - delta = num_entries - nr_total_entries; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - for (i = 0; i < delta; i++) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - - list_add_tail(&entry->list, &tmp); - } - - spin_lock_irqsave(&free_entries_lock, flags); - - list_splice(&tmp, &free_entries); - nr_total_entries += i; - num_free_entries += i; - } else { - delta = nr_total_entries - num_entries; - - for (i = 0; i < delta && !list_empty(&free_entries); i++) { - entry = __dma_entry_alloc(); - kfree(entry); - } - - nr_total_entries -= i; - } - - if (nr_total_entries != num_entries) - ret = 1; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - return ret; -} - -/* - * DMA-API debugging init code - * - * The init code does two things: - * 1. Initialize core data structures - * 2. Preallocate a given number of dma_debug_entry structs - */ - -static int prealloc_memory(u32 num_entries) -{ - struct dma_debug_entry *entry, *next_entry; - int i; - - for (i = 0; i < num_entries; ++i) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - goto out_err; - - list_add_tail(&entry->list, &free_entries); - } - - num_free_entries = num_entries; - min_free_entries = num_entries; - - pr_info("DMA-API: preallocated %d debug entries\n", num_entries); - - return 0; - -out_err: - - list_for_each_entry_safe(entry, next_entry, &free_entries, list) { - list_del(&entry->list); - kfree(entry); - } - - return -ENOMEM; -} - -static ssize_t filter_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - char buf[NAME_MAX_LEN + 1]; - unsigned long flags; - int len; - - if (!current_driver_name[0]) - return 0; - - /* - * We can't copy to userspace directly because current_driver_name can - * only be read under the driver_name_lock with irqs disabled. So - * create a temporary copy first. - */ - read_lock_irqsave(&driver_name_lock, flags); - len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name); - read_unlock_irqrestore(&driver_name_lock, flags); - - return simple_read_from_buffer(user_buf, count, ppos, buf, len); -} - -static ssize_t filter_write(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) -{ - char buf[NAME_MAX_LEN]; - unsigned long flags; - size_t len; - int i; - - /* - * We can't copy from userspace directly. Access to - * current_driver_name is protected with a write_lock with irqs - * disabled. Since copy_from_user can fault and may sleep we - * need to copy to temporary buffer first - */ - len = min(count, (size_t)(NAME_MAX_LEN - 1)); - if (copy_from_user(buf, userbuf, len)) - return -EFAULT; - - buf[len] = 0; - - write_lock_irqsave(&driver_name_lock, flags); - - /* - * Now handle the string we got from userspace very carefully. - * The rules are: - * - only use the first token we got - * - token delimiter is everything looking like a space - * character (' ', '\n', '\t' ...) - * - */ - if (!isalnum(buf[0])) { - /* - * If the first character userspace gave us is not - * alphanumerical then assume the filter should be - * switched off. 
- */ - if (current_driver_name[0]) - pr_info("DMA-API: switching off dma-debug driver filter\n"); - current_driver_name[0] = 0; - current_driver = NULL; - goto out_unlock; - } - - /* - * Now parse out the first token and use it as the name for the - * driver to filter for. - */ - for (i = 0; i < NAME_MAX_LEN - 1; ++i) { - current_driver_name[i] = buf[i]; - if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) - break; - } - current_driver_name[i] = 0; - current_driver = NULL; - - pr_info("DMA-API: enable driver filter for driver [%s]\n", - current_driver_name); - -out_unlock: - write_unlock_irqrestore(&driver_name_lock, flags); - - return count; -} - -static const struct file_operations filter_fops = { - .read = filter_read, - .write = filter_write, - .llseek = default_llseek, -}; - -static int dma_debug_fs_init(void) -{ - dma_debug_dent = debugfs_create_dir("dma-api", NULL); - if (!dma_debug_dent) { - pr_err("DMA-API: can not create debugfs directory\n"); - return -ENOMEM; - } - - global_disable_dent = debugfs_create_bool("disabled", 0444, - dma_debug_dent, - &global_disable); - if (!global_disable_dent) - goto out_err; - - error_count_dent = debugfs_create_u32("error_count", 0444, - dma_debug_dent, &error_count); - if (!error_count_dent) - goto out_err; - - show_all_errors_dent = debugfs_create_u32("all_errors", 0644, - dma_debug_dent, - &show_all_errors); - if (!show_all_errors_dent) - goto out_err; - - show_num_errors_dent = debugfs_create_u32("num_errors", 0644, - dma_debug_dent, - &show_num_errors); - if (!show_num_errors_dent) - goto out_err; - - num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, - dma_debug_dent, - &num_free_entries); - if (!num_free_entries_dent) - goto out_err; - - min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, - dma_debug_dent, - &min_free_entries); - if (!min_free_entries_dent) - goto out_err; - - filter_dent = debugfs_create_file("driver_filter", 0644, - dma_debug_dent, NULL, &filter_fops); - if (!filter_dent) - goto out_err; - - return 0; - -out_err: - debugfs_remove_recursive(dma_debug_dent); - - return -ENOMEM; -} - -static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) -{ - struct dma_debug_entry *entry; - unsigned long flags; - int count = 0, i; - - for (i = 0; i < HASH_SIZE; ++i) { - spin_lock_irqsave(&dma_entry_hash[i].lock, flags); - list_for_each_entry(entry, &dma_entry_hash[i].list, list) { - if (entry->dev == dev) { - count += 1; - *out_entry = entry; - } - } - spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags); - } - - return count; -} - -static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) -{ - struct device *dev = data; - struct dma_debug_entry *uninitialized_var(entry); - int count; - - if (dma_debug_disabled()) - return 0; - - switch (action) { - case BUS_NOTIFY_UNBOUND_DRIVER: - count = device_dma_allocations(dev, &entry); - if (count == 0) - break; - err_printk(dev, entry, "DMA-API: device driver has pending " - "DMA allocations while released from device " - "[count=%d]\n" - "One of leaked entries details: " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [mapped as %s]\n", - count, entry->dev_addr, entry->size, - dir2name[entry->direction], type2name[entry->type]); - break; - default: - break; - } - - return 0; -} - -void dma_debug_add_bus(struct bus_type *bus) -{ - struct notifier_block *nb; - - if (dma_debug_disabled()) - return; - - nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); - if 
(nb == NULL) { - pr_err("dma_debug_add_bus: out of memory\n"); - return; - } - - nb->notifier_call = dma_debug_device_change; - - bus_register_notifier(bus, nb); -} - -static int dma_debug_init(void) -{ - int i; - - /* Do not use dma_debug_initialized here, since we really want to be - * called to set dma_debug_initialized - */ - if (global_disable) - return 0; - - for (i = 0; i < HASH_SIZE; ++i) { - INIT_LIST_HEAD(&dma_entry_hash[i].list); - spin_lock_init(&dma_entry_hash[i].lock); - } - - if (dma_debug_fs_init() != 0) { - pr_err("DMA-API: error creating debugfs entries - disabling\n"); - global_disable = true; - - return 0; - } - - if (prealloc_memory(nr_prealloc_entries) != 0) { - pr_err("DMA-API: debugging out of memory error - disabled\n"); - global_disable = true; - - return 0; - } - - nr_total_entries = num_free_entries; - - dma_debug_initialized = true; - - pr_info("DMA-API: debugging enabled by kernel config\n"); - return 0; -} -core_initcall(dma_debug_init); - -static __init int dma_debug_cmdline(char *str) -{ - if (!str) - return -EINVAL; - - if (strncmp(str, "off", 3) == 0) { - pr_info("DMA-API: debugging disabled on kernel command line\n"); - global_disable = true; - } - - return 0; -} - -static __init int dma_debug_entries_cmdline(char *str) -{ - if (!str) - return -EINVAL; - if (!get_option(&str, &nr_prealloc_entries)) - nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; -} - -__setup("dma_debug=", dma_debug_cmdline); -__setup("dma_debug_entries=", dma_debug_entries_cmdline); - -static void check_unmap(struct dma_debug_entry *ref) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - bucket = get_hash_bucket(ref, &flags); - entry = bucket_find_exact(bucket, ref); - - if (!entry) { - /* must drop lock before calling dma_mapping_error */ - put_hash_bucket(bucket, &flags); - - if (dma_mapping_error(ref->dev, ref->dev_addr)) { - err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free an " - "invalid DMA memory address\n"); - } else { - err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free DMA " - "memory it has not allocated [device " - "address=0x%016llx] [size=%llu bytes]\n", - ref->dev_addr, ref->size); - } - return; - } - - if (ref->size != entry->size) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different size " - "[device address=0x%016llx] [map size=%llu bytes] " - "[unmap size=%llu bytes]\n", - ref->dev_addr, entry->size, ref->size); - } - - if (ref->type != entry->type) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with wrong function " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped as %s] [unmapped as %s]\n", - ref->dev_addr, ref->size, - type2name[entry->type], type2name[ref->type]); - } else if ((entry->type == dma_debug_coherent) && - (phys_addr(ref) != phys_addr(entry))) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different CPU address " - "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=0x%016llx] " - "[cpu free address=0x%016llx]", - ref->dev_addr, ref->size, - phys_addr(entry), - phys_addr(ref)); - } - - if (ref->sg_call_ents && ref->type == dma_debug_sg && - ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA sg list with different entry count " - "[map count=%d] [unmap count=%d]\n", - entry->sg_call_ents, ref->sg_call_ents); - } - - /* - * This may be no bug in reality - but most 
implementations of the - * DMA API don't handle this properly, so check for it here - */ - if (ref->direction != entry->direction) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different direction " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [unmapped with %s]\n", - ref->dev_addr, ref->size, - dir2name[entry->direction], - dir2name[ref->direction]); - } - - /* - * Drivers should use dma_mapping_error() to check the returned - * addresses of dma_map_single() and dma_map_page(). - * If not, print this warning message. See Documentation/DMA-API.txt. - */ - if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { - err_printk(ref->dev, entry, - "DMA-API: device driver failed to check map error" - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped as %s]", - ref->dev_addr, ref->size, - type2name[entry->type]); - } - - hash_bucket_del(entry); - dma_entry_free(entry); - - put_hash_bucket(bucket, &flags); -} - -static void check_for_stack(struct device *dev, - struct page *page, size_t offset) -{ - void *addr; - struct vm_struct *stack_vm_area = task_stack_vm_area(current); - - if (!stack_vm_area) { - /* Stack is direct-mapped. */ - if (PageHighMem(page)) - return; - addr = page_address(page) + offset; - if (object_is_on_stack(addr)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr); - } else { - /* Stack is vmalloced. */ - int i; - - for (i = 0; i < stack_vm_area->nr_pages; i++) { - if (page != stack_vm_area->pages[i]) - continue; - - addr = (u8 *)current->stack + i * PAGE_SIZE + offset; - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr); - break; - } - } -} - -static inline bool overlap(void *addr, unsigned long len, void *start, void *end) -{ - unsigned long a1 = (unsigned long)addr; - unsigned long b1 = a1 + len; - unsigned long a2 = (unsigned long)start; - unsigned long b2 = (unsigned long)end; - - return !(b1 <= a2 || a1 >= b2); -} - -static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) -{ - if (overlap(addr, len, _stext, _etext) || - overlap(addr, len, __start_rodata, __end_rodata)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); -} - -static void check_sync(struct device *dev, - struct dma_debug_entry *ref, - bool to_cpu) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - bucket = get_hash_bucket(ref, &flags); - - entry = bucket_find_contain(&bucket, ref, &flags); - - if (!entry) { - err_printk(dev, NULL, "DMA-API: device driver tries " - "to sync DMA memory it has not allocated " - "[device address=0x%016llx] [size=%llu bytes]\n", - (unsigned long long)ref->dev_addr, ref->size); - goto out; - } - - if (ref->size > entry->size) { - err_printk(dev, entry, "DMA-API: device driver syncs" - " DMA memory outside allocated range " - "[device address=0x%016llx] " - "[allocation size=%llu bytes] " - "[sync offset+size=%llu]\n", - entry->dev_addr, entry->size, - ref->size); - } - - if (entry->direction == DMA_BIDIRECTIONAL) - goto out; - - if (ref->direction != entry->direction) { - err_printk(dev, entry, "DMA-API: device driver syncs " - "DMA memory with different direction " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - } - - if (to_cpu && 
!(entry->direction == DMA_FROM_DEVICE) && - !(ref->direction == DMA_TO_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " - "device read-only DMA memory for cpu " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - - if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && - !(ref->direction == DMA_FROM_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " - "device write-only DMA memory to device " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - - if (ref->sg_call_ents && ref->type == dma_debug_sg && - ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver syncs " - "DMA sg list with different entry count " - "[map count=%d] [sync count=%d]\n", - entry->sg_call_ents, ref->sg_call_ents); - } - -out: - put_hash_bucket(bucket, &flags); -} - -static void check_sg_segment(struct device *dev, struct scatterlist *sg) -{ -#ifdef CONFIG_DMA_API_DEBUG_SG - unsigned int max_seg = dma_get_max_seg_size(dev); - u64 start, end, boundary = dma_get_seg_boundary(dev); - - /* - * Either the driver forgot to set dma_parms appropriately, or - * whoever generated the list forgot to check them. - */ - if (sg->length > max_seg) - err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", - sg->length, max_seg); - /* - * In some cases this could potentially be the DMA API - * implementation's fault, but it would usually imply that - * the scatterlist was built inappropriately to begin with. - */ - start = sg_dma_address(sg); - end = start + sg_dma_len(sg) - 1; - if ((start ^ end) & ~boundary) - err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", - start, end, boundary); -#endif -} - -void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, - size_t size, int direction, dma_addr_t dma_addr, - bool map_single) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - if (dma_mapping_error(dev, dma_addr)) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->dev = dev; - entry->type = dma_debug_page; - entry->pfn = page_to_pfn(page); - entry->offset = offset, - entry->dev_addr = dma_addr; - entry->size = size; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - if (map_single) - entry->type = dma_debug_single; - - check_for_stack(dev, page, offset); - - if (!PageHighMem(page)) { - void *addr = page_address(page) + offset; - - check_for_illegal_area(dev, addr, size); - } - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_map_page); - -void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - struct dma_debug_entry ref; - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - if (unlikely(dma_debug_disabled())) - return; - - ref.dev = dev; - ref.dev_addr = dma_addr; - bucket = get_hash_bucket(&ref, &flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!exact_match(&ref, entry)) - continue; - - /* - * The same physical address can be mapped multiple - * times. 
Without a hardware IOMMU this results in the - * same device addresses being put into the dma-debug - * hash multiple times too. This can result in false - * positives being reported. Therefore we implement a - * best-fit algorithm here which updates the first entry - * from the hash which fits the reference value and is - * not currently listed as being checked. - */ - if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { - entry->map_err_type = MAP_ERR_CHECKED; - break; - } - } - - put_hash_bucket(bucket, &flags); -} -EXPORT_SYMBOL(debug_dma_mapping_error); - -void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, int direction, bool map_single) -{ - struct dma_debug_entry ref = { - .type = dma_debug_page, - .dev = dev, - .dev_addr = addr, - .size = size, - .direction = direction, - }; - - if (unlikely(dma_debug_disabled())) - return; - - if (map_single) - ref.type = dma_debug_single; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_unmap_page); - -void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) -{ - struct dma_debug_entry *entry; - struct scatterlist *s; - int i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, mapped_ents, i) { - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_sg; - entry->dev = dev; - entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset, - entry->size = sg_dma_len(s); - entry->dev_addr = sg_dma_address(s); - entry->direction = direction; - entry->sg_call_ents = nents; - entry->sg_mapped_ents = mapped_ents; - - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - - check_sg_segment(dev, s); - - add_dma_entry(entry); - } -} -EXPORT_SYMBOL(debug_dma_map_sg); - -static int get_nr_mapped_entries(struct device *dev, - struct dma_debug_entry *ref) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - int mapped_ents; - - bucket = get_hash_bucket(ref, &flags); - entry = bucket_find_exact(bucket, ref); - mapped_ents = 0; - - if (entry) - mapped_ents = entry->sg_mapped_ents; - put_hash_bucket(bucket, &flags); - - return mapped_ents; -} - -void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sglist, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = dir, - .sg_call_ents = nelems, - }; - - if (mapped_ents && i >= mapped_ents) - break; - - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - check_unmap(&ref); - } -} -EXPORT_SYMBOL(debug_dma_unmap_sg); - -void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - if (unlikely(virt == NULL)) - return; - - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_coherent; - entry->dev = dev; - entry->offset = offset_in_page(virt); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = DMA_BIDIRECTIONAL; - - if (is_vmalloc_addr(virt)) - entry->pfn = 
vmalloc_to_pfn(virt); - else - entry->pfn = page_to_pfn(virt_to_page(virt)); - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_alloc_coherent); - -void debug_dma_free_coherent(struct device *dev, size_t size, - void *virt, dma_addr_t addr) -{ - struct dma_debug_entry ref = { - .type = dma_debug_coherent, - .dev = dev, - .offset = offset_in_page(virt), - .dev_addr = addr, - .size = size, - .direction = DMA_BIDIRECTIONAL, - }; - - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) - return; - - if (is_vmalloc_addr(virt)) - ref.pfn = vmalloc_to_pfn(virt); - else - ref.pfn = page_to_pfn(virt_to_page(virt)); - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_free_coherent); - -void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, - int direction, dma_addr_t dma_addr) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_resource; - entry->dev = dev; - entry->pfn = PHYS_PFN(addr); - entry->offset = offset_in_page(addr); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_map_resource); - -void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) -{ - struct dma_debug_entry ref = { - .type = dma_debug_resource, - .dev = dev, - .dev_addr = dma_addr, - .size = size, - .direction = direction, - }; - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_unmap_resource); - -void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); - -void debug_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} -EXPORT_SYMBOL(debug_dma_sync_single_for_device); - -void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); - -void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} 
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); - -void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, int direction) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = direction, - .sg_call_ents = nelems, - }; - - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - if (i >= mapped_ents) - break; - - check_sync(dev, &ref, true); - } -} -EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); - -void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, int direction) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = direction, - .sg_call_ents = nelems, - }; - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - if (i >= mapped_ents) - break; - - check_sync(dev, &ref, false); - } -} -EXPORT_SYMBOL(debug_dma_sync_sg_for_device); - -static int __init dma_debug_driver_setup(char *str) -{ - int i; - - for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) { - current_driver_name[i] = *str; - if (*str == 0) - break; - } - - if (current_driver_name[0]) - pr_info("DMA-API: enable driver filter for driver [%s]\n", - current_driver_name); - - - return 1; -} -__setup("dma_debug_driver=", dma_debug_driver_setup); diff --git a/lib/dma-direct.c b/lib/dma-direct.c deleted file mode 100644 index 8be8106270c2..000000000000 --- a/lib/dma-direct.c +++ /dev/null @@ -1,204 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map physical memory directly without using an IOMMU or - * flushing caches. - */ -#include -#include -#include -#include -#include -#include -#include - -#define DIRECT_MAPPING_ERROR 0 - -/* - * Most architectures use ZONE_DMA for the first 16 Megabytes, but - * some use it for entirely different regions: - */ -#ifndef ARCH_ZONE_DMA_BITS -#define ARCH_ZONE_DMA_BITS 24 -#endif - -/* - * For AMD SEV all DMA must be to unencrypted addresses. - */ -static inline bool force_dma_unencrypted(void) -{ - return sev_active(); -} - -static bool -check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, - const char *caller) -{ - if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { - if (!dev->dma_mask) { - dev_err(dev, - "%s: call on device without dma_mask\n", - caller); - return false; - } - - if (*dev->dma_mask >= DMA_BIT_MASK(32)) { - dev_err(dev, - "%s: overflow %pad+%zu of device mask %llx\n", - caller, &dma_addr, size, *dev->dma_mask); - } - return false; - } - return true; -} - -static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) -{ - dma_addr_t addr = force_dma_unencrypted() ? 
- __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); - return addr + size - 1 <= dev->coherent_dma_mask; -} - -void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - int page_order = get_order(size); - struct page *page = NULL; - void *ret; - - /* we always manually zero the memory once we are done: */ - gfp &= ~__GFP_ZERO; - - /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ - if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - gfp |= GFP_DMA; - if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; - -again: - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(gfp)) { - page = dma_alloc_from_contiguous(dev, count, page_order, gfp); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - if (!page) - page = alloc_pages_node(dev_to_node(dev), gfp, page_order); - - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - __free_pages(page, page_order); - page = NULL; - - if (IS_ENABLED(CONFIG_ZONE_DMA32) && - dev->coherent_dma_mask < DMA_BIT_MASK(64) && - !(gfp & (GFP_DMA32 | GFP_DMA))) { - gfp |= GFP_DMA32; - goto again; - } - - if (IS_ENABLED(CONFIG_ZONE_DMA) && - dev->coherent_dma_mask < DMA_BIT_MASK(32) && - !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - } - - if (!page) - return NULL; - ret = page_address(page); - if (force_dma_unencrypted()) { - set_memory_decrypted((unsigned long)ret, 1 << page_order); - *dma_handle = __phys_to_dma(dev, page_to_phys(page)); - } else { - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - } - memset(ret, 0, size); - return ret; -} - -/* - * NOTE: this function must never look at the dma_addr argument, because we want - * to be able to use it as a helper for iommu implementations as well. 
- */ -void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned int page_order = get_order(size); - - if (force_dma_unencrypted()) - set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); - if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, page_order); -} - -dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; - - if (!check_addr(dev, dma_addr, size, __func__)) - return DIRECT_MAPPING_ERROR; - return dma_addr; -} - -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); - if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) - return 0; - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -int dma_direct_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_ZONE_DMA - if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - return 0; -#else - /* - * Because 32-bit DMA masks are so common we expect every architecture - * to be able to satisfy them - either by not supporting more physical - * memory, or by providing a ZONE_DMA32. If neither is the case, the - * architecture needs to use an IOMMU instead of the direct mapping. - */ - if (mask < DMA_BIT_MASK(32)) - return 0; -#endif - /* - * Various PCI/PCIe bridges have broken support for > 32bit DMA even - * if the device itself might support it. - */ - if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32)) - return 0; - return 1; -} - -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == DIRECT_MAPPING_ERROR; -} - -const struct dma_map_ops dma_direct_ops = { - .alloc = dma_direct_alloc, - .free = dma_direct_free, - .map_page = dma_direct_map_page, - .map_sg = dma_direct_map_sg, - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, -}; -EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c deleted file mode 100644 index 79e9a757387f..000000000000 --- a/lib/dma-noncoherent.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018 Christoph Hellwig. - * - * DMA operations that map physical memory directly without providing cache - * coherence. 
- */ -#include -#include -#include -#include -#include - -static void dma_noncoherent_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); -} - -static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t addr; - - addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); - if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(dev, page_to_phys(page) + offset, - size, dir); - return addr; -} - -static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); - if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); - return nents; -} - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -static void dma_noncoherent_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); -} - -static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); -} - -static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); -} -#endif - -const struct dma_map_ops dma_noncoherent_ops = { - .alloc = arch_dma_alloc, - .free = arch_dma_free, - .mmap = arch_dma_mmap, - .sync_single_for_device = dma_noncoherent_sync_single_for_device, - .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, - .map_page = dma_noncoherent_map_page, - .map_sg = dma_noncoherent_map_sg, -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU - .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, - .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, - .unmap_page = dma_noncoherent_unmap_page, - .unmap_sg = dma_noncoherent_unmap_sg, -#endif - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, - .cache_sync = arch_dma_cache_sync, -}; -EXPORT_SYMBOL(dma_noncoherent_ops); diff --git a/lib/dma-virt.c b/lib/dma-virt.c deleted file mode 100644 index 8e61a02ef9ca..000000000000 --- a/lib/dma-virt.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * lib/dma-virt.c - * - * DMA operations that map to virtual addresses without flushing memory. 
- */ -#include -#include -#include -#include - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, -}; -EXPORT_SYMBOL(dma_virt_ops); diff --git a/lib/swiotlb.c b/lib/swiotlb.c deleted file mode 100644 index 04b68d9dffac..000000000000 --- a/lib/swiotlb.c +++ /dev/null @@ -1,1087 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * This implementation is a fallback for platforms that do not support - * I/O TLBs (aka DMA address translation hardware). - * Copyright (C) 2000 Asit Mallick - * Copyright (C) 2000 Goutham Rao - * Copyright (C) 2000, 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. - * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid - * unnecessary i-cache flushing. - * 04/07/.. ak Better overflow handling. Assorted fixes. - * 05/09/10 linville Add support for syncing ranges, support syncing for - * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. - * 08/12/11 beckyb Add highmem support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) - -/* - * Minimum IO TLB size to bother booting with. Systems with mainly - * 64bit capable cards will only lightly use the swiotlb. If we can't - * allocate a contiguous 1MB, we're probably in trouble anyway. - */ -#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) - -enum swiotlb_force swiotlb_force; - -/* - * Used to do a quick range check in swiotlb_tbl_unmap_single and - * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this - * API. - */ -static phys_addr_t io_tlb_start, io_tlb_end; - -/* - * The number of IO TLB blocks (in groups of 64) between io_tlb_start and - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long io_tlb_nslabs; - -/* - * When the IOMMU overflows we return a fallback buffer. This sets the size. 
- */ -static unsigned long io_tlb_overflow = 32*1024; - -static phys_addr_t io_tlb_overflow_buffer; - -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; - -/* - * Max segment that we can provide which (if pages are contingous) will - * not be bounced (unless SWIOTLB_FORCE is set). - */ -unsigned int max_segment; - -/* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. - */ -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) -static phys_addr_t *io_tlb_orig_addr; - -/* - * Protect the above data structures in the map and unmap calls - */ -static DEFINE_SPINLOCK(io_tlb_lock); - -static int late_alloc; - -static int __init -setup_io_tlb_npages(char *str) -{ - if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0); - /* avoid tail segment of size < IO_TLB_SEGSIZE */ - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - if (*str == ',') - ++str; - if (!strcmp(str, "force")) { - swiotlb_force = SWIOTLB_FORCE; - } else if (!strcmp(str, "noforce")) { - swiotlb_force = SWIOTLB_NO_FORCE; - io_tlb_nslabs = 1; - } - - return 0; -} -early_param("swiotlb", setup_io_tlb_npages); -/* make io_tlb_overflow tunable too? */ - -unsigned long swiotlb_nr_tbl(void) -{ - return io_tlb_nslabs; -} -EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); - -unsigned int swiotlb_max_segment(void) -{ - return max_segment; -} -EXPORT_SYMBOL_GPL(swiotlb_max_segment); - -void swiotlb_set_max_segment(unsigned int val) -{ - if (swiotlb_force == SWIOTLB_FORCE) - max_segment = 1; - else - max_segment = rounddown(val, PAGE_SIZE); -} - -/* default to 64MB */ -#define IO_TLB_DEFAULT_SIZE (64UL<<20) -unsigned long swiotlb_size_or_default(void) -{ - unsigned long size; - - size = io_tlb_nslabs << IO_TLB_SHIFT; - - return size ? size : (IO_TLB_DEFAULT_SIZE); -} - -static bool no_iotlb_memory; - -void swiotlb_print_info(void) -{ - unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - unsigned char *vstart, *vend; - - if (no_iotlb_memory) { - pr_warn("software IO TLB: No low mem\n"); - return; - } - - vstart = phys_to_virt(io_tlb_start); - vend = phys_to_virt(io_tlb_end); - - printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", - (unsigned long long)io_tlb_start, - (unsigned long long)io_tlb_end, - bytes >> 20, vstart, vend - 1); -} - -/* - * Early SWIOTLB allocation may be too early to allow an architecture to - * perform the desired operations. This function allows the architecture to - * call SWIOTLB when the operations are possible. It needs to be called - * before the SWIOTLB memory is used. 
- */ -void __init swiotlb_update_mem_attributes(void) -{ - void *vaddr; - unsigned long bytes; - - if (no_iotlb_memory || late_alloc) - return; - - vaddr = phys_to_virt(io_tlb_start); - bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); - - vaddr = phys_to_virt(io_tlb_overflow_buffer); - bytes = PAGE_ALIGN(io_tlb_overflow); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); -} - -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) -{ - void *v_overflow_buffer; - unsigned long i, bytes; - - bytes = nslabs << IO_TLB_SHIFT; - - io_tlb_nslabs = nslabs; - io_tlb_start = __pa(tlb); - io_tlb_end = io_tlb_start + bytes; - - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = memblock_virt_alloc_low_nopanic( - PAGE_ALIGN(io_tlb_overflow), - PAGE_SIZE); - if (!v_overflow_buffer) - return -ENOMEM; - - io_tlb_overflow_buffer = __pa(v_overflow_buffer); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = memblock_virt_alloc( - PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), - PAGE_SIZE); - io_tlb_orig_addr = memblock_virt_alloc( - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), - PAGE_SIZE); - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - - if (verbose) - swiotlb_print_info(); - - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - return 0; -} - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init -swiotlb_init(int verbose) -{ - size_t default_size = IO_TLB_DEFAULT_SIZE; - unsigned char *vstart; - unsigned long bytes; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - /* Get IO TLB memory from the low pages */ - vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) - return; - - if (io_tlb_start) - memblock_free_early(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate SWIOTLB buffer"); - no_iotlb_memory = true; -} - -/* - * Systems with larger DMA zones (those that don't support ISA) can - * initialize the swiotlb later using the slab allocator if needed. - * This should be just like above, but with some error catching. 
- */ -int -swiotlb_late_init_with_default_size(size_t default_size) -{ - unsigned long bytes, req_nslabs = io_tlb_nslabs; - unsigned char *vstart = NULL; - unsigned int order; - int rc = 0; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - /* - * Get IO TLB memory from the low pages - */ - order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); - io_tlb_nslabs = SLABS_PER_PAGE << order; - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, - order); - if (vstart) - break; - order--; - } - - if (!vstart) { - io_tlb_nslabs = req_nslabs; - return -ENOMEM; - } - if (order != get_order(bytes)) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); - io_tlb_nslabs = SLABS_PER_PAGE << order; - } - rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); - if (rc) - free_pages((unsigned long)vstart, order); - - return rc; -} - -int -swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) -{ - unsigned long i, bytes; - unsigned char *v_overflow_buffer; - - bytes = nslabs << IO_TLB_SHIFT; - - io_tlb_nslabs = nslabs; - io_tlb_start = virt_to_phys(tlb); - io_tlb_end = io_tlb_start + bytes; - - set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); - memset(tlb, 0, bytes); - - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, - get_order(io_tlb_overflow)); - if (!v_overflow_buffer) - goto cleanup2; - - set_memory_decrypted((unsigned long)v_overflow_buffer, - io_tlb_overflow >> PAGE_SHIFT); - memset(v_overflow_buffer, 0, io_tlb_overflow); - io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. 
- */ - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(int))); - if (!io_tlb_list) - goto cleanup3; - - io_tlb_orig_addr = (phys_addr_t *) - __get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * - sizeof(phys_addr_t))); - if (!io_tlb_orig_addr) - goto cleanup4; - - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - - swiotlb_print_info(); - - late_alloc = 1; - - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - - return 0; - -cleanup4: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - io_tlb_list = NULL; -cleanup3: - free_pages((unsigned long)v_overflow_buffer, - get_order(io_tlb_overflow)); - io_tlb_overflow_buffer = 0; -cleanup2: - io_tlb_end = 0; - io_tlb_start = 0; - io_tlb_nslabs = 0; - max_segment = 0; - return -ENOMEM; -} - -void __init swiotlb_exit(void) -{ - if (!io_tlb_orig_addr) - return; - - if (late_alloc) { - free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), - get_order(io_tlb_overflow)); - free_pages((unsigned long)io_tlb_orig_addr, - get_order(io_tlb_nslabs * sizeof(phys_addr_t))); - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - free_pages((unsigned long)phys_to_virt(io_tlb_start), - get_order(io_tlb_nslabs << IO_TLB_SHIFT)); - } else { - memblock_free_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); - memblock_free_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - memblock_free_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - memblock_free_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - } - io_tlb_nslabs = 0; - max_segment = 0; -} - -int is_swiotlb_buffer(phys_addr_t paddr) -{ - return paddr >= io_tlb_start && paddr < io_tlb_end; -} - -/* - * Bounce: copy the swiotlb buffer back to the original dma location - */ -static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = phys_to_virt(tlb_addr); - - if (PageHighMem(pfn_to_page(pfn))) { - /* The buffer does not have a mapping. Map it in and copy */ - unsigned int offset = orig_addr & ~PAGE_MASK; - char *buffer; - unsigned int sz = 0; - unsigned long flags; - - while (size) { - sz = min_t(size_t, PAGE_SIZE - offset, size); - - local_irq_save(flags); - buffer = kmap_atomic(pfn_to_page(pfn)); - if (dir == DMA_TO_DEVICE) - memcpy(vaddr, buffer + offset, sz); - else - memcpy(buffer + offset, vaddr, sz); - kunmap_atomic(buffer); - local_irq_restore(flags); - - size -= sz; - pfn++; - vaddr += sz; - offset = 0; - } - } else if (dir == DMA_TO_DEVICE) { - memcpy(vaddr, phys_to_virt(orig_addr), size); - } else { - memcpy(phys_to_virt(orig_addr), vaddr, size); - } -} - -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t orig_addr, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - unsigned long flags; - phys_addr_t tlb_addr; - unsigned int nslots, stride, index, wrap; - int i; - unsigned long mask; - unsigned long offset_slots; - unsigned long max_slots; - - if (no_iotlb_memory) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - - if (mem_encrypt_active()) - pr_warn_once("%s is active and system is using DMA bounce buffers\n", - sme_active() ? 
"SME" : "SEV"); - - mask = dma_get_seg_boundary(hwdev); - - tbl_dma_addr &= mask; - - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - - /* - * Carefully handle integer overflow which can occur when mask == ~0UL. - */ - max_slots = mask + 1 - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); - - /* - * For mappings greater than or equal to a page, we limit the stride - * (and hence alignment) to a page size. - */ - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size >= PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); - - /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; - - do { - while (iommu_is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; - } - - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot - * and mark the entries as '0' indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next - * round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); - - goto found; - } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); - -not_found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); - return SWIOTLB_MAP_ERROR; -found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. - */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); - - return tlb_addr; -} - -/* - * Allocates bounce buffer and returns its physical address. - */ -static phys_addr_t -map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start_dma_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) { - dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", - &phys); - return SWIOTLB_MAP_ERROR; - } - - start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, - dir, attrs); -} - -/* - * tlb_addr is the physical address of the bounce buffer to unmap. 
- */ -void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unsigned long flags; - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; - - /* - * First, sync the memory before unmapping the entry - */ - if (orig_addr != INVALID_PHYS_ADDR && - !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); - - /* - * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contiguous entries available. - * While returning the entries to the free list, we merge the entries - * with slots below and above the pool being returned. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - io_tlb_list[i] = ++count; - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - } - spin_unlock_irqrestore(&io_tlb_lock, flags); -} - -void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; - - if (orig_addr == INVALID_PHYS_ADDR) - return; - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); - - switch (target) { - case SYNC_FOR_CPU: - if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_FROM_DEVICE); - else - BUG_ON(dir != DMA_TO_DEVICE); - break; - case SYNC_FOR_DEVICE: - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_TO_DEVICE); - else - BUG_ON(dir != DMA_FROM_DEVICE); - break; - default: - BUG(); - } -} - -static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, - size_t size) -{ - u64 mask = DMA_BIT_MASK(32); - - if (dev && dev->coherent_dma_mask) - mask = dev->coherent_dma_mask; - return addr + size - 1 <= mask; -} - -static void * -swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned long attrs) -{ - phys_addr_t phys_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) - goto out_warn; - - phys_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), - 0, size, DMA_FROM_DEVICE, attrs); - if (phys_addr == SWIOTLB_MAP_ERROR) - goto out_warn; - - *dma_handle = __phys_to_dma(dev, phys_addr); - if (!dma_coherent_ok(dev, *dma_handle, size)) - goto out_unmap; - - memset(phys_to_virt(phys_addr), 0, size); - return phys_to_virt(phys_addr); - -out_unmap: - dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dev->coherent_dma_mask, - (unsigned long long)*dma_handle); - - /* - * DMA_TO_DEVICE to avoid memcpy in unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. 
- */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); -out_warn: - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { - dev_warn(dev, - "swiotlb: coherent allocation failed, size=%zu\n", - size); - dump_stack(); - } - return NULL; -} - -static bool swiotlb_free_buffer(struct device *dev, size_t size, - dma_addr_t dma_addr) -{ - phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); - - WARN_ON_ONCE(irqs_disabled()); - - if (!is_swiotlb_buffer(phys_addr)) - return false; - - /* - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - return true; -} - -static void -swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, - int do_panic) -{ - if (swiotlb_force == SWIOTLB_NO_FORCE) - return; - - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * unless they check for dma_mapping_error (most don't) - * When the mapping is small enough return a static buffer to limit - * the damage, or panic when the transfer is too big. - */ - dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n", - size); - - if (size <= io_tlb_overflow || !do_panic) - return; - - if (dir == DMA_BIDIRECTIONAL) - panic("DMA: Random memory could be DMA accessed\n"); - if (dir == DMA_FROM_DEVICE) - panic("DMA: Random memory could be DMA written\n"); - if (dir == DMA_TO_DEVICE) - panic("DMA: Random memory could be DMA read\n"); -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * physical address to use is returned. - * - * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. - */ -dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t map, phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = phys_to_dma(dev, phys); - - BUG_ON(dir == DMA_NONE); - /* - * If the address happens to be in the device's DMA window, - * we can safely return the device addr and not worry about bounce - * buffering it. - */ - if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) - return dev_addr; - - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - - /* Oh well, have to allocate and map a bounce buffer. */ - map = map_single(dev, phys, size, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - swiotlb_full(dev, size, dir, 1); - return __phys_to_dma(dev, io_tlb_overflow_buffer); - } - - dev_addr = __phys_to_dma(dev, map); - - /* Ensure that the address returned is DMA'ble */ - if (dma_capable(dev, dev_addr, size)) - return dev_addr; - - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - - return __phys_to_dma(dev, io_tlb_overflow_buffer); -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_page call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. 
- */ -static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - /* - * phys_to_virt doesn't work with hihgmem page but we could - * call dma_mark_clean() with hihgmem page here. However, we - * are fine since dma_mark_clean() is null on POWERPC. We can - * make dma_mark_clean() take a physical address if necessary. - */ - dma_mark_clean(phys_to_virt(paddr), size); -} - -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unmap_single(hwdev, dev_addr, size, dir, attrs); -} - -/* - * Make physical memory consistent for a single streaming mode DMA translation - * after a transfer. - * - * If you perform a swiotlb_map_page() but wish to interrogate the buffer - * using the cpu, yet do not wish to teardown the dma mapping, you must - * call this function before doing so. At the next point you give the dma - * address back to the card, you must first perform a - * swiotlb_dma_sync_for_device, and then the device again owns the buffer - */ -static void -swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - dma_mark_clean(phys_to_virt(paddr), size); -} - -void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); -} - -/* - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_page - * interface. Here the scatter gather list elements are each tagged with the - * appropriate dma address and length. They are obtained via - * sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for swiotlb_map_page are the - * same here. - */ -int -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) { - phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - - if (swiotlb_force == SWIOTLB_FORCE || - !dma_capable(hwdev, dev_addr, sg->length)) { - phys_addr_t map = map_single(hwdev, sg_phys(sg), - sg->length, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - /* Don't panic here, we expect map_sg users - to do proper error handling. 
*/ - swiotlb_full(hwdev, sg->length, dir, 0); - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, - attrs); - sg_dma_len(sgl) = 0; - return 0; - } - sg->dma_address = __phys_to_dma(hwdev, map); - } else - sg->dma_address = dev_addr; - sg_dma_len(sg) = sg->length; - } - return nelems; -} - -/* - * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_page() above. - */ -void -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, - attrs); -} - -/* - * Make physical memory consistent for a set of streaming mode DMA translations - * after a transfer. - * - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules - * and usage. - */ -static void -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - enum dma_sync_target target) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nelems, i) - swiotlb_sync_single(hwdev, sg->dma_address, - sg_dma_len(sg), dir, target); -} - -void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); -} - -int -swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) -{ - return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); -} - -/* - * Return whether the given device DMA address mask can be supported - * properly. For example, if your device can only drive the low 24-bits - * during bus mastering, then you would pass 0x00ffffff as the mask to - * this function. - */ -int -swiotlb_dma_supported(struct device *hwdev, u64 mask) -{ - return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; -} - -void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - void *vaddr; - - /* temporary workaround: */ - if (gfp & __GFP_NOWARN) - attrs |= DMA_ATTR_NO_WARN; - - /* - * Don't print a warning when the first allocation attempt fails. - * swiotlb_alloc_coherent() will print a warning when the DMA memory - * allocation ultimately failed. 
- */ - gfp |= __GFP_NOWARN; - - vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); - if (!vaddr) - vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); - return vaddr; -} - -void swiotlb_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - if (!swiotlb_free_buffer(dev, size, dma_addr)) - dma_direct_free(dev, size, vaddr, dma_addr, attrs); -} - -const struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc = swiotlb_alloc, - .free = swiotlb_free, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = dma_direct_supported, -}; -- cgit v1.2.3 From 043f891b70e6197bc181f3b087c2bd04c60fddd2 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 25 Jun 2018 13:28:06 +0200 Subject: Revert "lib/test_printf.c: call wait_for_random_bytes() before plain %p tests" This reverts commit ee410f15b1418f2f4428e79980674c979081bcb7. It might prevent the machine from boot. It would wait for enough randomness at the very beginning of kernel_init(). But there is basically nothing running in parallel that would help to produce any randomness. Reported-by: Steven Rostedt (VMware) Signed-off-by: Petr Mladek --- lib/test_printf.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index b2aa8f514844..cea592f402ed 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -260,13 +260,6 @@ plain(void) { int err; - /* - * Make sure crng is ready. Otherwise we get "(ptrval)" instead - * of a hashed address when printing '%p' in plain_hash() and - * plain_format(). - */ - wait_for_random_bytes(); - err = plain_hash(); if (err) { pr_warn("plain 'p' does not appear to be hashed\n"); -- cgit v1.2.3 From 4bb6e96ab808f88d746343637f0cc2243b527da5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 27 Jun 2018 23:26:01 -0700 Subject: lib/percpu_ida.c: don't do alloc from per-CPU list if there is none In commit 804209d8a009 ("lib/percpu_ida.c: use _irqsave() instead of local_irq_save() + spin_lock") I inlined alloc_local_tag() and mixed up the >= check from percpu_ida_alloc() with the one in alloc_local_tag(). Don't alloc from per-CPU freelist if ->nr_free is zero. 
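To make the bug concrete: tags->nr_free is a count of free per-CPU entries and is never meant to go negative, so the mistaken ">= 0" test is always true and the fastpath fires even when the freelist is empty, popping a bogus entry and underflowing the count. A minimal sketch of the intended fastpath check (illustrative only; the real context is in the hunk below):

	spin_lock_irqsave(&tags->lock, flags);
	/* Fastpath: only valid when the per-CPU freelist actually holds
	 * entries; an empty list must fall back to the shared pool. */
	if (likely(tags->nr_free)) {
		tag = tags->freelist[--tags->nr_free];
		spin_unlock_irqrestore(&tags->lock, flags);
		return tag;
	}
	spin_unlock_irqrestore(&tags->lock, flags);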
Link: http://lkml.kernel.org/r/20180613075830.c3zeva52fuj6fxxv@linutronix.de Fixes: 804209d8a009 ("lib/percpu_ida.c: use _irqsave() instead of local_irq_save() + spin_lock") Signed-off-by: Sebastian Andrzej Siewior Reported-by: David Disseldorp Tested-by: David Disseldorp Cc: Thomas Gleixner Cc: Nicholas Bellinger Cc: Shaohua Li Cc: Kent Overstreet Cc: Matthew Wilcox Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/percpu_ida.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 9bbd9c5d375a..beb14839b41a 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -141,7 +141,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) spin_lock_irqsave(&tags->lock, flags); /* Fastpath */ - if (likely(tags->nr_free >= 0)) { + if (likely(tags->nr_free)) { tag = tags->freelist[--tags->nr_free]; spin_unlock_irqrestore(&tags->lock, flags); return tag; -- cgit v1.2.3 From dd275caf4a0d9b219fffe49288b6cc33cd564312 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 27 Jun 2018 23:26:20 -0700 Subject: kasan: depend on CONFIG_SLUB_DEBUG KASAN depends on having access to some of the accounting that SLUB_DEBUG does; without it, there are immediate crashes [1]. So, the natural thing to do is to make KASAN select SLUB_DEBUG. [1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg@mail.gmail.com Link: http://lkml.kernel.org/r/20180622154623.25388-1-Jason@zx2c4.com Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()") Signed-off-by: Jason A. Donenfeld Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Acked-by: Christoph Lameter Cc: Shakeel Butt Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Andrey Ryabinin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 3d35d062970d..c253c1b46c6b 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" depends on SLUB || (SLAB && !DEBUG_SLAB) + select SLUB_DEBUG if SLUB select CONSTRUCTORS select STACKDEPOT help -- cgit v1.2.3 From 3203c9010060806ff88c9989aeab4dc8d9a474dc Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Wed, 27 Jun 2018 17:19:21 +0200 Subject: test_bpf: flag tests that cannot be jited on s390 Flag with FLAG_EXPECTED_FAIL the BPF_MAXINSNS tests that cannot be jited on s390 because they exceed BPF_SIZE_MAX and fail when CONFIG_BPF_JIT_ALWAYS_ON is set. Also set .expected_errcode to -ENOTSUPP so the tests pass in that case. Signed-off-by: Kleber Sacilotto de Souza Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 60aedc879361..08d3d59dca17 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5282,21 +5282,31 @@ static struct bpf_test tests[] = { { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Ctx heavy transformations", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { }, { { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } }, .fill_helper = bpf_fill_maxinsns6, + .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. 
*/ "BPF_MAXINSNS: Call heavy transformations", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, +#else CLASSIC | FLAG_NO_DATA, +#endif { }, { { 1, 0 }, { 10, 0 } }, .fill_helper = bpf_fill_maxinsns7, + .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Jump heavy test", @@ -5347,18 +5357,28 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: exec all MSH", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { 0xfa, 0xfb, 0xfc, 0xfd, }, { { 4, 0xababab83 } }, .fill_helper = bpf_fill_maxinsns13, + .expected_errcode = -ENOTSUPP, }, { "BPF_MAXINSNS: ld_abs+get_processor_id", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { }, { { 1, 0xbee } }, .fill_helper = bpf_fill_ld_abs_get_processor_id, + .expected_errcode = -ENOTSUPP, }, /* * LD_IND / LD_ABS on fragmented SKBs -- cgit v1.2.3 From 9544bc5347207a68eb308cc8aaaed6c3a687cabd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 29 Jun 2018 08:48:06 -0600 Subject: sg: remove ->sg_magic member This was introduced more than a decade ago when sg chaining was added, but we never really caught anything with it. The scatterlist entry size can be critical, since drivers allocate it, so remove the magic member. Recently it's been triggering allocation stalls and failures in NVMe. Tested-by: Jordan Glover Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- lib/scatterlist.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 06dad7a072fd..d4ae67d6cd1e 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -24,9 +24,6 @@ **/ struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -111,10 +108,7 @@ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) for_each_sg(sgl, sg, nents, i) ret = sg; -#ifdef CONFIG_DEBUG_SG - BUG_ON(sgl[0].sg_magic != SG_MAGIC); BUG_ON(!sg_is_last(ret)); -#endif return ret; } EXPORT_SYMBOL(sg_last); -- cgit v1.2.3 From c643ecf354e25ceeae14add9064d4d6253d75577 Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Mon, 2 Jul 2018 09:35:34 -0700 Subject: lib: rhashtable: Correct self-assignment in rhashtable.c In file lib/rhashtable.c line 777, skip variable is assigned to itself. The following error was observed: lib/rhashtable.c:777:41: warning: explicitly assigning value of variable of type 'int' to itself [-Wself-assign] error, forbidden warning: rhashtable.c:777 This error was found when compiling with Clang 6.0. Change it to iter->skip. Signed-off-by: Rishabh Bhatnagar Acked-by: Herbert Xu Reviewed-by: NeilBrown Signed-off-by: David S. 
Miller --- lib/rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9427b5766134..3109b2e1d552 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -774,7 +774,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) skip++; if (list == iter->list) { iter->p = p; - skip = skip; + iter->skip = skip; goto found; } } -- cgit v1.2.3 From 0026129c8629265bfe5079c1e017fa8543796d9f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 8 Jul 2018 11:55:51 +0900 Subject: rhashtable: add restart routine in rhashtable_free_and_destroy() rhashtable_free_and_destroy() cancels re-hash deferred work then walks and destroys elements. at this moment, some elements can be still in future_tbl. that elements are not destroyed. test case: nft_rhash_destroy() calls rhashtable_free_and_destroy() to destroy all elements of sets before destroying sets and chains. But rhashtable_free_and_destroy() doesn't destroy elements of future_tbl. so that splat occurred. test script: %cat test.nft table ip aa { map map1 { type ipv4_addr : verdict; elements = { 0 : jump a0, 1 : jump a0, 2 : jump a0, 3 : jump a0, 4 : jump a0, 5 : jump a0, 6 : jump a0, 7 : jump a0, 8 : jump a0, 9 : jump a0, } } chain a0 { } } flush ruleset table ip aa { map map1 { type ipv4_addr : verdict; elements = { 0 : jump a0, 1 : jump a0, 2 : jump a0, 3 : jump a0, 4 : jump a0, 5 : jump a0, 6 : jump a0, 7 : jump a0, 8 : jump a0, 9 : jump a0, } } chain a0 { } } flush ruleset %while :; do nft -f test.nft; done Splat looks like: [ 200.795603] kernel BUG at net/netfilter/nf_tables_api.c:1363! [ 200.806944] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 200.812253] CPU: 1 PID: 1582 Comm: nft Not tainted 4.17.0+ #24 [ 200.820297] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015 [ 200.830309] RIP: 0010:nf_tables_chain_destroy.isra.34+0x62/0x240 [nf_tables] [ 200.838317] Code: 43 50 85 c0 74 26 48 8b 45 00 48 8b 4d 08 ba 54 05 00 00 48 c7 c6 60 6d 29 c0 48 c7 c7 c0 65 29 c0 4c 8b 40 08 e8 58 e5 fd f8 <0f> 0b 48 89 da 48 b8 00 00 00 00 00 fc ff [ 200.860366] RSP: 0000:ffff880118dbf4d0 EFLAGS: 00010282 [ 200.866354] RAX: 0000000000000061 RBX: ffff88010cdeaf08 RCX: 0000000000000000 [ 200.874355] RDX: 0000000000000061 RSI: 0000000000000008 RDI: ffffed00231b7e90 [ 200.882361] RBP: ffff880118dbf4e8 R08: ffffed002373bcfb R09: ffffed002373bcfa [ 200.890354] R10: 0000000000000000 R11: ffffed002373bcfb R12: dead000000000200 [ 200.898356] R13: dead000000000100 R14: ffffffffbb62af38 R15: dffffc0000000000 [ 200.906354] FS: 00007fefc31fd700(0000) GS:ffff88011b800000(0000) knlGS:0000000000000000 [ 200.915533] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 200.922355] CR2: 0000557f1c8e9128 CR3: 0000000106880000 CR4: 00000000001006e0 [ 200.930353] Call Trace: [ 200.932351] ? nf_tables_commit+0x26f6/0x2c60 [nf_tables] [ 200.939525] ? nf_tables_setelem_notify.constprop.49+0x1a0/0x1a0 [nf_tables] [ 200.947525] ? nf_tables_delchain+0x6e0/0x6e0 [nf_tables] [ 200.952383] ? nft_add_set_elem+0x1700/0x1700 [nf_tables] [ 200.959532] ? nla_parse+0xab/0x230 [ 200.963529] ? nfnetlink_rcv_batch+0xd06/0x10d0 [nfnetlink] [ 200.968384] ? nfnetlink_net_init+0x130/0x130 [nfnetlink] [ 200.975525] ? debug_show_all_locks+0x290/0x290 [ 200.980363] ? debug_show_all_locks+0x290/0x290 [ 200.986356] ? sched_clock_cpu+0x132/0x170 [ 200.990352] ? find_held_lock+0x39/0x1b0 [ 200.994355] ? sched_clock_local+0x10d/0x130 [ 200.999531] ? 
memset+0x1f/0x40 V2: - free all tables requested by Herbert Xu Signed-off-by: Taehee Yoo Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/rhashtable.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 3109b2e1d552..0183d07a9b4d 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -1143,13 +1143,14 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { - struct bucket_table *tbl; + struct bucket_table *tbl, *next_tbl; unsigned int i; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); +restart: if (free_fn) { for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; @@ -1166,7 +1167,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, } } + next_tbl = rht_dereference(tbl->future_tbl, ht); bucket_table_free(tbl); + if (next_tbl) { + tbl = next_tbl; + goto restart; + } mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); -- cgit v1.2.3 From bf3eeb9b5f2a1a05b3a68c6d82112babd58d6a39 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 8 Jul 2018 13:46:02 -0700 Subject: lib/iov_iter: Document _copy_to_iter_mcsafe() Add some theory of operation documentation to _copy_to_iter_mcsafe(). Reported-by: Al Viro Signed-off-by: Dan Williams Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/153108276256.37979.1689794213845539316.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- lib/iov_iter.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7e43cd54c84c..94fa361be7bb 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -596,6 +596,32 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, return ret; } +/** + * _copy_to_iter_mcsafe - copy to user with source-read error exception handling + * @addr: source kernel address + * @bytes: total transfer length + * @iter: destination iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_to_iter() for protecting read/write to persistent memory. + * Unless / until an architecture can guarantee identical performance + * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a + * performance regression to switch more users to the mcsafe version. + * + * Otherwise, the main differences between this and typical _copy_to_iter(). + * + * * Typical tail/residue handling after a fault retries the copy + * byte-by-byte until the fault happens again. Re-triggering machine + * checks is potentially fatal so the implementation uses source + * alignment and poison alignment assumptions to avoid re-triggering + * hardware exceptions. + * + * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. + * Compare to copy_to_iter() where only ITER_IOVEC attempts might return + * a short copy. + * + * See MCSAFE_TEST for self-test. + */ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; -- cgit v1.2.3 From abd08d7d245397bcbded8c6c29ff79a36b3875b0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 8 Jul 2018 13:46:07 -0700 Subject: lib/iov_iter: Document _copy_to_iter_flushcache() Add some theory of operation documentation to _copy_to_iter_flushcache(). 
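For context, the expected consumer is a DAX-capable driver routing its dax_operations ->copy_from_iter callback into this helper so that writes to persistent memory are pushed out of the CPU cache for every iterator type. A simplified, illustrative sketch modelled on the pmem driver (the function name and exact callback signature are assumptions and may differ between kernel versions):

	/* Hypothetical driver glue, for illustration only. */
	static size_t pmem_copy_from_iter(struct dax_device *dax_dev,
			pgoff_t pgoff, void *addr, size_t bytes,
			struct iov_iter *i)
	{
		/* Write through the cache-flushing path so the data is
		 * durable on pmem regardless of the iterator type. */
		return _copy_from_iter_flushcache(addr, bytes, i);
	}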
Reported-by: Al Viro Signed-off-by: Dan Williams Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/153108276767.37979.9462477994086841699.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- lib/iov_iter.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 94fa361be7bb..09fb73ad9d54 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -727,6 +727,20 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) EXPORT_SYMBOL(_copy_from_iter_nocache); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/** + * _copy_from_iter_flushcache - write destination through cpu cache + * @addr: destination kernel address + * @bytes: total transfer length + * @iter: source iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_from_iter() for ensuring that writes to persistent memory + * are flushed through the CPU cache. It is differentiated from + * _copy_from_iter_nocache() in that guarantees all data is flushed for + * all iterator types. The _copy_from_iter_nocache() only attempts to + * bypass the cache for the ITER_IOVEC case, and on some archs may use + * instructions that strand dirty-data in the cache. + */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; -- cgit v1.2.3 From ca146f6f091e47b3fd18d6a7e76ec0297d202e0f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 8 Jul 2018 13:46:12 -0700 Subject: lib/iov_iter: Fix pipe handling in _copy_to_iter_mcsafe() By mistake the ITER_PIPE early-exit / warning from copy_from_iter() was cargo-culted in _copy_to_iter_mcsafe() rather than a machine-check-safe version of copy_to_iter_pipe(). Implement copy_pipe_to_iter_mcsafe() being careful to return the indication of short copies due to a CPU exception. Without this regression-fix all splice reads to dax-mode files fail. 
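The important part of the contract is that the mcsafe variant may return a short count when reading the source triggers a machine check, and with this fix that now also holds for ITER_PIPE destinations instead of warning and copying nothing. A caller-side sketch of handling the short copy, assuming it sits inside a ssize_t-returning read path (illustrative only; the error policy shown is hypothetical, not taken from this patch):

	ssize_t ret;
	size_t copied;

	/* copy_to_iter_mcsafe() can stop early if the pmem source takes
	 * a machine check; report however many bytes actually reached
	 * the iterator and only fail when nothing was copied at all. */
	copied = copy_to_iter_mcsafe(kaddr, len, iter);
	ret = copied ? copied : -EIO;	/* hypothetical error policy */
	return ret;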
Reported-by: Ross Zwisler Tested-by: Ross Zwisler Signed-off-by: Dan Williams Acked-by: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Fixes: 8780356ef630 ("x86/asm/memcpy_mcsafe: Define copy_to_iter_mcsafe()") Link: http://lkml.kernel.org/r/153108277278.37979.3327916996902264102.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- lib/iov_iter.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 09fb73ad9d54..8be175df3075 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -596,6 +596,37 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, return ret; } +static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, + struct iov_iter *i) +{ + struct pipe_inode_info *pipe = i->pipe; + size_t n, off, xfer = 0; + int idx; + + if (!sanity(i)) + return 0; + + bytes = n = push_pipe(i, bytes, &idx, &off); + if (unlikely(!n)) + return 0; + for ( ; n; idx = next_idx(idx, pipe), off = 0) { + size_t chunk = min_t(size_t, n, PAGE_SIZE - off); + unsigned long rem; + + rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, + chunk); + i->idx = idx; + i->iov_offset = off + chunk - rem; + xfer += chunk - rem; + if (rem) + break; + n -= chunk; + addr += chunk; + } + i->count -= xfer; + return xfer; +} + /** * _copy_to_iter_mcsafe - copy to user with source-read error exception handling * @addr: source kernel address @@ -627,10 +658,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; - if (unlikely(i->type & ITER_PIPE)) { - WARN_ON(1); - return 0; - } + if (unlikely(i->type & ITER_PIPE)) + return copy_pipe_to_iter_mcsafe(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); iterate_and_advance(i, bytes, v, -- cgit v1.2.3 From 107d01f5ba10f4162c38109496607eb197059064 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 16 Jul 2018 13:26:13 -0700 Subject: lib/rhashtable: consider param->min_size when setting initial table size rhashtable_init() currently does not take into account the user-passed min_size parameter unless param->nelem_hint is set as well. As such, the default size (number of buckets) will always be HASH_DEFAULT_SIZE even if the smallest allowed size is larger than that. Remediate this by unconditionally calling into rounded_hashtable_size() and handling things accordingly. Signed-off-by: Davidlohr Bueso Acked-by: Herbert Xu Signed-off-by: David S. 
---
 lib/rhashtable.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 0183d07a9b4d..e5c8586cf717 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -964,8 +964,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 
 static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 {
-	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   (unsigned long)params->min_size);
+	size_t retsize;
+
+	if (params->nelem_hint)
+		retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+			      (unsigned long)params->min_size);
+	else
+		retsize = max(HASH_DEFAULT_SIZE,
+			      (unsigned long)params->min_size);
+
+	return retsize;
 }
 
 static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
@@ -1022,8 +1030,6 @@ int rhashtable_init(struct rhashtable *ht,
 	struct bucket_table *tbl;
 	size_t size;
 
-	size = HASH_DEFAULT_SIZE;
-
 	if ((!params->key_len && !params->obj_hashfn) ||
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
@@ -1050,8 +1056,7 @@ int rhashtable_init(struct rhashtable *ht,
 
 	ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
 
-	if (params->nelem_hint)
-		size = rounded_hashtable_size(&ht->p);
+	size = rounded_hashtable_size(&ht->p);
 
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
-- cgit v1.2.3

From 03758dbbe2b5c0627b361ad5b4a2c60f9964ccde Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Thu, 26 Jul 2018 16:37:12 -0700
Subject: kasan: only select SLUB_DEBUG with SYSFS=y

Building with KASAN and SLUB but without sysfs now results in a
build-time error:

WARNING: unmet direct dependencies detected for SLUB_DEBUG
  Depends on [n]: SLUB [=y] && SYSFS [=n]
  Selected by [y]:
  - KASAN [=y] && HAVE_ARCH_KASAN [=y] && (SLUB [=y] || SLAB [=n] && !DEBUG_SLAB [=n]) && SLUB [=y]

mm/slub.c:4565:12: error: 'list_locations' defined but not used [-Werror=unused-function]
 static int list_locations(struct kmem_cache *s, char *buf,
            ^~~~~~~~~~~~~~
mm/slub.c:4406:13: error: 'validate_slab_cache' defined but not used [-Werror=unused-function]
 static long validate_slab_cache(struct kmem_cache *s)

This disallows that broken configuration in Kconfig.

Link: http://lkml.kernel.org/r/20180709154019.1693026-1-arnd@arndb.de
Fixes: dd275caf4a0d ("kasan: depend on CONFIG_SLUB_DEBUG")
Signed-off-by: Arnd Bergmann
Cc: "Jason A. Donenfeld"
Cc: Arnd Bergmann
Cc: Shakeel Butt
Cc: Andrey Ryabinin
Cc: Christoph Lameter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/Kconfig.kasan | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index c253c1b46c6b..befb127507c0 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,7 +5,7 @@ if HAVE_ARCH_KASAN
 
 config KASAN
 	bool "KASan: runtime memory debugger"
-	depends on SLUB || (SLAB && !DEBUG_SLAB)
+	depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
 	select SLUB_DEBUG if SLUB
 	select CONSTRUCTORS
 	select STACKDEPOT
-- cgit v1.2.3

From 3ca17b1f3628f916f79e0ab62f1bf0e45ec9ba92 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin
Date: Fri, 10 Aug 2018 17:23:03 -0700
Subject: lib/ubsan: remove null-pointer checks

With gcc-8, -fsanitize=null has become very noisy. GCC started to
complain about constructs like &a->b, where 'a' is a NULL pointer. There
is no NULL dereference there; we only compute the address of a struct
member. It is technically undefined behavior, so UBSAN is correct to
report it. But as long as there is no real NULL dereference, I think we
should be fine.
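To make that pattern concrete, here is a minimal userspace illustration; it is not taken from the kernel, and the struct and field names are invented. Built with gcc -fsanitize=null, UBSAN typically reports something like "member access within null pointer" for the marked line, even though nothing is ever loaded or stored through the pointer.

#include <stddef.h>
#include <stdio.h>

struct obj {
	int foo;
	int bar;
};

int main(void)
{
	struct obj *a = NULL;

	/*
	 * No load or store through 'a' happens here: &a->bar only computes
	 * an address (NULL plus offsetof(struct obj, bar)).  It is still
	 * formally undefined behaviour, which is why -fsanitize=null
	 * flags it at runtime.
	 */
	int *p = &a->bar;

	printf("computed pointer: %p (offsetof says %zu)\n",
	       (void *)p, offsetof(struct obj, bar));
	return 0;
}

offsetof()- and container_of()-style code performs exactly this kind of address arithmetic on purpose, which is why most of these reports are noise rather than real bugs.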
The -fno-delete-null-pointer-checks compiler flag should protect us from
any consequences (see the short sketch after this patch). So let's just
not use -fsanitize=null, as it's not useful for us. If there is a real
NULL dereference we will see a crash; even if userspace mapped something
at NULL (root can do this), things like SMAP should catch the issue.

Link: http://lkml.kernel.org/r/20180802153209.813-1-aryabinin@virtuozzo.com
Signed-off-by: Andrey Ryabinin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/Kconfig.ubsan | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 19d42ea75ec2..98fa559ebd80 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,9 +1,6 @@
 config ARCH_HAS_UBSAN_SANITIZE_ALL
 	bool
 
-config ARCH_WANTS_UBSAN_NO_NULL
-	def_bool n
-
 config UBSAN
 	bool "Undefined behaviour sanity checker"
 	help
@@ -39,14 +36,6 @@ config UBSAN_ALIGNMENT
 	  Enabling this option on architectures that support unaligned
 	  accesses may produce a lot of false positives.
 
-config UBSAN_NULL
-	bool "Enable checking of null pointers"
-	depends on UBSAN
-	default y if !ARCH_WANTS_UBSAN_NO_NULL
-	help
-	  This option enables detection of memory accesses via a
-	  null pointer.
-
 config TEST_UBSAN
 	tristate "Module for testing for undefined behavior detection"
 	depends on m && UBSAN
-- cgit v1.2.3
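As referenced in the changelog above, here is a small, self-contained illustration of the kind of transformation -fno-delete-null-pointer-checks suppresses. The struct dev type and dev_flags() helper are invented for the example; this is a sketch of the compiler behaviour, not kernel code.

#include <stdio.h>

struct dev {
	int flags;
};

/*
 * Classic pattern the flag exists for: the dereference happens before
 * the NULL check, so an optimizing compiler is normally entitled to
 * assume 'd' is non-NULL and delete the check entirely.  Building with
 * -fno-delete-null-pointer-checks keeps the check in place even after
 * such a mistake, so a buggy caller crashes instead of misbehaving.
 */
static int dev_flags(struct dev *d)
{
	int flags = d->flags;	/* dereference first: UB if d == NULL */

	if (!d)			/* check afterwards: may otherwise be dropped */
		return -1;
	return flags;
}

int main(void)
{
	struct dev d = { .flags = 3 };

	printf("%d\n", dev_flags(&d));
	return 0;
}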