From bd9dcd046509cd5355605e43791eacee8bf5e40f Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 6 Feb 2018 07:18:25 +0800 Subject: debugobjects: Export max loops counter __debug_check_no_obj_freed() can be an expensive operation depending on the size of memory freed. It already exports the maximum chain walk length via debugfs, but this only records the maximum of a single memory chunk. Though there is no information about the total number of objects inspected for a __debug_check_no_obj_freed() operation, which might be significantly larger when a huge memory region is freed. Aggregate the number of objects inspected for a single invocation of __debug_check_no_obj_freed() and export it via sysfs. The resulting output of /sys/kernel/debug/debug_objects/stats looks like: max_chain :121 max_checked :543267 warnings :0 fixups :0 pool_free :1764 pool_min_free :341 pool_used :86438 pool_max_used :268887 objs_allocated:6068254 objs_freed :5981076 [ tglx: Renamed the variable to max_checked and adjusted changelog ] Signed-off-by: Yang Shi Signed-off-by: Thomas Gleixner Cc: longman@redhat.com Link: https://lkml.kernel.org/r/1517872708-24207-2-git-send-email-yang.shi@linux.alibaba.com --- lib/debugobjects.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2f5349c6e81a..f6d57a11c927 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -50,6 +50,7 @@ static int obj_pool_max_used; static struct kmem_cache *obj_cache; static int debug_objects_maxchain __read_mostly; +static int debug_objects_maxchecked __read_mostly; static int debug_objects_fixups __read_mostly; static int debug_objects_warnings __read_mostly; static int debug_objects_enabled __read_mostly @@ -720,7 +721,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size) enum debug_obj_state state; struct debug_bucket *db; struct debug_obj *obj; - int cnt; + int cnt, objs_checked = 0; saddr = (unsigned long) address; eaddr = saddr + size; @@ -765,7 +766,12 @@ repeat: if (cnt > debug_objects_maxchain) debug_objects_maxchain = cnt; + + objs_checked += cnt; } + + if (objs_checked > debug_objects_maxchecked) + debug_objects_maxchecked = objs_checked; } void debug_check_no_obj_freed(const void *address, unsigned long size) @@ -780,6 +786,7 @@ void debug_check_no_obj_freed(const void *address, unsigned long size) static int debug_stats_show(struct seq_file *m, void *v) { seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); + seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); seq_printf(m, "warnings :%d\n", debug_objects_warnings); seq_printf(m, "fixups :%d\n", debug_objects_fixups); seq_printf(m, "pool_free :%d\n", obj_pool_free); -- cgit v1.2.3 From 36c4ead6f6dfbbe777d3d7e9cc8702530b71a94f Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 6 Feb 2018 07:18:26 +0800 Subject: debugobjects: Add global free list and the counter free_object() adds objects to the pool list and schedules work when the pool list is larger than the pool size. The worker handles the actual kfree() of the object by iterating the pool list until the pool size is below the maximum pool size again. To iterate the pool list, pool_lock has to be held and the objects which should be freed() need to be put into temporary storage so pool_lock can be dropped for the actual kmem_cache_free() invocation. That's a pointless and expensive exercise if there is a large number of objects to free. 
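For illustration, a minimal userspace analogue of the batched drain described above (plain C with a pthread mutex standing in for pool_lock; the obj/pool names, the counters and the batch size of four are placeholders mirroring ODEBUG_FREE_BATCH, not the kernel code):

    /* Simplified sketch of the old drain pattern: re-take the lock for every
     * small batch of objects pulled off the pool list, free them with the
     * lock dropped, repeat until the pool is back at its target size. */
    #include <pthread.h>
    #include <stdlib.h>

    #define FREE_BATCH 4

    struct obj { struct obj *next; };

    static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct obj *pool;                /* singly linked free pool */
    static int pool_cnt, pool_max = 1024;   /* current and target fill level */

    static void drain_pool_batched(void)
    {
            struct obj *batch[FREE_BATCH];
            int i, n;

            for (;;) {
                    pthread_mutex_lock(&pool_lock);
                    if (pool_cnt <= pool_max) {
                            pthread_mutex_unlock(&pool_lock);
                            return;
                    }
                    /* pull at most FREE_BATCH objects while holding the lock */
                    for (n = 0; n < FREE_BATCH && pool_cnt > pool_max; n++) {
                            batch[n] = pool;
                            pool = pool->next;
                            pool_cnt--;
                    }
                    pthread_mutex_unlock(&pool_lock);
                    /* the actual free happens outside the lock, per batch */
                    for (i = 0; i < n; i++)
                            free(batch[i]);
            }
    }

Every handful of objects costs a full lock/unlock round trip, so draining a large backlog turns into a long series of lock acquisitions.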
In such a case its better to evaulate the fill level of the pool in free_objects() and queue the object to free either in the pool list or if it's full on a separate global free list. The worker can then do the following simpler operation: - Move objects back from the global free list to the pool list if the pool list is not longer full. - Remove the remaining objects in a single list move operation from the global free list and do the kmem_cache_free() operation lockless from the temporary list head. In fill_pool() the global free list is checked as well to avoid real allocations from the kmem cache. Add the necessary list head and a counter for the number of objects on the global free list and export that counter via sysfs: max_chain :79 max_loops :8147 warnings :0 fixups :0 pool_free :1697 pool_min_free :346 pool_used :15356 pool_max_used :23933 on_free_list :39 objs_allocated:32617 objs_freed :16588 Nothing queues objects on the global free list yet. This happens in a follow up change. [ tglx: Simplified implementation and massaged changelog ] Suggested-by: Thomas Gleixner Signed-off-by: Yang Shi Signed-off-by: Thomas Gleixner Cc: longman@redhat.com Link: https://lkml.kernel.org/r/1517872708-24207-3-git-send-email-yang.shi@linux.alibaba.com --- lib/debugobjects.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index f6d57a11c927..e31273b45da5 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -42,11 +42,14 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_RAW_SPINLOCK(pool_lock); static HLIST_HEAD(obj_pool); +static HLIST_HEAD(obj_to_free); static int obj_pool_min_free = ODEBUG_POOL_SIZE; static int obj_pool_free = ODEBUG_POOL_SIZE; static int obj_pool_used; static int obj_pool_max_used; +/* The number of objs on the global free list */ +static int obj_nr_tofree; static struct kmem_cache *obj_cache; static int debug_objects_maxchain __read_mostly; @@ -97,12 +100,32 @@ static const char *obj_states[ODEBUG_STATE_MAX] = { static void fill_pool(void) { gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; - struct debug_obj *new; + struct debug_obj *new, *obj; unsigned long flags; if (likely(obj_pool_free >= debug_objects_pool_min_level)) return; + /* + * Reuse objs from the global free list; they will be reinitialized + * when allocating. + */ + while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) { + raw_spin_lock_irqsave(&pool_lock, flags); + /* + * Recheck with the lock held as the worker thread might have + * won the race and freed the global free list already. 
+ */ + if (obj_nr_tofree) { + obj = hlist_entry(obj_to_free.first, typeof(*obj), node); + hlist_del(&obj->node); + obj_nr_tofree--; + hlist_add_head(&obj->node, &obj_pool); + obj_pool_free++; + } + raw_spin_unlock_irqrestore(&pool_lock, flags); + } + if (unlikely(!obj_cache)) return; @@ -186,11 +209,38 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) static void free_obj_work(struct work_struct *work) { struct debug_obj *objs[ODEBUG_FREE_BATCH]; + struct hlist_node *tmp; + struct debug_obj *obj; unsigned long flags; int i; + HLIST_HEAD(tofree); if (!raw_spin_trylock_irqsave(&pool_lock, flags)) return; + + /* + * The objs on the pool list might be allocated before the work is + * run, so recheck if pool list it full or not, if not fill pool + * list from the global free list + */ + while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) { + obj = hlist_entry(obj_to_free.first, typeof(*obj), node); + hlist_del(&obj->node); + hlist_add_head(&obj->node, &obj_pool); + obj_pool_free++; + obj_nr_tofree--; + } + + /* + * Pool list is already full and there are still objs on the free + * list. Move remaining free objs to a temporary list to free the + * memory outside the pool_lock held region. + */ + if (obj_nr_tofree) { + hlist_move_list(&obj_to_free, &tofree); + obj_nr_tofree = 0; + } + while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) { for (i = 0; i < ODEBUG_FREE_BATCH; i++) { objs[i] = hlist_entry(obj_pool.first, @@ -211,6 +261,11 @@ static void free_obj_work(struct work_struct *work) return; } raw_spin_unlock_irqrestore(&pool_lock, flags); + + hlist_for_each_entry_safe(obj, tmp, &tofree, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + } } /* @@ -793,6 +848,7 @@ static int debug_stats_show(struct seq_file *m, void *v) seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); seq_printf(m, "pool_used :%d\n", obj_pool_used); seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); + seq_printf(m, "on_free_list :%d\n", obj_nr_tofree); seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); seq_printf(m, "objs_freed :%d\n", debug_objects_freed); return 0; -- cgit v1.2.3 From 636e1970fd7deaa0d0ee0dfb6ac65fbd690b32d2 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 6 Feb 2018 07:18:27 +0800 Subject: debugobjects: Use global free list in free_object() The newly added global free list allows to avoid lengthy pool_list iterations in free_obj_work() by putting objects either into the pool list when the fill level of the pool is below the maximum or by putting them on the global free list immediately. As the pool is now guaranteed to never exceed the maximum fill level this allows to remove the batch removal from pool list in free_obj_work(). Split free_object() into two parts, so the actual queueing function can be reused without invoking schedule_work() on every invocation. 
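As a rough sketch of that split, assuming hypothetical names queue_free() and worker() and leaving out the kernel's step of refilling the pool from the free list first:

    /* Simplified userspace analogue of the reworked scheme (not the kernel
     * code): when the pool is full, a freed object is parked on a separate
     * to-free list; the worker detaches that whole list with one pointer
     * swap under the lock and frees it with the lock dropped. */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct obj { struct obj *next; };

    static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct obj *pool, *to_free;      /* pool list and global free list */
    static int pool_cnt, pool_max = 1024;

    /* queueing half: returns true if the caller should kick the worker */
    static bool queue_free(struct obj *o)
    {
            bool work;

            pthread_mutex_lock(&pool_lock);
            work = pool_cnt >= pool_max;
            if (work) {
                    o->next = to_free;      /* pool full: park it for the worker */
                    to_free = o;
            } else {
                    o->next = pool;         /* otherwise refill the pool directly */
                    pool = o;
                    pool_cnt++;
            }
            pthread_mutex_unlock(&pool_lock);
            return work;
    }

    static void worker(void)
    {
            struct obj *list, *next;

            pthread_mutex_lock(&pool_lock);
            list = to_free;                 /* detach the whole list in O(1) */
            to_free = NULL;
            pthread_mutex_unlock(&pool_lock);

            for (; list; list = next) {     /* free outside the lock */
                    next = list->next;
                    free(list);
            }
    }

Returning a flag instead of scheduling directly pays off when many objects are freed in one call chain: the caller can accumulate the results and kick the worker once, which is how the follow-up patch below uses __free_object().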
[ tglx: Remove the batch removal from pool list and massage changelog ] Suggested-by: Thomas Gleixner Signed-off-by: Yang Shi Signed-off-by: Thomas Gleixner Cc: longman@redhat.com Link: https://lkml.kernel.org/r/1517872708-24207-4-git-send-email-yang.shi@linux.alibaba.com --- lib/debugobjects.c | 63 +++++++++++++++++++----------------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index e31273b45da5..3e79c100271f 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -201,18 +201,13 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) * workqueue function to free objects. * * To reduce contention on the global pool_lock, the actual freeing of - * debug objects will be delayed if the pool_lock is busy. We also free - * the objects in a batch of 4 for each lock/unlock cycle. + * debug objects will be delayed if the pool_lock is busy. */ -#define ODEBUG_FREE_BATCH 4 - static void free_obj_work(struct work_struct *work) { - struct debug_obj *objs[ODEBUG_FREE_BATCH]; struct hlist_node *tmp; struct debug_obj *obj; unsigned long flags; - int i; HLIST_HEAD(tofree); if (!raw_spin_trylock_irqsave(&pool_lock, flags)) @@ -240,26 +235,6 @@ static void free_obj_work(struct work_struct *work) hlist_move_list(&obj_to_free, &tofree); obj_nr_tofree = 0; } - - while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) { - for (i = 0; i < ODEBUG_FREE_BATCH; i++) { - objs[i] = hlist_entry(obj_pool.first, - typeof(*objs[0]), node); - hlist_del(&objs[i]->node); - } - - obj_pool_free -= ODEBUG_FREE_BATCH; - debug_objects_freed += ODEBUG_FREE_BATCH; - /* - * We release pool_lock across kmem_cache_free() to - * avoid contention on pool_lock. - */ - raw_spin_unlock_irqrestore(&pool_lock, flags); - for (i = 0; i < ODEBUG_FREE_BATCH; i++) - kmem_cache_free(obj_cache, objs[i]); - if (!raw_spin_trylock_irqsave(&pool_lock, flags)) - return; - } raw_spin_unlock_irqrestore(&pool_lock, flags); hlist_for_each_entry_safe(obj, tmp, &tofree, node) { @@ -268,27 +243,33 @@ static void free_obj_work(struct work_struct *work) } } -/* - * Put the object back into the pool and schedule work to free objects - * if necessary. - */ -static void free_object(struct debug_obj *obj) +static bool __free_object(struct debug_obj *obj) { unsigned long flags; - int sched = 0; + bool work; raw_spin_lock_irqsave(&pool_lock, flags); - /* - * schedule work when the pool is filled and the cache is - * initialized: - */ - if (obj_pool_free > debug_objects_pool_size && obj_cache) - sched = 1; - hlist_add_head(&obj->node, &obj_pool); - obj_pool_free++; + work = (obj_pool_free > debug_objects_pool_size) && obj_cache; obj_pool_used--; + + if (work) { + obj_nr_tofree++; + hlist_add_head(&obj->node, &obj_to_free); + } else { + obj_pool_free++; + hlist_add_head(&obj->node, &obj_pool); + } raw_spin_unlock_irqrestore(&pool_lock, flags); - if (sched) + return work; +} + +/* + * Put the object back into the pool and schedule work to free objects + * if necessary. 
+ */ +static void free_object(struct debug_obj *obj) +{ + if (__free_object(obj)) schedule_work(&debug_obj_work); } -- cgit v1.2.3 From 1ea9b98b007a662e402551a41a4413becad40a65 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 6 Feb 2018 07:18:28 +0800 Subject: debugobjects: Use global free list in __debug_check_no_obj_freed() __debug_check_no_obj_freed() iterates over the to be freed memory region in chunks and iterates over the corresponding hash bucket list for each chunk. This can accumulate to hundred thousands of checked objects. In the worst case this can trigger the soft lockup detector: NMI watchdog: BUG: soft lockup - CPU#15 stuck for 22s! CPU: 15 PID: 110342 Comm: stress-ng-getde Call Trace: [] debug_check_no_obj_freed+0x13e/0x220 [] __free_pages_ok+0x1f1/0x5c0 [] __free_pages+0x25/0x40 [] __free_slab+0x19b/0x270 [] discard_slab+0x39/0x50 [] __slab_free+0x207/0x270 [] ___cache_free+0xa6/0xb0 [] qlist_free_all+0x47/0x80 [] quarantine_reduce+0x159/0x190 [] kasan_kmalloc+0xaf/0xc0 [] kasan_slab_alloc+0x12/0x20 [] kmem_cache_alloc+0xfa/0x360 [] ? getname_flags+0x4f/0x1f0 [] getname_flags+0x4f/0x1f0 [] getname+0x12/0x20 [] do_sys_open+0xf9/0x210 [] SyS_open+0x1e/0x20 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 The code path might be called in either atomic or non-atomic context, but in_atomic() can't tell if the current context is atomic or not on a PREEMPT=n kernel, so cond_resched() can't be used to prevent the softlockup. Utilize the global free list to shorten the loop execution time. [ tglx: Massaged changelog ] Suggested-by: Thomas Gleixner Signed-off-by: Yang Shi Signed-off-by: Thomas Gleixner Cc: longman@redhat.com Link: https://lkml.kernel.org/r/1517872708-24207-5-git-send-email-yang.shi@linux.alibaba.com --- lib/debugobjects.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 3e79c100271f..faab2c4ea024 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -751,13 +751,13 @@ EXPORT_SYMBOL_GPL(debug_object_active_state); static void __debug_check_no_obj_freed(const void *address, unsigned long size) { unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; - struct hlist_node *tmp; - HLIST_HEAD(freelist); struct debug_obj_descr *descr; enum debug_obj_state state; struct debug_bucket *db; + struct hlist_node *tmp; struct debug_obj *obj; int cnt, objs_checked = 0; + bool work = false; saddr = (unsigned long) address; eaddr = saddr + size; @@ -788,18 +788,12 @@ repeat: goto repeat; default: hlist_del(&obj->node); - hlist_add_head(&obj->node, &freelist); + work |= __free_object(obj); break; } } raw_spin_unlock_irqrestore(&db->lock, flags); - /* Now free them */ - hlist_for_each_entry_safe(obj, tmp, &freelist, node) { - hlist_del(&obj->node); - free_object(obj); - } - if (cnt > debug_objects_maxchain) debug_objects_maxchain = cnt; @@ -808,6 +802,10 @@ repeat: if (objs_checked > debug_objects_maxchecked) debug_objects_maxchecked = objs_checked; + + /* Schedule work to actually kmem_cache_free() objects */ + if (work) + schedule_work(&debug_obj_work); } void debug_check_no_obj_freed(const void *address, unsigned long size) -- cgit v1.2.3 From 04148187aa9df3626168f7429d2287997787e387 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Feb 2018 16:52:58 +0100 Subject: debugobjects: Fix debug_objects_freed accounting The removal of the batched object freeing has caused the debug_objects_freed to become read-only, and the reading is inside an ifdef, so gcc warns that it is 
completely unused without CONFIG_DEBUG_FS: lib/debugobjects.c:71:14: error: 'debug_objects_freed' defined but not used [-Werror=unused-variable] Assuming we are still interested in this number, this adds back code to keep track of the freed objects. Fixes: 636e1970fd7d ("debugobjects: Use global free list in free_object()") Suggested-by: Waiman Long Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Yang Shi Acked-by: Waiman Long Link: https://lkml.kernel.org/r/20180222155335.1647466-1-arnd@arndb.de --- lib/debugobjects.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index faab2c4ea024..105ecfc47d8c 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -233,6 +233,7 @@ static void free_obj_work(struct work_struct *work) */ if (obj_nr_tofree) { hlist_move_list(&obj_to_free, &tofree); + debug_objects_freed += obj_nr_tofree; obj_nr_tofree = 0; } raw_spin_unlock_irqrestore(&pool_lock, flags); -- cgit v1.2.3 From 5f171577b4f35b44795a73bde8cf2c49b4073925 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 24 Oct 2017 16:52:32 +0100 Subject: Drop a bunch of metag references Now that arch/metag/ has been removed, drop a bunch of metag references in various codes across the whole tree: - VM_GROWSUP and __VM_ARCH_SPECIFIC_1. - MT_METAG_* ELF note types. - METAG Kconfig dependencies (FRAME_POINTER) and ranges (MAX_STACK_SIZE_MB). - metag cases in tools (checkstack.pl, recordmcount.c, perf). Signed-off-by: James Hogan Acked-by: Steven Rostedt (VMware) Acked-by: Peter Zijlstra (Intel) Reviewed-by: Guenter Roeck Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: linux-mm@kvack.org Cc: linux-metag@vger.kernel.org --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6088408ef26c..d1c523e408e9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -356,7 +356,7 @@ config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ (CRIS || M68K || FRV || UML || \ - SUPERH || BLACKFIN || MN10300 || METAG) || \ + SUPERH || BLACKFIN || MN10300) || \ ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS help -- cgit v1.2.3 From d3dcf8eb615537526bd42ff27a081d46d337816e Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 4 Mar 2018 17:29:48 +0200 Subject: rhashtable: Fix rhlist duplicates insertion When inserting duplicate objects (those with the same key), current rhlist implementation messes up the chain pointers by updating the bucket pointer instead of prev next pointer to the newly inserted node. This causes missing elements on removal and travesal. Fix that by properly updating pprev pointer to point to the correct rhash_head next pointer. Issue: 1241076 Change-Id: I86b2c140bcb4aeb10b70a72a267ff590bb2b17e7 Fixes: ca26893f05e8 ('rhashtable: Add rhlist interface') Signed-off-by: Paul Blakey Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/rhashtable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 3825c30aaa36..47de025b6245 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -506,8 +506,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, if (!key || (ht->p.obj_cmpfn ? 
ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head)))) + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; continue; + } if (!ht->rhlist) return rht_obj(ht, head); -- cgit v1.2.3 From 499ac3b60f657dae82055fc81c7b01e6242ac9bc Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 4 Mar 2018 17:29:49 +0200 Subject: test_rhashtable: add test case for rhltable with duplicate objects Tries to insert duplicates in the middle of bucket's chain: bucket 1: [[val 21 (tid=1)]] -> [[ val 1 (tid=2), val 1 (tid=0) ]] Reuses tid to distinguish the elements insertion order. Signed-off-by: Paul Blakey Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) (limited to 'lib') diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 76d3667fdea2..f4000c137dbe 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -79,6 +79,21 @@ struct thread_data { struct test_obj *objs; }; +static u32 my_hashfn(const void *data, u32 len, u32 seed) +{ + const struct test_obj_rhl *obj = data; + + return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE; +} + +static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct test_obj_rhl *test_obj = obj; + const struct test_obj_val *val = arg->key; + + return test_obj->value.id - val->id; +} + static struct rhashtable_params test_rht_params = { .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), @@ -87,6 +102,17 @@ static struct rhashtable_params test_rht_params = { .nulls_base = (3U << RHT_BASE_SHIFT), }; +static struct rhashtable_params test_rht_params_dup = { + .head_offset = offsetof(struct test_obj_rhl, list_node), + .key_offset = offsetof(struct test_obj_rhl, value), + .key_len = sizeof(struct test_obj_val), + .hashfn = jhash, + .obj_hashfn = my_hashfn, + .obj_cmpfn = my_cmpfn, + .nelem_hint = 128, + .automatic_shrinking = false, +}; + static struct semaphore prestart_sem; static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0); @@ -465,6 +491,112 @@ static int __init test_rhashtable_max(struct test_obj *array, return err; } +static unsigned int __init print_ht(struct rhltable *rhlt) +{ + struct rhashtable *ht; + const struct bucket_table *tbl; + char buff[512] = ""; + unsigned int i, cnt = 0; + + ht = &rhlt->ht; + tbl = rht_dereference(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) { + struct rhash_head *pos, *next; + struct test_obj_rhl *p; + + pos = rht_dereference(tbl->buckets[i], ht); + next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; + + if (!rht_is_a_nulls(pos)) { + sprintf(buff, "%s\nbucket[%d] -> ", buff, i); + } + + while (!rht_is_a_nulls(pos)) { + struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead); + sprintf(buff, "%s[[", buff); + do { + pos = &list->rhead; + list = rht_dereference(list->next, ht); + p = rht_obj(ht, pos); + + sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid, + list? ", " : " "); + cnt++; + } while (list); + + pos = next, + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL; + + sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? 
" -> " : ""); + } + } + printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff); + + return cnt; +} + +static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects, + int cnt, bool slow) +{ + struct rhltable rhlt; + unsigned int i, ret; + const char *key; + int err = 0; + + err = rhltable_init(&rhlt, &test_rht_params_dup); + if (WARN_ON(err)) + return err; + + for (i = 0; i < cnt; i++) { + rhl_test_objects[i].value.tid = i; + key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead); + key += test_rht_params_dup.key_offset; + + if (slow) { + err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key, + &rhl_test_objects[i].list_node.rhead)); + if (err == -EAGAIN) + err = 0; + } else + err = rhltable_insert(&rhlt, + &rhl_test_objects[i].list_node, + test_rht_params_dup); + if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast")) + goto skip_print; + } + + ret = print_ht(&rhlt); + WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast"); + +skip_print: + rhltable_destroy(&rhlt); + + return 0; +} + +static int __init test_insert_duplicates_run(void) +{ + struct test_obj_rhl rhl_test_objects[3] = {}; + + pr_info("test inserting duplicates\n"); + + /* two different values that map to same bucket */ + rhl_test_objects[0].value.id = 1; + rhl_test_objects[1].value.id = 21; + + /* and another duplicate with same as [0] value + * which will be second on the bucket list */ + rhl_test_objects[2].value.id = rhl_test_objects[0].value.id; + + test_insert_dup(rhl_test_objects, 2, false); + test_insert_dup(rhl_test_objects, 3, false); + test_insert_dup(rhl_test_objects, 2, true); + test_insert_dup(rhl_test_objects, 3, true); + + return 0; +} + static int thread_lookup_test(struct thread_data *tdata) { unsigned int entries = tdata->entries; @@ -613,6 +745,8 @@ static int __init test_rht_init(void) do_div(total_time, runs); pr_info("Average test time: %llu\n", total_time); + test_insert_duplicates_run(); + if (!tcount) return 0; -- cgit v1.2.3 From 739d875dd6982618020d30f58f8acf10f6076e6d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Mar 2018 09:48:46 +0000 Subject: mn10300: Remove the architecture Remove the MN10300 arch as the hardware is defunct. 
Suggested-by: Arnd Bergmann Signed-off-by: David Howells cc: Masahiro Yamada cc: linux-am33-list@redhat.com Signed-off-by: Arnd Bergmann --- lib/Kconfig.debug | 2 +- lib/test_user_copy.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d5964b051017..41ac9d294245 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -356,7 +356,7 @@ config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ (CRIS || M68K || FRV || UML || \ - SUPERH || BLACKFIN || MN10300) || \ + SUPERH || BLACKFIN) || \ ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS help diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index 4621db801b23..a6556f3364d1 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -35,7 +35,6 @@ !defined(CONFIG_M32R) && \ !defined(CONFIG_M68K) && \ !defined(CONFIG_MICROBLAZE) && \ - !defined(CONFIG_MN10300) && \ !defined(CONFIG_NIOS2) && \ !defined(CONFIG_PPC32) && \ !defined(CONFIG_SUPERH)) -- cgit v1.2.3 From 0862ca422b79cb5aa70823ee0f07f6b468f86070 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 9 Mar 2018 15:50:59 -0800 Subject: bug: use %pB in BUG and stack protector failure The BUG and stack protector reports were still using a raw %p. This changes it to %pB for more meaningful output. Link: http://lkml.kernel.org/r/20180301225704.GA34198@beast Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Signed-off-by: Kees Cook Reviewed-by: Andrew Morton Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Richard Weinberger , Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index c1b0fad31b10..44f432cb064d 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -191,7 +191,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (file) pr_crit("kernel BUG at %s:%u!\n", file, line); else - pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n", + pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n", (void *)bugaddr); return BUG_TRAP_TYPE_BUG; -- cgit v1.2.3 From 1b4cfe3c0a30dde968fb43c577a8d7e262a145ee Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 9 Mar 2018 15:51:02 -0800 Subject: lib/bug.c: exclude non-BUG/WARN exceptions from report_bug() Commit b8347c219649 ("x86/debug: Handle warnings before the notifier chain, to fix KGDB crash") changed the ordering of fixups, and did not take into account the case of x86 processing non-WARN() and non-BUG() exceptions. This would lead to output of a false BUG line with no other information. In the case of a refcount exception, it would be immediately followed by the refcount WARN(), producing very strange double-"cut here": lkdtm: attempting bad refcount_inc() overflow ------------[ cut here ]------------ Kernel BUG at 0000000065f29de5 [verbose debug info unavailable] ------------[ cut here ]------------ refcount_t overflow at lkdtm_REFCOUNT_INC_OVERFLOW+0x6b/0x90 in cat[3065], uid/euid: 0/0 WARNING: CPU: 0 PID: 3065 at kernel/panic.c:657 refcount_error_report+0x9a/0xa4 ... In the prior ordering, exceptions were searched first: do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, ... 
if (fixup_exception(regs, trapnr)) return 0; - if (fixup_bug(regs, trapnr)) - return 0; - As a result, fixup_bugs()'s is_valid_bugaddr() didn't take into account needing to search the exception list first, since that had already happened. So, instead of searching the exception list twice (once in is_valid_bugaddr() and then again in fixup_exception()), just add a simple sanity check to report_bug() that will immediately bail out if a BUG() (or WARN()) entry is not found. Link: http://lkml.kernel.org/r/20180301225934.GA34350@beast Fixes: b8347c219649 ("x86/debug: Handle warnings before the notifier chain, to fix KGDB crash") Signed-off-by: Kees Cook Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Richard Weinberger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index 44f432cb064d..1077366f496b 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -150,6 +150,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_NONE; bug = find_bug(bugaddr); + if (!bug) + return BUG_TRAP_TYPE_NONE; file = NULL; line = 0; -- cgit v1.2.3 From ac68b1b3b9c73e652dc7ce0585672e23c5a2dca4 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 9 Mar 2018 15:51:20 -0800 Subject: lib/test_kmod.c: fix limit check on number of test devices created As reported by Dan the parentheses is in the wrong place, and since unlikely() call returns either 0 or 1 it's never less than zero. The second issue is that signed integer overflows like "INT_MAX + 1" are undefined behavior. Since num_test_devs represents the number of devices, we want to stop prior to hitting the max, and not rely on the wrap arround at all. So just cap at num_test_devs + 1, prior to assigning a new device. Link: http://lkml.kernel.org/r/20180224030046.24238-1-mcgrof@kernel.org Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Reported-by: Dan Carpenter Signed-off-by: Luis R. Rodriguez Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_kmod.c b/lib/test_kmod.c index e372b97eee13..0e5b7a61460b 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1141,7 +1141,7 @@ static struct kmod_test_device *register_test_dev_kmod(void) mutex_lock(®_dev_mutex); /* int should suffice for number of devices, test for wrap */ - if (unlikely(num_test_devs + 1) < 0) { + if (num_test_devs + 1 == INT_MAX) { pr_err("reached limit of number of test devices\n"); goto out; } -- cgit v1.2.3 From 163cf842f5837334bc69aaf09ad38e11f4573914 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Mar 2018 14:18:46 +0100 Subject: debugobjects: Avoid another unused variable warning debug_objects_maxchecked is only updated in __debug_check_no_obj_freed(), and only read in debug_objects_maxchecked, unfortunately both of these are optional and depend on different Kconfig symbols. When both CONFIG_DEBUG_OBJECTS_FREE and CONFIG_DEBUG_FS are disabled this warning is emitted: lib/debugobjects.c:56:14: error: 'debug_objects_maxchecked' defined but not used [-Werror=unused-variable] Rather than trying to add more complex #ifdef protections, mark the variable as __maybe_unused so it can be silently dropped when usused. 
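The situation is easy to reproduce outside the kernel. In the hedged sketch below, WITH_FREE_CHECK and WITH_STATS are made-up stand-ins for CONFIG_DEBUG_OBJECTS_FREE and CONFIG_DEBUG_FS, and the attribute shown is what the kernel's __maybe_unused macro expands to:

    /* Standalone illustration (not kernel code): both the writer and the only
     * reader of the counter sit behind optional macros, so with both disabled
     * -Wunused-variable fires unless the variable carries the attribute. */
    #include <stdio.h>

    static int max_checked __attribute__((__unused__));

    #ifdef WITH_FREE_CHECK          /* stands in for CONFIG_DEBUG_OBJECTS_FREE */
    void note_checked(int n)
    {
            if (n > max_checked)
                    max_checked = n;
    }
    #endif

    #ifdef WITH_STATS               /* stands in for CONFIG_DEBUG_FS */
    void show_stats(void)
    {
            printf("max_checked :%d\n", max_checked);
    }
    #endif

With both macros undefined, max_checked has neither reader nor writer, and only the attribute keeps the build quiet.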
Fixes: bd9dcd046509 ("debugobjects: Export max loops counter") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Yang Shi Cc: Waiman Long Link: https://lkml.kernel.org/r/20180313131857.158876-1-arnd@arndb.de --- lib/debugobjects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 105ecfc47d8c..994be4805cec 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -53,7 +53,7 @@ static int obj_nr_tofree; static struct kmem_cache *obj_cache; static int debug_objects_maxchain __read_mostly; -static int debug_objects_maxchecked __read_mostly; +static int __maybe_unused debug_objects_maxchecked __read_mostly; static int debug_objects_fixups __read_mostly; static int debug_objects_warnings __read_mostly; static int debug_objects_enabled __read_mostly -- cgit v1.2.3 From 8df3aaaf9b5f8bdfc4036695fa22f35b45b4d92f Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Tue, 13 Mar 2018 11:36:49 -0700 Subject: btree: avoid variable-length allocations geo->keylen cannot be larger than 4. So we might as well make fixed-size allocations. Given the one remaining user, geo->keylen cannot even be larger than 1. Logfs used to have 64bit and 128bit keys, tcm_qla2xxx only has 32bit keys. But let's not break the code if we don't have to. Signed-off-by: Joern Engel Signed-off-by: Linus Torvalds --- lib/btree.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/btree.c b/lib/btree.c index f93a945274af..590facba2c50 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -3,7 +3,7 @@ * * As should be obvious for Linux kernel code, license is GPLv2 * - * Copyright (c) 2007-2008 Joern Engel + * Copyright (c) 2007-2008 Joern Engel * Bits and pieces stolen from Peter Zijlstra's code, which is * Copyright 2007, Red Hat Inc. Peter Zijlstra * GPLv2 @@ -76,6 +76,8 @@ struct btree_geo btree_geo128 = { }; EXPORT_SYMBOL_GPL(btree_geo128); +#define MAX_KEYLEN (2 * LONG_PER_U64) + static struct kmem_cache *btree_cachep; void *btree_alloc(gfp_t gfp_mask, void *pool_data) @@ -313,7 +315,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, { int i, height; unsigned long *node, *oldnode; - unsigned long *retry_key = NULL, key[geo->keylen]; + unsigned long *retry_key = NULL, key[MAX_KEYLEN]; if (keyzero(geo, __key)) return NULL; @@ -639,8 +641,8 @@ EXPORT_SYMBOL_GPL(btree_remove); int btree_merge(struct btree_head *target, struct btree_head *victim, struct btree_geo *geo, gfp_t gfp) { - unsigned long key[geo->keylen]; - unsigned long dup[geo->keylen]; + unsigned long key[MAX_KEYLEN]; + unsigned long dup[MAX_KEYLEN]; void *val; int err; -- cgit v1.2.3 From b3a5d111994450909158929560906f2c1c6c1d85 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 14 Mar 2018 12:45:12 -0700 Subject: percpu_ref: Update doc to dissuade users from depending on internal RCU grace periods percpu_ref internally uses sched-RCU to implement the percpu -> atomic mode switching and the documentation suggested that this could be depended upon. This doesn't seem like a good idea. * percpu_ref uses sched-RCU which has different grace periods regular RCU. Users may combine percpu_ref with regular RCU usage and incorrectly believe that regular RCU grace periods are performed by percpu_ref. This can lead to, for example, use-after-free due to premature freeing. * percpu_ref has a grace period when switching from percpu to atomic mode. It doesn't have one between the last put and release. 
This distinction is subtle and can lead to surprising bugs. * percpu_ref allows starting in and switching to atomic mode manually for debugging and other purposes. This means that there may not be any grace periods from kill to release. This patch makes it clear that the grace periods are percpu_ref's internal implementation detail and can't be depended upon by the users. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Linus Torvalds Signed-off-by: Tejun Heo --- lib/percpu-refcount.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 30e7dd88148b..9f96fa7bc000 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu); * This function normally doesn't block and can be called from any context * but it may block if @confirm_kill is specified and @ref is in the * process of switching to atomic mode by percpu_ref_switch_to_atomic(). + * + * There are no implied RCU grace periods between kill and release. */ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) -- cgit v1.2.3 From e7de6c7cc207be78369d45fb833d7d53aeda47f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:23 +0100 Subject: dma/swiotlb: Remove swiotlb_set_mem_attributes() Now that set_memory_decrypted() is always available we can just call it directly. Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Tom Lendacky Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-12-hch@lst.de Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index c43ec2271469..005d1d87bb2e 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -156,8 +157,6 @@ unsigned long swiotlb_size_or_default(void) return size ? 
size : (IO_TLB_DEFAULT_SIZE); } -void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { } - /* For swiotlb, clear memory encryption mask from dma addresses */ static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev, phys_addr_t address) @@ -209,12 +208,12 @@ void __init swiotlb_update_mem_attributes(void) vaddr = phys_to_virt(io_tlb_start); bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - swiotlb_set_mem_attributes(vaddr, bytes); + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); vaddr = phys_to_virt(io_tlb_overflow_buffer); bytes = PAGE_ALIGN(io_tlb_overflow); - swiotlb_set_mem_attributes(vaddr, bytes); + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); } @@ -355,7 +354,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) io_tlb_start = virt_to_phys(tlb); io_tlb_end = io_tlb_start + bytes; - swiotlb_set_mem_attributes(tlb, bytes); + set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); memset(tlb, 0, bytes); /* @@ -366,7 +365,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) if (!v_overflow_buffer) goto cleanup2; - swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow); + set_memory_decrypted((unsigned long)v_overflow_buffer, + io_tlb_overflow >> PAGE_SHIFT); memset(v_overflow_buffer, 0, io_tlb_overflow); io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); -- cgit v1.2.3 From b6e05477c10c12e36141558fc14f04b00ea634d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:24 +0100 Subject: dma/direct: Handle the memory encryption bit in common code Give the basic phys_to_dma() and dma_to_phys() helpers a __-prefix and add the memory encryption mask to the non-prefixed versions. Use the __-prefixed versions directly instead of clearing the mask again in various places. Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-13-hch@lst.de Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 005d1d87bb2e..8b06b4485e65 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void) return size ? 
size : (IO_TLB_DEFAULT_SIZE); } -/* For swiotlb, clear memory encryption mask from dma addresses */ -static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev, - phys_addr_t address) -{ - return __sme_clr(phys_to_dma(hwdev, address)); -} - /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) @@ -622,7 +615,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, return SWIOTLB_MAP_ERROR; } - start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start); + start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir, attrs); } @@ -726,12 +719,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, goto out_warn; phys_addr = swiotlb_tbl_map_single(dev, - swiotlb_phys_to_dma(dev, io_tlb_start), + __phys_to_dma(dev, io_tlb_start), 0, size, DMA_FROM_DEVICE, 0); if (phys_addr == SWIOTLB_MAP_ERROR) goto out_warn; - *dma_handle = swiotlb_phys_to_dma(dev, phys_addr); + *dma_handle = __phys_to_dma(dev, phys_addr); if (dma_coherent_ok(dev, *dma_handle, size)) goto out_unmap; @@ -867,10 +860,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, map = map_single(dev, phys, size, dir, attrs); if (map == SWIOTLB_MAP_ERROR) { swiotlb_full(dev, size, dir, 1); - return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); + return __phys_to_dma(dev, io_tlb_overflow_buffer); } - dev_addr = swiotlb_phys_to_dma(dev, map); + dev_addr = __phys_to_dma(dev, map); /* Ensure that the address returned is DMA'ble */ if (dma_capable(dev, dev_addr, size)) @@ -879,7 +872,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, attrs |= DMA_ATTR_SKIP_CPU_SYNC; swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); + return __phys_to_dma(dev, io_tlb_overflow_buffer); } /* @@ -1009,7 +1002,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, sg_dma_len(sgl) = 0; return 0; } - sg->dma_address = swiotlb_phys_to_dma(hwdev, map); + sg->dma_address = __phys_to_dma(hwdev, map); } else sg->dma_address = dev_addr; sg_dma_len(sg) = sg->length; @@ -1073,7 +1066,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { - return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer)); + return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); } /* @@ -1085,7 +1078,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) int swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask; + return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } #ifdef CONFIG_DMA_DIRECT_OPS -- cgit v1.2.3 From c10f07aa27dadf5ab5b3d58c48c91a467f80db49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:25 +0100 Subject: dma/direct: Handle force decryption for DMA coherent buffers in common code With that in place the generic DMA-direct routines can be used to allocate non-encrypted bounce buffers, and the x86 SEV case can use the generic swiotlb ops including nice features such as using CMA allocations. Note that I'm not too happy about using sev_active() in DMA-direct, but I couldn't come up with a good enough name for a wrapper to make it worth adding. 
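From a driver's point of view, the benefit of handling this in common code is that nothing changes at the call site. A hypothetical fragment (not part of this series) still uses plain dma_alloc_coherent(); the decryption of the backing pages happens inside dma_direct_alloc() when force_dma_unencrypted() is true:

    /* Hypothetical driver fragment for illustration only: the returned buffer
     * is already mapped decrypted on SEV systems, so the caller needs no
     * SEV-specific handling. */
    #include <linux/dma-mapping.h>

    static void *example_coherent_alloc(struct device *dev, size_t size,
                                        dma_addr_t *dma)
    {
            return dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
    }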
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-14-hch@lst.de Signed-off-by: Ingo Molnar --- lib/dma-direct.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/dma-direct.c b/lib/dma-direct.c index c9e8e21cb334..1277d293d4da 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -9,6 +9,7 @@ #include #include #include +#include #define DIRECT_MAPPING_ERROR 0 @@ -20,6 +21,14 @@ #define ARCH_ZONE_DMA_BITS 24 #endif +/* + * For AMD SEV all DMA must be to unencrypted addresses. + */ +static inline bool force_dma_unencrypted(void) +{ + return sev_active(); +} + static bool check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, const char *caller) @@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { - return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask; + dma_addr_t addr = force_dma_unencrypted() ? + __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); + return addr + size - 1 <= dev->coherent_dma_mask; } void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, @@ -46,6 +57,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; int page_order = get_order(size); struct page *page = NULL; + void *ret; /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) @@ -78,10 +90,15 @@ again: if (!page) return NULL; - - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - memset(page_address(page), 0, size); - return page_address(page); + ret = page_address(page); + if (force_dma_unencrypted()) { + set_memory_decrypted((unsigned long)ret, 1 << page_order); + *dma_handle = __phys_to_dma(dev, page_to_phys(page)); + } else { + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + } + memset(ret, 0, size); + return ret; } /* @@ -92,9 +109,12 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int page_order = get_order(size); + if (force_dma_unencrypted()) + set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, get_order(size)); + free_pages((unsigned long)cpu_addr, page_order); } static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, -- cgit v1.2.3 From 16e73adbca76fd18733278cb688b0ddb4cad162c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:26 +0100 Subject: dma/swiotlb: Remove swiotlb_{alloc,free}_coherent() Unused now that everyone uses swiotlb_{alloc,free}(). 
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-15-hch@lst.de Signed-off-by: Ingo Molnar --- lib/swiotlb.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 8b06b4485e65..15954b86f09e 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void) return size ? size : (IO_TLB_DEFAULT_SIZE); } -/* Note that this doesn't work with highmem page */ -static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, - volatile void *address) -{ - return phys_to_dma(hwdev, virt_to_phys(address)); -} - static bool no_iotlb_memory; void swiotlb_print_info(void) @@ -752,28 +745,6 @@ out_warn: return NULL; } -void * -swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) -{ - int order = get_order(size); - unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0; - void *ret; - - ret = (void *)__get_free_pages(flags, order); - if (ret) { - *dma_handle = swiotlb_virt_to_bus(hwdev, ret); - if (dma_coherent_ok(hwdev, *dma_handle, size)) { - memset(ret, 0, size); - return ret; - } - free_pages((unsigned long)ret, order); - } - - return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs); -} -EXPORT_SYMBOL(swiotlb_alloc_coherent); - static bool swiotlb_free_buffer(struct device *dev, size_t size, dma_addr_t dma_addr) { @@ -793,15 +764,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size, return true; } -void -swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dev_addr) -{ - if (!swiotlb_free_buffer(hwdev, size, dev_addr)) - free_pages((unsigned long)vaddr, get_order(size)); -} -EXPORT_SYMBOL(swiotlb_free_coherent); - static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, int do_panic) -- cgit v1.2.3 From 52fda36d63bfc8c8e8ae5eda8eb5ac6f52cd67ed Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 20 Mar 2018 09:58:51 -0300 Subject: test_bpf: Fix testing with CONFIG_BPF_JIT_ALWAYS_ON=y on other arches Function bpf_fill_maxinsns11 is designed to not be able to be JITed on x86_64. So, it fails when CONFIG_BPF_JIT_ALWAYS_ON=y, and commit 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") makes sure that failure is detected on that case. However, it does not fail on other architectures, which have a different JIT compiler design. So, test_bpf has started to fail to load on those. After this fix, test_bpf loads fine on both x86_64 and ppc64el. 
Fixes: 09584b406742 ("bpf: fix selftests/bpf test_kmod.sh failure when CONFIG_BPF_JIT_ALWAYS_ON=y") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Yonghong Song Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2efb213716fa..3e9335493fe4 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5467,7 +5467,7 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, -#ifdef CONFIG_BPF_JIT_ALWAYS_ON +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86) CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, #else CLASSIC | FLAG_NO_DATA, -- cgit v1.2.3 From b6bdb7517c3d3f41f20e5c2948d6bc3f8897394e Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 22 Mar 2018 16:17:20 -0700 Subject: mm/vmalloc: add interfaces to free unmapped page table On architectures with CONFIG_HAVE_ARCH_HUGE_VMAP set, ioremap() may create pud/pmd mappings. A kernel panic was observed on arm64 systems with Cortex-A75 in the following steps as described by Hanjun Guo. 1. ioremap a 4K size, valid page table will build, 2. iounmap it, pte0 will set to 0; 3. ioremap the same address with 2M size, pgd/pmd is unchanged, then set the a new value for pmd; 4. pte0 is leaked; 5. CPU may meet exception because the old pmd is still in TLB, which will lead to kernel panic. This panic is not reproducible on x86. INVLPG, called from iounmap, purges all levels of entries associated with purged address on x86. x86 still has memory leak. The patch changes the ioremap path to free unmapped page table(s) since doing so in the unmap path has the following issues: - The iounmap() path is shared with vunmap(). Since vmap() only supports pte mappings, making vunmap() to free a pte page is an overhead for regular vmap users as they do not need a pte page freed up. - Checking if all entries in a pte page are cleared in the unmap path is racy, and serializing this check is expensive. - The unmap path calls free_vmap_area_noflush() to do lazy TLB purges. Clearing a pud/pmd entry before the lazy TLB purges needs extra TLB purge. Add two interfaces, pud_free_pmd_page() and pmd_free_pte_page(), which clear a given pud/pmd entry and free up a page for the lower level entries. This patch implements their stub functions on x86 and arm64, which work as workaround. [akpm@linux-foundation.org: fix typo in pmd_free_pte_page() stub] Link: http://lkml.kernel.org/r/20180314180155.19492-2-toshi.kani@hpe.com Fixes: e61ce6ade404e ("mm: change ioremap to set up huge I/O mappings") Reported-by: Lei Li Signed-off-by: Toshi Kani Cc: Catalin Marinas Cc: Wang Xuefeng Cc: Will Deacon Cc: Hanjun Guo Cc: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Borislav Petkov Cc: Matthew Wilcox Cc: Chintan Pandya Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ioremap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ioremap.c b/lib/ioremap.c index b808a390e4c3..54e5bbaa3200 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && - IS_ALIGNED(phys_addr + addr, PMD_SIZE)) { + IS_ALIGNED(phys_addr + addr, PMD_SIZE) && + pmd_free_pte_page(pmd)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && - IS_ALIGNED(phys_addr + addr, PUD_SIZE)) { + IS_ALIGNED(phys_addr + addr, PUD_SIZE) && + pud_free_pmd_page(pud)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } -- cgit v1.2.3 From 0803e6051c1562e1525c3e044313390bf8b35c2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Mar 2018 18:49:30 +0100 Subject: swiotlb: Make swiotlb_{alloc,free}_buffer depend on CONFIG_DMA_DIRECT_OPS Otherwise this causes unused symbol warnings for configs that build swiotlb.c only for use by xen-swiotlb.c and that don't otherwise select CONFIG_DMA_DIRECT_OPS, which is possible on arm. Fixes: 16e73adbca76 ("dma/swiotlb: Remove swiotlb_{alloc,free}_coherent()") Reported-by: Stephen Rothwell Signed-off-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Cc: iommu@lists.linux-foundation.org Cc: konrad.wilk@oracle.com Link: https://lkml.kernel.org/r/20180323174930.17767-1-hch@lst.de --- lib/swiotlb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 15954b86f09e..47aeb04c1997 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -692,6 +692,7 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } } +#ifdef CONFIG_DMA_DIRECT_OPS static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, size_t size) { @@ -763,6 +764,7 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size, DMA_ATTR_SKIP_CPU_SYNC); return true; } +#endif static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, -- cgit v1.2.3 From a687a5337063af99ebd0eebaa6f4b4cf2e07c21b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Mar 2018 23:30:54 +0100 Subject: treewide: simplify Kconfig dependencies for removed archs A lot of Kconfig symbols have architecture specific dependencies. In those cases that depend on architectures we have already removed, they can be omitted. Acked-by: Kalle Valo Acked-by: Alexandre Belloni Signed-off-by: Arnd Bergmann --- lib/Kconfig.debug | 13 +++++-------- lib/test_user_copy.c | 2 -- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 41ac9d294245..6927c6d8d185 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -165,7 +165,7 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" - depends on DEBUG_INFO && !FRV + depends on DEBUG_INFO help Generate debug info into separate .dwo files. 
This significantly reduces the build directory size for builds with DEBUG_INFO, @@ -354,10 +354,7 @@ config ARCH_WANT_FRAME_POINTERS config FRAME_POINTER bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL && \ - (CRIS || M68K || FRV || UML || \ - SUPERH || BLACKFIN) || \ - ARCH_WANT_FRAME_POINTERS + depends on DEBUG_KERNEL && (M68K || UML || SUPERH) || ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS help If you say Y here the resulting kernel image will be slightly @@ -1138,7 +1135,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE && !X86 + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86 select KALLSYMS select KALLSYMS_ALL @@ -1571,7 +1568,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE && !X86 + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 help Provide stacktrace filter for fault-injection capabilities @@ -1969,7 +1966,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED - default y if TILE || PPC || X86 || ARM64 + default y if PPC || X86 || ARM64 ---help--- If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index a6556f3364d1..e161f0498f42 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -31,8 +31,6 @@ * their capability at compile-time, we just have to opt-out certain archs. */ #if BITS_PER_LONG == 64 || (!(defined(CONFIG_ARM) && !defined(MMU)) && \ - !defined(CONFIG_BLACKFIN) && \ - !defined(CONFIG_M32R) && \ !defined(CONFIG_M68K) && \ !defined(CONFIG_MICROBLAZE) && \ !defined(CONFIG_NIOS2) && \ -- cgit v1.2.3 From 889ce12b1650b3c388634451872638a08faf6d6b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Mar 2018 16:05:23 +0100 Subject: raid: remove tile specific raid6 implementation The Tile architecture is getting removed, so we no longer need this either. 
Acked-by: Ard Biesheuvel Signed-off-by: Arnd Bergmann --- lib/raid6/Makefile | 6 ---- lib/raid6/algos.c | 3 -- lib/raid6/test/Makefile | 7 ---- lib/raid6/tilegx.uc | 87 ------------------------------------------------- 4 files changed, 103 deletions(-) delete mode 100644 lib/raid6/tilegx.uc (limited to 'lib') diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 4add700ddfe3..44d6b46df051 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -7,7 +7,6 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o -raid6_pq-$(CONFIG_TILEGX) += tilegx8.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o hostprogs-y += mktables @@ -115,11 +114,6 @@ $(obj)/neon8.c: UNROLL := 8 $(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) -targets += tilegx8.c -$(obj)/tilegx8.c: UNROLL := 8 -$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - targets += s390vx8.c $(obj)/s390vx8.c: UNROLL := 8 $(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 476994723258..c65aa80d67ed 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -75,9 +75,6 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec4, &raid6_altivec8, #endif -#if defined(CONFIG_TILEGX) - &raid6_tilegx8, -#endif #if defined(CONFIG_S390) &raid6_s390vx8, #endif diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index be1010bdc435..fabc477b1417 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -51,9 +51,6 @@ else OBJS += altivec1.o altivec2.o altivec4.o altivec8.o endif endif -ifeq ($(ARCH),tilegx) -OBJS += tilegx8.o -endif .c.o: $(CC) $(CFLAGS) -c -o $@ $< @@ -116,15 +113,11 @@ int16.c: int.uc ../unroll.awk int32.c: int.uc ../unroll.awk $(AWK) ../unroll.awk -vN=32 < int.uc > $@ -tilegx8.c: tilegx.uc ../unroll.awk - $(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@ - tables.c: mktables ./mktables > tables.c clean: rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test - rm -f tilegx*.c spotless: clean rm -f *~ diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc deleted file mode 100644 index 2dd291a11264..000000000000 --- a/lib/raid6/tilegx.uc +++ /dev/null @@ -1,87 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright 2002 H. Peter Anvin - All Rights Reserved - * Copyright 2012 Tilera Corporation - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, - * Boston MA 02111-1307, USA; either version 2 of the License, or - * (at your option) any later version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * tilegx$#.c - * - * $#-way unrolled TILE-Gx SIMD for RAID-6 math. - * - * This file is postprocessed using unroll.awk. 
- * - */ -#include - -/* Create 8 byte copies of constant byte */ -# define NBYTES(x) (__insn_v1addi(0, x)) -# define NSIZE 8 - -/* - * The SHLBYTE() operation shifts each byte left by 1, *not* - * rolling over into the next byte - */ -static inline __attribute_const__ u64 SHLBYTE(u64 v) -{ - /* Vector One Byte Shift Left Immediate. */ - return __insn_v1shli(v, 1); -} - -/* - * The MASK() operation returns 0xFF in any byte for which the high - * bit is 1, 0x00 for any byte for which the high bit is 0. - */ -static inline __attribute_const__ u64 MASK(u64 v) -{ - /* Vector One Byte Shift Right Signed Immediate. */ - return __insn_v1shrsi(v, 7); -} - - -void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) -{ - u8 **dptr = (u8 **)ptrs; - u64 *p, *q; - int d, z, z0; - - u64 wd$$, wq$$, wp$$, w1$$, w2$$; - u64 x1d = NBYTES(0x1d); - u64 * z0ptr; - - z0 = disks - 3; /* Highest data disk */ - p = (u64 *)dptr[z0+1]; /* XOR parity */ - q = (u64 *)dptr[z0+2]; /* RS syndrome */ - - z0ptr = (u64 *)&dptr[z0][0]; - for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { - wq$$ = wp$$ = *z0ptr++; - for ( z = z0-1 ; z >= 0 ; z-- ) { - wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE]; - wp$$ = wp$$ ^ wd$$; - w2$$ = MASK(wq$$); - w1$$ = SHLBYTE(wq$$); - w2$$ = w2$$ & x1d; - w1$$ = w1$$ ^ w2$$; - wq$$ = w1$$ ^ wd$$; - } - *p++ = wp$$; - *q++ = wq$$; - } -} - -const struct raid6_calls raid6_tilegx$# = { - raid6_tilegx$#_gen_syndrome, - NULL, /* XOR not yet implemented */ - NULL, - "tilegx$#", - 0 -}; -- cgit v1.2.3 From e89f5b37015309a8bdf0b21d08007580b92f92a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Mar 2018 15:35:35 +0200 Subject: dma-mapping: Don't clear GFP_ZERO in dma_alloc_attrs Revert the clearing of __GFP_ZERO in dma_alloc_attrs and move it to dma_direct_alloc for now. While most common architectures always zero dma coherent allocations (and x86 did so since day one), this is not documented and at least arc and s390 do not zero without the explicit __GFP_ZERO argument. Fixes: 57bf5a8963f8 ("dma-mapping: clear harmful GFP_* flags in common code") Reported-by: Evgeniy Didin Reported-by: Sebastian Ott Signed-off-by: Christoph Hellwig Signed-off-by: Thomas Gleixner Tested-by: Evgeniy Didin Cc: iommu@lists.linux-foundation.org Link: https://lkml.kernel.org/r/20180328133535.17302-2-hch@lst.de --- lib/dma-direct.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/dma-direct.c b/lib/dma-direct.c index 1277d293d4da..c0bba30fef0a 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -59,6 +59,9 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, struct page *page = NULL; void *ret; + /* we always manually zero the memory once we are done: */ + gfp &= ~__GFP_ZERO; + /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) gfp |= GFP_DMA; -- cgit v1.2.3 From 5149cbac4235e12a34cf089592a8bd1c9fcfa467 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 30 Mar 2018 17:27:58 -0400 Subject: locking/rwsem: Add DEBUG_RWSEMS to look for lock/unlock mismatches For a rwsem, locking can either be exclusive or shared. The corresponding exclusive or shared unlock must be used. Otherwise, the protected data structures may get corrupted or the lock may be in an inconsistent state. In order to detect such an anomaly, a new configuration option DEBUG_RWSEMS is added which can be enabled to look for such mismatches and print warnings when that happens.
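A minimal made-up sketch of the kind of mismatch this option is meant to flag; the semaphore and function names are hypothetical, while DECLARE_RWSEM(), down_read() and up_write() are the existing rwsem API:

    #include <linux/rwsem.h>

    static DECLARE_RWSEM(example_sem);

    static void example_reader(void)
    {
            down_read(&example_sem);        /* takes the shared (read) side */
            /* ... read-side critical section ... */
            up_write(&example_sem);         /* bug: releases the exclusive side,
                                             * should have been up_read() */
    }

With the option enabled, a release that does not match the way the semaphore was acquired is detected and a warning is printed rather than silently leaving the lock in an inconsistent state.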
Signed-off-by: Waiman Long Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1522445280-7767-2-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 64155e310a9f..88650ab5cedb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1075,6 +1075,13 @@ config DEBUG_WW_MUTEX_SLOWPATH even a debug kernel. If you are a driver writer, enable it. If you are a distro, do not. +config DEBUG_RWSEMS + bool "RW Semaphore debugging: basic checks" + depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER + help + This debugging feature allows mismatched rw semaphore locks and unlocks + to be detected and reported. + config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -1097,6 +1104,7 @@ config PROVE_LOCKING select DEBUG_SPINLOCK select DEBUG_MUTEXES select DEBUG_RT_MUTEXES if RT_MUTEXES + select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER select DEBUG_LOCK_ALLOC select TRACE_IRQFLAGS default n -- cgit v1.2.3 From f07cbebb6daf04e5c9721e5be2737a6068c7e2a2 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 30 Mar 2018 17:27:59 -0400 Subject: locking/Kconfig: Add LOCK_DEBUGGING_SUPPORT to make it more readable There are a couple of lock debugging Kconfig options that depend on the following support options: - TRACE_IRQFLAGS_SUPPORT - STACKTRACE_SUPPORT - LOCKDEP_SUPPORT That makes those lock debugging options harder to read and understand. So a new LOCK_DEBUGGING_SUPPORT option is added that is equivalent to the above three options together. That makes the Kconfig.debug file more readable. Signed-off-by: Waiman Long Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E.
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1522445280-7767-3-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 88650ab5cedb..ee7ca42e737e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1034,6 +1034,11 @@ config DEBUG_PREEMPT menu "Lock Debugging (spinlocks, mutexes, etc...)" +config LOCK_DEBUGGING_SUPPORT + bool + depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + default y + config DEBUG_RT_MUTEXES bool "RT Mutex debugging, deadlock detection" depends on DEBUG_KERNEL && RT_MUTEXES @@ -1060,7 +1065,7 @@ config DEBUG_MUTEXES config DEBUG_WW_MUTEX_SLOWPATH bool "Wait/wound mutex debugging: Slowpath testing" - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select DEBUG_LOCK_ALLOC select DEBUG_SPINLOCK select DEBUG_MUTEXES @@ -1084,7 +1089,7 @@ config DEBUG_RWSEMS config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select DEBUG_SPINLOCK select DEBUG_MUTEXES select DEBUG_RT_MUTEXES if RT_MUTEXES @@ -1099,7 +1104,7 @@ config DEBUG_LOCK_ALLOC config PROVE_LOCKING bool "Lock debugging: prove locking correctness" - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select LOCKDEP select DEBUG_SPINLOCK select DEBUG_MUTEXES @@ -1144,7 +1149,7 @@ config PROVE_LOCKING config LOCKDEP bool - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select STACKTRACE select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE && !X86 select KALLSYMS @@ -1155,7 +1160,7 @@ config LOCKDEP_SMALL config LOCK_STAT bool "Lock usage statistics" - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select LOCKDEP select DEBUG_SPINLOCK select DEBUG_MUTEXES -- cgit v1.2.3 From 19193bcad8dced863f2f720b1a76110bda07c970 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 30 Mar 2018 17:28:00 -0400 Subject: locking/Kconfig: Restructure the lock debugging menu The two config options in the lock debugging menu that are probably the most frequently used, as far as I am concerned, are PROVE_LOCKING and LOCK_STAT. From a UI perspective, they should be front and center. So these two options are now moved to the top of the lock debugging menu. The DEBUG_WW_MUTEX_SLOWPATH option is also added to the PROVE_LOCKING umbrella. Signed-off-by: Waiman Long Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E.
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1522445280-7767-4-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 135 +++++++++++++++++++++++++++--------------------------- 1 file changed, 68 insertions(+), 67 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ee7ca42e737e..4f7b3a11eb4d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1039,6 +1039,74 @@ config LOCK_DEBUGGING_SUPPORT depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT default y +config PROVE_LOCKING + bool "Lock debugging: prove locking correctness" + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT + select LOCKDEP + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select DEBUG_RT_MUTEXES if RT_MUTEXES + select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER + select DEBUG_WW_MUTEX_SLOWPATH + select DEBUG_LOCK_ALLOC + select TRACE_IRQFLAGS + default n + help + This feature enables the kernel to prove that all locking + that occurs in the kernel runtime is mathematically + correct: that under no circumstance could an arbitrary (and + not yet triggered) combination of observed locking + sequences (on an arbitrary number of CPUs, running an + arbitrary number of tasks and interrupt contexts) cause a + deadlock. + + In short, this feature enables the kernel to report locking + related deadlocks before they actually occur. + + The proof does not depend on how hard and complex a + deadlock scenario would be to trigger: how many + participant CPUs, tasks and irq-contexts would be needed + for it to trigger. The proof also does not depend on + timing: if a race and a resulting deadlock is possible + theoretically (no matter how unlikely the race scenario + is), it will be proven so and will immediately be + reported by the kernel (once the event is observed that + makes the deadlock theoretically possible). + + If a deadlock is impossible (i.e. the locking rules, as + observed by the kernel, are mathematically correct), the + kernel reports nothing. + + NOTE: this feature can also be enabled for rwlocks, mutexes + and rwsems - in which case all dependencies between these + different locking variants are observed and mapped too, and + the proof of observed correctness is also maintained for an + arbitrary combination of these separate locking variants. + + For more details, see Documentation/locking/lockdep-design.txt. + +config LOCK_STAT + bool "Lock usage statistics" + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT + select LOCKDEP + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select DEBUG_RT_MUTEXES if RT_MUTEXES + select DEBUG_LOCK_ALLOC + default n + help + This feature enables tracking lock contention points + + For more details, see Documentation/locking/lockstat.txt + + This also enables lock events required by "perf lock", + subcommand of perf. + If you want to use "perf lock", you also need to turn on + CONFIG_EVENT_TRACING. + + CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. + (CONFIG_LOCKDEP defines "acquire" and "release" events.) + config DEBUG_RT_MUTEXES bool "RT Mutex debugging, deadlock detection" depends on DEBUG_KERNEL && RT_MUTEXES @@ -1102,51 +1170,6 @@ config DEBUG_LOCK_ALLOC spin_lock_init()/mutex_init()/etc., or whether there is any lock held during task exit. 
-config PROVE_LOCKING - bool "Lock debugging: prove locking correctness" - depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT - select LOCKDEP - select DEBUG_SPINLOCK - select DEBUG_MUTEXES - select DEBUG_RT_MUTEXES if RT_MUTEXES - select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER - select DEBUG_LOCK_ALLOC - select TRACE_IRQFLAGS - default n - help - This feature enables the kernel to prove that all locking - that occurs in the kernel runtime is mathematically - correct: that under no circumstance could an arbitrary (and - not yet triggered) combination of observed locking - sequences (on an arbitrary number of CPUs, running an - arbitrary number of tasks and interrupt contexts) cause a - deadlock. - - In short, this feature enables the kernel to report locking - related deadlocks before they actually occur. - - The proof does not depend on how hard and complex a - deadlock scenario would be to trigger: how many - participant CPUs, tasks and irq-contexts would be needed - for it to trigger. The proof also does not depend on - timing: if a race and a resulting deadlock is possible - theoretically (no matter how unlikely the race scenario - is), it will be proven so and will immediately be - reported by the kernel (once the event is observed that - makes the deadlock theoretically possible). - - If a deadlock is impossible (i.e. the locking rules, as - observed by the kernel, are mathematically correct), the - kernel reports nothing. - - NOTE: this feature can also be enabled for rwlocks, mutexes - and rwsems - in which case all dependencies between these - different locking variants are observed and mapped too, and - the proof of observed correctness is also maintained for an - arbitrary combination of these separate locking variants. - - For more details, see Documentation/locking/lockdep-design.txt. - config LOCKDEP bool depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT @@ -1158,28 +1181,6 @@ config LOCKDEP config LOCKDEP_SMALL bool -config LOCK_STAT - bool "Lock usage statistics" - depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT - select LOCKDEP - select DEBUG_SPINLOCK - select DEBUG_MUTEXES - select DEBUG_RT_MUTEXES if RT_MUTEXES - select DEBUG_LOCK_ALLOC - default n - help - This feature enables tracking lock contention points - - For more details, see Documentation/locking/lockstat.txt - - This also enables lock events required by "perf lock", - subcommand of perf. - If you want to use "perf lock", you also need to turn on - CONFIG_EVENT_TRACING. - - CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. - (CONFIG_LOCKDEP defines "acquire" and "release" events.) - config DEBUG_LOCKDEP bool "Lock dependency engine debugging" depends on DEBUG_KERNEL && LOCKDEP -- cgit v1.2.3
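PROVE_LOCKING, now placed at the top of the menu, enables the lockdep prover: it records the order in which locks are taken and reports a circular dependency as soon as one becomes observable, not only when a deadlock actually fires. A minimal sketch of the classic pattern it reports (the lock and function names here are hypothetical; DEFINE_SPINLOCK() and spin_lock()/spin_unlock() are the real API):

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(lock_a);
    static DEFINE_SPINLOCK(lock_b);

    static void path_one(void)
    {
            spin_lock(&lock_a);
            spin_lock(&lock_b);     /* lockdep records the order a -> b */
            spin_unlock(&lock_b);
            spin_unlock(&lock_a);
    }

    static void path_two(void)
    {
            spin_lock(&lock_b);
            spin_lock(&lock_a);     /* order b -> a: possible ABBA deadlock reported */
            spin_unlock(&lock_a);
            spin_unlock(&lock_b);
    }

LOCK_STAT, moved alongside it, collects contention statistics on top of the same lockdep instrumentation.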