From 3aaabbf1c39effa2ac0c11103ed07ef03b0a0d89 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Fri, 17 Nov 2017 15:26:19 -0800 Subject: lib/dma-debug.c: fix incorrect pfn calculation dma-debug reports the following warning: WARNING: CPU: 3 PID: 298 at kernel-4.4/lib/dma-debug.c:604 debug _dma_assert_idle+0x1a8/0x230() DMA-API: cpu touching an active dma mapped cacheline [cln=0x00000882300] CPU: 3 PID: 298 Comm: vold Tainted: G W O 4.4.22+ #1 Hardware name: MT6739 (DT) Call trace: debug_dma_assert_idle+0x1a8/0x230 wp_page_copy.isra.96+0x118/0x520 do_wp_page+0x4fc/0x534 handle_mm_fault+0xd4c/0x1310 do_page_fault+0x1c8/0x394 do_mem_abort+0x50/0xec I found that debug_dma_alloc_coherent() and debug_dma_free_coherent() assume that dma_alloc_coherent() always returns a linear address. However it's possible that dma_alloc_coherent() returns a non-linear address. In this case, page_to_pfn(virt_to_page(virt)) will return an incorrect pfn. If the pfn is valid and mapped as a COW page, we will hit the warning when doing wp_page_copy(). Fix this by calculating pfn for linear and non-linear addresses. [miles.chen@mediatek.com: v4] Link: http://lkml.kernel.org/r/1510872972-23919-1-git-send-email-miles.chen@mediatek.com Link: http://lkml.kernel.org/r/1506484087-1177-1-git-send-email-miles.chen@mediatek.com Signed-off-by: Miles Chen Reviewed-by: Robin Murphy Cc: Christoph Hellwig Cc: Marek Szyprowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index ea4cc3dde4f1..1b34d210452c 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1495,14 +1495,22 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, if (!entry) return; + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + return; + entry->type = dma_debug_coherent; entry->dev = dev; - entry->pfn = page_to_pfn(virt_to_page(virt)); entry->offset = offset_in_page(virt); entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; + if (is_vmalloc_addr(virt)) + entry->pfn = vmalloc_to_pfn(virt); + else + entry->pfn = page_to_pfn(virt_to_page(virt)); + add_dma_entry(entry); } EXPORT_SYMBOL(debug_dma_alloc_coherent); @@ -1513,13 +1521,21 @@ void debug_dma_free_coherent(struct device *dev, size_t size, struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .pfn = page_to_pfn(virt_to_page(virt)), .offset = offset_in_page(virt), .dev_addr = addr, .size = size, .direction = DMA_BIDIRECTIONAL, }; + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + return; + + if (is_vmalloc_addr(virt)) + ref.pfn = vmalloc_to_pfn(virt); + else + ref.pfn = page_to_pfn(virt_to_page(virt)); + if (unlikely(dma_debug_disabled())) return; -- cgit v1.2.3 From aaf5dcfb223617ac2d16113e4b500199c65689de Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 15:27:06 -0800 Subject: kernel debug: support resetting WARN_ONCE for all architectures Some architectures store the WARN_ONCE state in the flags field of the bug_entry. Clear that one too when resetting once state through /sys/kernel/debug/clear_warn_once Pointed out by Michael Ellerman Improves the earlier patch that add clear_warn_once. [ak@linux.intel.com: add a missing ifdef CONFIG_MODULES] Link: http://lkml.kernel.org/r/20171020170633.9593-1-andi@firstfloor.org [akpm@linux-foundation.org: fix unused var warning] [akpm@linux-foundation.org: Use 0200 for clear_warn_once file, per mpe] [akpm@linux-foundation.org: clear BUGFLAG_DONE in clear_once_table(), per mpe] Link: http://lkml.kernel.org/r/20171019204642.7404-1-andi@firstfloor.org Signed-off-by: Andi Kleen Tested-by: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index 1e094408c893..f66be6bf6206 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -196,3 +196,26 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_BUG; } + +static void clear_once_table(struct bug_entry *start, struct bug_entry *end) +{ + struct bug_entry *bug; + + for (bug = start; bug < end; bug++) + bug->flags &= ~BUGFLAG_DONE; +} + +void generic_bug_clear_once(void) +{ +#ifdef CONFIG_MODULES + struct module *mod; + + rcu_read_lock_sched(); + list_for_each_entry_rcu(mod, &module_bug_list, bug_list) + clear_once_table(mod->bug_table, + mod->bug_table + mod->num_bugs); + rcu_read_unlock_sched(); +#endif + + clear_once_table(__start___bug_table, __stop___bug_table); +} -- cgit v1.2.3 From 2a8358d8a339540f00ec596526690e8eeca931a3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 17 Nov 2017 15:27:21 -0800 Subject: bug: define the "cut here" string in a single place The "cut here" string is used in a few paths. Define it in a single place. Link: http://lkml.kernel.org/r/1510100869-73751-3-git-send-email-keescook@chromium.org Signed-off-by: Kees Cook Cc: Arnd Bergmann Cc: Fengguang Wu Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra (Intel) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index f66be6bf6206..c1b0fad31b10 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -186,7 +186,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_WARN; } - printk(KERN_DEFAULT "------------[ cut here ]------------\n"); + printk(KERN_DEFAULT CUT_HERE); if (file) pr_crit("kernel BUG at %s:%u!\n", file, line); -- cgit v1.2.3 From 1f3c790bd5989fcfec9e53ad8fa09f5b740c958f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Nov 2017 15:27:35 -0800 Subject: dynamic-debug-howto: fix optional/omitted ending line number to be LARGE instead of 0 line-range is supposed to treat "1-" as "1-endoffile", so handle the special case by setting last_lineno to UINT_MAX. Fixes this error: dynamic_debug:ddebug_parse_query: last-line:0 < 1st-line:1 dynamic_debug:ddebug_exec_query: query parse failed Link: http://lkml.kernel.org/r/10a6a101-e2be-209f-1f41-54637824788e@infradead.org Signed-off-by: Randy Dunlap Acked-by: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index da796e2dc4f5..c7c96bc7654a 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -360,6 +360,10 @@ static int ddebug_parse_query(char *words[], int nwords, if (parse_lineno(last, &query->last_lineno) < 0) return -EINVAL; + /* special case for last lineno not specified */ + if (query->last_lineno == 0) + query->last_lineno = UINT_MAX; + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno, -- cgit v1.2.3 From d6b28e0996962aeadd3777ae565ae03dd5c59f18 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 17 Nov 2017 15:27:56 -0800 Subject: lib: add module support to string tests Extract the string test code into its own source file, to allow compiling it either to a loadable module, or built into the kernel. Fixes: 03270c13c5ffaa6a ("lib/string.c: add testcases for memset16/32/64") Link: http://lkml.kernel.org/r/1505397744-3387-1-git-send-email-geert@linux-m68k.org Signed-off-by: Geert Uytterhoeven Cc: Matthew Wilcox Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 2 +- lib/Makefile | 1 + lib/string.c | 141 ------------------------------------------------------ lib/test_string.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 143 insertions(+), 142 deletions(-) create mode 100644 lib/test_string.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index a2b6745324ab..368972f0db78 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -584,7 +584,7 @@ config PRIME_NUMBERS tristate config STRING_SELFTEST - bool "Test string functions" + tristate "Test string functions" endmenu diff --git a/lib/Makefile b/lib/Makefile index 136a0b254564..e3cb2a241338 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,6 +40,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ once.o refcount.o usercopy.o errseq.o +obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o diff --git a/lib/string.c b/lib/string.c index 5e8d410a93df..64a9e33f1daa 100644 --- a/lib/string.c +++ b/lib/string.c @@ -1052,144 +1052,3 @@ void fortify_panic(const char *name) BUG(); } EXPORT_SYMBOL(fortify_panic); - -#ifdef CONFIG_STRING_SELFTEST -#include -#include - -static __init int memset16_selftest(void) -{ - unsigned i, j, k; - u16 v, *p; - - p = kmalloc(256 * 2 * 2, GFP_KERNEL); - if (!p) - return -1; - - for (i = 0; i < 256; i++) { - for (j = 0; j < 256; j++) { - memset(p, 0xa1, 256 * 2 * sizeof(v)); - memset16(p + i, 0xb1b2, j); - for (k = 0; k < 512; k++) { - v = p[k]; - if (k < i) { - if (v != 0xa1a1) - goto fail; - } else if (k < i + j) { - if (v != 0xb1b2) - goto fail; - } else { - if (v != 0xa1a1) - goto fail; - } - } - } - } - -fail: - kfree(p); - if (i < 256) - return (i << 24) | (j << 16) | k; - return 0; -} - -static __init int memset32_selftest(void) -{ - unsigned i, j, k; - u32 v, *p; - - p = kmalloc(256 * 2 * 4, GFP_KERNEL); - if (!p) - return -1; - - for (i = 0; i < 256; i++) { - for (j = 0; j < 256; j++) { - memset(p, 0xa1, 256 * 2 * sizeof(v)); - memset32(p + i, 0xb1b2b3b4, j); - for (k = 0; k < 512; k++) { - v = p[k]; - if (k < i) { - if (v != 0xa1a1a1a1) - goto fail; - } else if (k < i + j) { - if (v != 0xb1b2b3b4) - goto fail; - } else { - if (v != 0xa1a1a1a1) - goto fail; - } - } - } - } - -fail: - kfree(p); - if (i < 256) - return (i << 24) | (j << 16) | k; - return 0; -} - -static __init int memset64_selftest(void) -{ - unsigned i, j, k; - u64 v, *p; - - p = kmalloc(256 * 2 * 8, GFP_KERNEL); - if (!p) - return -1; - - for (i = 0; i < 256; i++) { - for (j = 0; j < 256; j++) { - memset(p, 0xa1, 256 * 2 * sizeof(v)); - memset64(p + i, 0xb1b2b3b4b5b6b7b8ULL, j); - for (k = 0; k < 512; k++) { - v = p[k]; - if (k < i) { - if (v != 0xa1a1a1a1a1a1a1a1ULL) - goto fail; - } else if (k < i + j) { - if (v != 0xb1b2b3b4b5b6b7b8ULL) - goto fail; - } else { - if (v != 0xa1a1a1a1a1a1a1a1ULL) - goto fail; - } - } - } - } - -fail: - kfree(p); - if (i < 256) - return (i << 24) | (j << 16) | k; - return 0; -} - -static __init int string_selftest_init(void) -{ - int test, subtest; - - test = 1; - subtest = memset16_selftest(); - if (subtest) - goto fail; - - test = 2; - subtest = memset32_selftest(); - if (subtest) - goto fail; - - test = 3; - subtest = memset64_selftest(); - if (subtest) - goto fail; - - pr_info("String selftests succeeded\n"); - return 0; -fail: - pr_crit("String selftest failure %d.%08x\n", test, subtest); - return 0; -} - -module_init(string_selftest_init); -#endif /* CONFIG_STRING_SELFTEST */ diff --git a/lib/test_string.c b/lib/test_string.c new file mode 100644 index 000000000000..0fcdb82dca86 --- /dev/null +++ b/lib/test_string.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include + +static __init int memset16_selftest(void) +{ + unsigned i, j, k; + u16 v, *p; + + p = kmalloc(256 * 2 * 2, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset16(p + i, 0xb1b2, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2) + goto fail; + } else { + if (v != 0xa1a1) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int memset32_selftest(void) +{ + unsigned i, j, k; + u32 v, *p; + + p = kmalloc(256 * 2 * 4, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset32(p + i, 0xb1b2b3b4, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1a1a1) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2b3b4) + goto fail; + } else { + if (v != 0xa1a1a1a1) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int memset64_selftest(void) +{ + unsigned i, j, k; + u64 v, *p; + + p = kmalloc(256 * 2 * 8, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset64(p + i, 0xb1b2b3b4b5b6b7b8ULL, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1a1a1a1a1a1a1ULL) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2b3b4b5b6b7b8ULL) + goto fail; + } else { + if (v != 0xa1a1a1a1a1a1a1a1ULL) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int string_selftest_init(void) +{ + int test, subtest; + + test = 1; + subtest = memset16_selftest(); + if (subtest) + goto fail; + + test = 2; + subtest = memset32_selftest(); + if (subtest) + goto fail; + + test = 3; + subtest = memset64_selftest(); + if (subtest) + goto fail; + + pr_info("String selftests succeeded\n"); + return 0; +fail: + pr_crit("String selftest failure %d.%08x\n", test, subtest); + return 0; +} + +module_init(string_selftest_init); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From dc2bf000a2848cf1dee373db14ce2cd1fe3ee394 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 17 Nov 2017 15:28:00 -0800 Subject: lib/test: delete five error messages for failed memory allocations Omit extra messages for a memory allocation failure in these functions. This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/410a4c5a-4ee0-6fcc-969c-103d8e496b78@users.sourceforge.net Signed-off-by: Markus Elfring Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 5 ++--- lib/test_kmod.c | 8 ++------ lib/test_list_sort.c | 9 +++------ 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index a25c9763fce1..ef1a3ac1397e 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -353,10 +353,9 @@ static noinline void __init memcg_accounted_kmem_cache(void) */ for (i = 0; i < 5; i++) { p = kmem_cache_alloc(cache, GFP_KERNEL); - if (!p) { - pr_err("Allocation failed\n"); + if (!p) goto free_cache; - } + kmem_cache_free(cache, p); msleep(100); } diff --git a/lib/test_kmod.c b/lib/test_kmod.c index fba78d25e825..337f408b4de6 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -783,10 +783,8 @@ static int kmod_config_sync_info(struct kmod_test_device *test_dev) free_test_dev_info(test_dev); test_dev->info = vzalloc(config->num_threads * sizeof(struct kmod_test_device_info)); - if (!test_dev->info) { - dev_err(test_dev->dev, "Cannot alloc test_dev info\n"); + if (!test_dev->info) return -ENOMEM; - } return 0; } @@ -1089,10 +1087,8 @@ static struct kmod_test_device *alloc_test_dev_kmod(int idx) struct miscdevice *misc_dev; test_dev = vzalloc(sizeof(struct kmod_test_device)); - if (!test_dev) { - pr_err("Cannot alloc test_dev\n"); + if (!test_dev) goto err_out; - } mutex_init(&test_dev->config_mutex); mutex_init(&test_dev->trigger_mutex); diff --git a/lib/test_list_sort.c b/lib/test_list_sort.c index 28e817387b04..5474f3f3e41d 100644 --- a/lib/test_list_sort.c +++ b/lib/test_list_sort.c @@ -76,17 +76,14 @@ static int __init list_sort_test(void) pr_debug("start testing list_sort()\n"); elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL); - if (!elts) { - pr_err("error: cannot allocate memory\n"); + if (!elts) return err; - } for (i = 0; i < TEST_LIST_LEN; i++) { el = kmalloc(sizeof(*el), GFP_KERNEL); - if (!el) { - pr_err("error: cannot allocate memory\n"); + if (!el) goto exit; - } + /* force some equivalencies */ el->value = prandom_u32() % (TEST_LIST_LEN / 3); el->serial = i; -- cgit v1.2.3 From 3f3295709edea6268ff1609855f498035286af73 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Nov 2017 15:28:04 -0800 Subject: lib/int_sqrt: optimize small argument The current int_sqrt() computation is sub-optimal for the case of small @x. Which is the interesting case when we're going to do cumulative distribution functions on idle times, which we assume to be a random variable, where the target residency of the deepest idle state gives an upper bound on the variable (5e6ns on recent Intel chips). In the case of small @x, the compute loop: while (m != 0) { b = y + m; y >>= 1; if (x >= b) { x -= b; y += m; } m >>= 2; } can be reduced to: while (m > x) m >>= 2; Because y==0, b==m and until x>=m y will remain 0. And while this is computationally equivalent, it runs much faster because there's less code, in particular less branches. cycles: branches: branch-misses: OLD: hot: 45.109444 +- 0.044117 44.333392 +- 0.002254 0.018723 +- 0.000593 cold: 187.737379 +- 0.156678 44.333407 +- 0.002254 6.272844 +- 0.004305 PRE: hot: 67.937492 +- 0.064124 66.999535 +- 0.000488 0.066720 +- 0.001113 cold: 232.004379 +- 0.332811 66.999527 +- 0.000488 6.914634 +- 0.006568 POST: hot: 43.633557 +- 0.034373 45.333132 +- 0.002277 0.023529 +- 0.000681 cold: 207.438411 +- 0.125840 45.333132 +- 0.002277 6.976486 +- 0.004219 Averages computed over all values <128k using a LFSR to generate order. Cold numbers have a LFSR based branch trace buffer 'confuser' ran between each int_sqrt() invocation. Link: http://lkml.kernel.org/r/20171020164644.876503355@infradead.org Fixes: 30493cc9dddb ("lib/int_sqrt.c: optimize square root algorithm") Signed-off-by: Peter Zijlstra (Intel) Suggested-by: Anshul Garg Acked-by: Linus Torvalds Cc: Davidlohr Bueso Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Will Deacon Cc: Joe Perches Cc: David Miller Cc: Matthew Wilcox Cc: Kees Cook Cc: Michael Davidson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/int_sqrt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index db0b5aa071fc..036c96781ea8 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -23,6 +23,9 @@ unsigned long int_sqrt(unsigned long x) return x; m = 1UL << (BITS_PER_LONG - 2); + while (m > x) + m >>= 2; + while (m != 0) { b = y + m; y >>= 1; -- cgit v1.2.3 From f8ae107eef209bff29a5816bc1aad40d5cd69a80 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Nov 2017 15:28:08 -0800 Subject: lib/int_sqrt: optimize initial value compute The initial value (@m) compute is: m = 1UL << (BITS_PER_LONG - 2); while (m > x) m >>= 2; Which is a linear search for the highest even bit smaller or equal to @x We can implement this using a binary search using __fls() (or better when its hardware implemented). m = 1UL << (__fls(x) & ~1UL); Especially for small values of @x; which are the more common arguments when doing a CDF on idle times; the linear search is near to worst case, while the binary search of __fls() is a constant 6 (or 5 on 32bit) branches. cycles: branches: branch-misses: PRE: hot: 43.633557 +- 0.034373 45.333132 +- 0.002277 0.023529 +- 0.000681 cold: 207.438411 +- 0.125840 45.333132 +- 0.002277 6.976486 +- 0.004219 SOFTWARE FLS: hot: 29.576176 +- 0.028850 26.666730 +- 0.004511 0.019463 +- 0.000663 cold: 165.947136 +- 0.188406 26.666746 +- 0.004511 6.133897 +- 0.004386 HARDWARE FLS: hot: 24.720922 +- 0.025161 20.666784 +- 0.004509 0.020836 +- 0.000677 cold: 132.777197 +- 0.127471 20.666776 +- 0.004509 5.080285 +- 0.003874 Averages computed over all values <128k using a LFSR to generate order. Cold numbers have a LFSR based branch trace buffer 'confuser' ran between each int_sqrt() invocation. Link: http://lkml.kernel.org/r/20171020164644.936577234@infradead.org Signed-off-by: Peter Zijlstra (Intel) Suggested-by: Joe Perches Acked-by: Will Deacon Acked-by: Linus Torvalds Cc: Anshul Garg Cc: Davidlohr Bueso Cc: David Miller Cc: Ingo Molnar Cc: Kees Cook Cc: Matthew Wilcox Cc: Michael Davidson Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/int_sqrt.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index 036c96781ea8..67bb300b5b46 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -8,6 +8,7 @@ #include #include +#include /** * int_sqrt - rough approximation to sqrt @@ -22,10 +23,7 @@ unsigned long int_sqrt(unsigned long x) if (x <= 1) return x; - m = 1UL << (BITS_PER_LONG - 2); - while (m > x) - m >>= 2; - + m = 1UL << (__fls(x) & ~1UL); while (m != 0) { b = y + m; y >>= 1; -- cgit v1.2.3 From e813a614007e3a8a7b53d976e86d9a20f21f81ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Nov 2017 15:28:12 -0800 Subject: lib/int_sqrt: adjust comments Our current int_sqrt() is not rough nor any approximation; it calculates the exact value of: floor(sqrt()). Document this. Link: http://lkml.kernel.org/r/20171020164645.001652117@infradead.org Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Anshul Garg Cc: Davidlohr Bueso Cc: David Miller Cc: Ingo Molnar Cc: Joe Perches Cc: Kees Cook Cc: Matthew Wilcox Cc: Michael Davidson Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/int_sqrt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index 67bb300b5b46..e2d329099bf7 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -11,10 +11,10 @@ #include /** - * int_sqrt - rough approximation to sqrt + * int_sqrt - computes the integer square root * @x: integer of which to calculate the sqrt * - * A very rough approximation to the sqrt() function. + * Computes: floor(sqrt(x)) */ unsigned long int_sqrt(unsigned long x) { -- cgit v1.2.3 From 36a3d1dd4e16bcd0d2ddfb4a2ec7092f0ae0d931 Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Fri, 17 Nov 2017 15:28:16 -0800 Subject: lib/genalloc.c: make the avail variable an atomic_long_t If the amount of resources allocated to a gen_pool exceeds 2^32 then the avail atomic overflows and this causes problems when clients try and borrow resources from the pool. This is only expected to be an issue on 64 bit systems. Add the header to pull in atomic_long* operations. So that 32 bit systems continue to use atomic32_t but 64 bit systems can use atomic64_t. Link: http://lkml.kernel.org/r/1509033843-25667-1-git-send-email-sbates@raithlin.com Signed-off-by: Stephen Bates Reviewed-by: Logan Gunthorpe Reviewed-by: Mathieu Desnoyers Reviewed-by: Daniel Mentz Cc: Jonathan Corbet Cc: Andrew Morton Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/genalloc.c b/lib/genalloc.c index 144fe6b1a03e..ca06adc4f445 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -194,7 +194,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size - 1; - atomic_set(&chunk->avail, size); + atomic_long_set(&chunk->avail, size); spin_lock(&pool->lock); list_add_rcu(&chunk->next_chunk, &pool->chunks); @@ -304,7 +304,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, nbits = (size + (1UL << order) - 1) >> order; rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (size > atomic_read(&chunk->avail)) + if (size > atomic_long_read(&chunk->avail)) continue; start_bit = 0; @@ -324,7 +324,7 @@ retry: addr = chunk->start_addr + ((unsigned long)start_bit << order); size = nbits << order; - atomic_sub(size, &chunk->avail); + atomic_long_sub(size, &chunk->avail); break; } rcu_read_unlock(); @@ -390,7 +390,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); BUG_ON(remain); size = nbits << order; - atomic_add(size, &chunk->avail); + atomic_long_add(size, &chunk->avail); rcu_read_unlock(); return; } @@ -464,7 +464,7 @@ size_t gen_pool_avail(struct gen_pool *pool) rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) - avail += atomic_read(&chunk->avail); + avail += atomic_long_read(&chunk->avail); rcu_read_unlock(); return avail; } -- cgit v1.2.3 From 2f9b7e08cb27d6d8d4579bb5301fb0940ff63d19 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Fri, 17 Nov 2017 15:28:20 -0800 Subject: lib/nmi_backtrace.c: fix kernel text address leak Don't leak idle function address in NMI backtrace. Link: http://lkml.kernel.org/r/20171106165648.GA95243@sofia Signed-off-by: Liu Changcheng Reviewed-by: Petr Mladek Reviewed-by: Josh Poimboeuf Reviewed-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/nmi_backtrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 46e4c749e4eb..61a6b5aab07e 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -93,8 +93,8 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { arch_spin_lock(&lock); if (regs && cpu_in_idle(instruction_pointer(regs))) { - pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", - cpu, instruction_pointer(regs)); + pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", + cpu, (void *)instruction_pointer(regs)); } else { pr_warn("NMI backtrace for cpu %d\n", cpu); if (regs) -- cgit v1.2.3 From 0b548e33e6cb2bff240fdaf1783783be15c29080 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 17 Nov 2017 15:28:27 -0800 Subject: lib/rbtree-test: lower default params Fengguang reported soft lockups while running the rbtree and interval tree test modules. The logic for these tests all occur in init phase, and we currently are pounding with the default values for number of nodes and number of iterations of each test. Reduce the latter by two orders of magnitude. This does not influence the value of the tests in that one thousand times by default is enough to get the picture. Link: http://lkml.kernel.org/r/20171109161715.xai2dtwqw2frhkcm@linux-n805 Signed-off-by: Davidlohr Bueso Reported-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/interval_tree_test.c | 4 ++-- lib/rbtree_test.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 0e343fd29570..835242e74aaa 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -11,10 +11,10 @@ MODULE_PARM_DESC(name, msg); __param(int, nnodes, 100, "Number of nodes in the interval tree"); -__param(int, perf_loops, 100000, "Number of iterations modifying the tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the tree"); __param(int, nsearches, 100, "Number of searches to the interval tree"); -__param(int, search_loops, 10000, "Number of iterations searching the tree"); +__param(int, search_loops, 1000, "Number of iterations searching the tree"); __param(bool, search_all, false, "Searches will iterate all nodes in the tree"); __param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint"); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 191a238e5a9d..7d36c1e27ff6 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -11,7 +11,7 @@ MODULE_PARM_DESC(name, msg); __param(int, nnodes, 100, "Number of nodes in the rb-tree"); -__param(int, perf_loops, 100000, "Number of iterations modifying the rb-tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the rb-tree"); __param(int, check_loops, 100, "Number of iterations modifying and verifying the rb-tree"); struct test_node { -- cgit v1.2.3 From 4441fca0a27f5f0e2c652584ae9d7abec6255c1f Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 17 Nov 2017 15:28:31 -0800 Subject: lib: test module for find_*_bit() functions find_bit functions are widely used in the kernel, including hot paths. This module tests performance of those functions in 2 typical scenarios: randomly filled bitmap with relatively equal distribution of set and cleared bits, and sparse bitmap which has 1 set bit for 500 cleared bits. On ThunderX machine: Start testing find_bit() with random-filled bitmap find_next_bit: 240043 cycles, 164062 iterations find_next_zero_bit: 312848 cycles, 163619 iterations find_last_bit: 193748 cycles, 164062 iterations find_first_bit: 177720874 cycles, 164062 iterations Start testing find_bit() with sparse bitmap find_next_bit: 3633 cycles, 656 iterations find_next_zero_bit: 620399 cycles, 327025 iterations find_last_bit: 3038 cycles, 656 iterations find_first_bit: 691407 cycles, 656 iterations [arnd@arndb.de: use correct format string for find-bit tests] Link: http://lkml.kernel.org/r/20171113135605.3166307-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20171109140714.13168-1-ynorov@caviumnetworks.com Signed-off-by: Yury Norov Signed-off-by: Arnd Bergmann Reviewed-by: Clement Courbet Cc: Alexey Dobriyan Cc: Matthew Wilcox Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 9 ++++ lib/Makefile | 1 + lib/test_find_bit.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 lib/test_find_bit.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8ffd891857ab..ce813731269f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1850,6 +1850,15 @@ config TEST_BPF If unsure, say N. +config TEST_FIND_BIT + tristate "Test find_bit functions" + default n + help + This builds the "test_find_bit" module that measure find_*_bit() + functions performance. + + If unsure, say N. + config TEST_FIRMWARE tristate "Test firmware loading via userspace interface" default n diff --git a/lib/Makefile b/lib/Makefile index e3cb2a241338..d11c48ec8ffd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -47,6 +47,7 @@ obj-y += hexdump.o obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o obj-$(CONFIG_TEST_BPF) += test_bpf.o +obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o diff --git a/lib/test_find_bit.c b/lib/test_find_bit.c new file mode 100644 index 000000000000..f4394a36f9aa --- /dev/null +++ b/lib/test_find_bit.c @@ -0,0 +1,144 @@ +/* + * Test for find_*_bit functions. + * + * Copyright (c) 2017 Cavium. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* + * find_bit functions are widely used in kernel, so the successful boot + * is good enough test for correctness. + * + * This test is focused on performance of traversing bitmaps. Two typical + * scenarios are reproduced: + * - randomly filled bitmap with approximately equal number of set and + * cleared bits; + * - sparse bitmap with few set bits at random positions. + */ + +#include +#include +#include +#include +#include +#include + +#define BITMAP_LEN (4096UL * 8 * 10) +#define SPARSE 500 + +static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; + +/* + * This is Schlemiel the Painter's algorithm. It should be called after + * all other tests for the same bitmap because it sets all bits of bitmap to 1. + */ +static int __init test_find_first_bit(void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < len; cnt++) { + i = find_first_bit(bitmap, len); + __clear_bit(i, bitmap); + } + cycles = get_cycles() - cycles; + pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_next_bit(const void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_bit(bitmap, BITMAP_LEN, i) + 1; + cycles = get_cycles() - cycles; + pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_zero_bit(bitmap, len, i) + 1; + cycles = get_cycles() - cycles; + pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_last_bit(const void *bitmap, unsigned long len) +{ + unsigned long l, cnt = 0; + cycles_t cycles; + + cycles = get_cycles(); + do { + cnt++; + l = find_last_bit(bitmap, len); + if (l >= len) + break; + len = l; + } while (len); + cycles = get_cycles() - cycles; + pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init find_bit_test(void) +{ + unsigned long nbits = BITMAP_LEN / SPARSE; + + pr_err("\nStart testing find_bit() with random-filled bitmap\n"); + + get_random_bytes(bitmap, sizeof(bitmap)); + + test_find_next_bit(bitmap, BITMAP_LEN); + test_find_next_zero_bit(bitmap, BITMAP_LEN); + test_find_last_bit(bitmap, BITMAP_LEN); + test_find_first_bit(bitmap, BITMAP_LEN); + + pr_err("\nStart testing find_bit() with sparse bitmap\n"); + + bitmap_zero(bitmap, BITMAP_LEN); + + while (nbits--) + __set_bit(prandom_u32() % BITMAP_LEN, bitmap); + + test_find_next_bit(bitmap, BITMAP_LEN); + test_find_next_zero_bit(bitmap, BITMAP_LEN); + test_find_last_bit(bitmap, BITMAP_LEN); + test_find_first_bit(bitmap, BITMAP_LEN); + + return 0; +} +module_init(find_bit_test); + +static void __exit test_find_bit_cleanup(void) +{ +} +module_exit(test_find_bit_cleanup); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From d677a4d6019385488e794cc47bd3d6f9c2aab874 Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Fri, 17 Nov 2017 15:30:50 -0800 Subject: Makefile: support flag -fsanitizer-coverage=trace-cmp The flag enables Clang instrumentation of comparison operations (currently not supported by GCC). This instrumentation is needed by the new KCOV device to collect comparison operands. Link: http://lkml.kernel.org/r/20171011095459.70721-2-glider@google.com Signed-off-by: Victor Chibotaru Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Mark Rutland Cc: Alexander Popov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Vegard Nossum Cc: Quentin Casasnovas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ce813731269f..947d3e2ed5c2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -756,6 +756,16 @@ config KCOV For more details, see Documentation/dev-tools/kcov.rst. +config KCOV_ENABLE_COMPARISONS + bool "Enable comparison operands collection by KCOV" + depends on KCOV + default n + help + KCOV also exposes operands of every comparison in the instrumented + code along with operand sizes and PCs of the comparison instructions. + These operands can be used by fuzzing engines to improve the quality + of fuzzing coverage. + config KCOV_INSTRUMENT_ALL bool "Instrument all code by default" depends on KCOV -- cgit v1.2.3