Diffstat (limited to 'lib')
42 files changed, 1537 insertions, 414 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8d969b250b18..dc0e0c6ed075 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2515,8 +2515,8 @@ config TEST_IDA
 	tristate "Perform selftest on IDA functions"
 
 config TEST_MISC_MINOR
-	tristate "miscdevice KUnit test" if !KUNIT_ALL_TESTS
-	depends on KUNIT
+	bool "miscdevice KUnit test" if !KUNIT_ALL_TESTS
+	depends on KUNIT=y
 	default KUNIT_ALL_TESTS
 	help
 	  Kunit test for miscdevice API, specially its behavior in respect to
@@ -2894,6 +2894,7 @@ config FORTIFY_KUNIT_TEST
 config LONGEST_SYM_KUNIT_TEST
 	tristate "Test the longest symbol possible" if !KUNIT_ALL_TESTS
 	depends on KUNIT && KPROBES
+	depends on !PREFIX_SYMBOLS && !CFI_CLANG && !GCOV_KERNEL
 	default KUNIT_ALL_TESTS
 	help
 	  Tests the longest symbol possible
@@ -3213,6 +3214,37 @@ config TEST_OBJPOOL
 
 	  If unsure, say N.
 
+config TEST_KEXEC_HANDOVER
+	bool "Test for Kexec HandOver"
+	default n
+	depends on KEXEC_HANDOVER
+	help
+	  This option enables test for Kexec HandOver (KHO).
+	  The test consists of two parts: saving kernel data before kexec and
+	  restoring the data after kexec and verifying that it was properly
+	  handed over. This test module creates and saves data on the boot of
+	  the first kernel and restores and verifies the data on the boot of
+	  kexec'ed kernel.
+
+	  For detailed documentation about KHO, see Documentation/core-api/kho.
+
+	  To run the test run:
+
+	  tools/testing/selftests/kho/vmtest.sh -h
+
+	  If unsure, say N.
+
+config RATELIMIT_KUNIT_TEST
+	tristate "KUnit Test for correctness and stress of ratelimit" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  This builds the "test_ratelimit" module that should be used
+	  for correctness verification and concurrent testings of rate
+	  limiting.
+
+	  If unsure, say N.
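The new RATELIMIT_KUNIT_TEST entry above builds a "test_ratelimit" module for correctness and stress testing of the ratelimit code. As a rough, hypothetical sketch of the <linux/ratelimit.h> API such a test exercises (this is not the actual test_ratelimit implementation; the suite and case names below are made up), a minimal KUnit burst check could look like:

/*
 * Hypothetical sketch only: DEFINE_RATELIMIT_STATE() and __ratelimit() are
 * the existing ratelimit API, but this suite is illustrative and is not the
 * test_ratelimit module referenced by RATELIMIT_KUNIT_TEST.
 */
#include <kunit/test.h>
#include <linux/module.h>
#include <linux/ratelimit.h>

static void ratelimit_burst_example(struct kunit *test)
{
	/* Allow at most 3 events per 1-second (HZ jiffies) interval. */
	static DEFINE_RATELIMIT_STATE(rs, HZ, 3);
	int allowed = 0;
	int i;

	/* Fire 10 events back to back; only the burst should pass. */
	for (i = 0; i < 10; i++) {
		if (__ratelimit(&rs))
			allowed++;
	}

	KUNIT_EXPECT_EQ(test, allowed, 3);
}

static struct kunit_case ratelimit_example_cases[] = {
	KUNIT_CASE(ratelimit_burst_example),
	{}
};

static struct kunit_suite ratelimit_example_suite = {
	.name = "ratelimit_example",
	.test_cases = ratelimit_example_cases,
};
kunit_test_suite(ratelimit_example_suite);

MODULE_LICENSE("GPL");

The real module presumably also drives __ratelimit() from several concurrent threads, which is the "concurrent testings" aspect mentioned in the help text.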
+ config INT_POW_KUNIT_TEST tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 06b954473222..392ff808c9b9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,7 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o iomem_copy.o + buildid.o objpool.o iomem_copy.o sys_info.o lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o @@ -102,6 +102,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o +obj-$(CONFIG_TEST_KEXEC_HANDOVER) += test_kho.o obj-$(CONFIG_TEST_FPU) += test_fpu.o test_fpu-y := test_fpu_glue.o test_fpu_impl.o diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 0142bc916f73..e9b33848700a 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -25,8 +25,10 @@ static bool mem_profiling_support; static struct codetag_type *alloc_tag_cttype; +#ifdef CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); EXPORT_SYMBOL(_shared_alloc_tag); +#endif DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, mem_alloc_profiling_key); @@ -46,21 +48,16 @@ struct allocinfo_private { static void *allocinfo_start(struct seq_file *m, loff_t *pos) { struct allocinfo_private *priv; - struct codetag *ct; loff_t node = *pos; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - m->private = priv; - if (!priv) - return NULL; - - priv->print_header = (node == 0); + priv = (struct allocinfo_private *)m->private; codetag_lock_module_list(alloc_tag_cttype, true); - priv->iter = codetag_get_ct_iter(alloc_tag_cttype); - while ((ct = codetag_next_ct(&priv->iter)) != NULL && node) - node--; - - return ct ? priv : NULL; + if (node == 0) { + priv->print_header = true; + priv->iter = codetag_get_ct_iter(alloc_tag_cttype); + codetag_next_ct(&priv->iter); + } + return priv->iter.ct ? 
priv : NULL; } static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos) @@ -77,12 +74,7 @@ static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos) static void allocinfo_stop(struct seq_file *m, void *arg) { - struct allocinfo_private *priv = (struct allocinfo_private *)m->private; - - if (priv) { - codetag_lock_module_list(alloc_tag_cttype, false); - kfree(priv); - } + codetag_lock_module_list(alloc_tag_cttype, false); } static void print_allocinfo_header(struct seq_buf *buf) @@ -820,7 +812,8 @@ static int __init alloc_tag_init(void) return 0; } - if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) { + if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op, + sizeof(struct allocinfo_private), NULL)) { pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME); shutdown_mem_profiling(false); return -ENOMEM; diff --git a/lib/codetag.c b/lib/codetag.c index 650d54d7e14d..545911cebd25 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -11,8 +11,14 @@ struct codetag_type { struct list_head link; unsigned int count; struct idr mod_idr; - struct rw_semaphore mod_lock; /* protects mod_idr */ + /* + * protects mod_idr, next_mod_seq, + * iter->mod_seq and cmod->mod_seq + */ + struct rw_semaphore mod_lock; struct codetag_type_desc desc; + /* generates unique sequence number for module load */ + unsigned long next_mod_seq; }; struct codetag_range { @@ -23,6 +29,7 @@ struct codetag_range { struct codetag_module { struct module *mod; struct codetag_range range; + unsigned long mod_seq; }; static DEFINE_MUTEX(codetag_lock); @@ -48,6 +55,7 @@ struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype) .cmod = NULL, .mod_id = 0, .ct = NULL, + .mod_seq = 0, }; return iter; @@ -91,11 +99,13 @@ struct codetag *codetag_next_ct(struct codetag_iterator *iter) if (!cmod) break; - if (cmod != iter->cmod) { + if (!iter->cmod || iter->mod_seq != cmod->mod_seq) { iter->cmod = cmod; + iter->mod_seq = cmod->mod_seq; ct = get_first_module_ct(cmod); - } else + } else { ct = get_next_module_ct(iter); + } if (ct) break; @@ -191,6 +201,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) cmod->range = range; down_write(&cttype->mod_lock); + cmod->mod_seq = ++cttype->next_mod_seq; mod_id = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); if (mod_id >= 0) { if (cttype->desc.module_load) { diff --git a/lib/crypto/s390/chacha-glue.c b/lib/crypto/s390/chacha-glue.c index f95ba3483bbc..c57dc851214f 100644 --- a/lib/crypto/s390/chacha-glue.c +++ b/lib/crypto/s390/chacha-glue.c @@ -10,6 +10,7 @@ #include <crypto/chacha.h> #include <linux/cpufeature.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/sizes.h> diff --git a/lib/find_bit.c b/lib/find_bit.c index 06b6342aa3ae..d4b5a29e3e72 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -18,6 +18,7 @@ #include <linux/math.h> #include <linux/minmax.h> #include <linux/swab.h> +#include <linux/random.h> /* * Common helper for find_bit() function family @@ -291,3 +292,26 @@ EXPORT_SYMBOL(_find_next_bit_le); #endif #endif /* __BIG_ENDIAN */ + +/** + * find_random_bit - find a set bit at random position + * @addr: The address to base the search on + * @size: The bitmap size in bits + * + * Returns: a position of a random set bit; >= @size otherwise + */ +unsigned long find_random_bit(const unsigned long *addr, unsigned long size) +{ + int w = bitmap_weight(addr, size); + + switch (w) { + case 0: + return size; + case 1: 
+ /* Performance trick for single-bit bitmaps */ + return find_first_bit(addr, size); + default: + return find_nth_bit(addr, size, get_random_u32_below(w)); + } +} +EXPORT_SYMBOL(find_random_bit); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index b7f2fa08d9c8..78e16b95d210 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -826,3 +826,23 @@ static int __init kobject_uevent_init(void) postcore_initcall(kobject_uevent_init); #endif + +#ifdef CONFIG_UEVENT_HELPER +static const struct ctl_table uevent_helper_sysctl_table[] = { + { + .procname = "hotplug", + .data = &uevent_helper, + .maxlen = UEVENT_HELPER_PATH_LEN, + .mode = 0644, + .proc_handler = proc_dostring, + }, +}; + +static int __init init_uevent_helper_sysctl(void) +{ + register_sysctl_init("kernel", uevent_helper_sysctl_table); + return 0; +} + +postcore_initcall(init_uevent_helper_sysctl); +#endif diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index a97897edd964..c10ede4b1d22 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -93,4 +93,17 @@ config KUNIT_AUTORUN_ENABLED In most cases this should be left as Y. Only if additional opt-in behavior is needed should this be set to N. +config KUNIT_DEFAULT_TIMEOUT + int "Default value of the timeout module parameter" + default 300 + help + Sets the default timeout, in seconds, for Kunit test cases. This value + is further multiplied by a factor determined by the assigned speed + setting: 1x for `DEFAULT`, 3x for `KUNIT_SPEED_SLOW`, and 12x for + `KUNIT_SPEED_VERY_SLOW`. This allows slower tests on slower machines + sufficient time to complete. + + If unsure, the default timeout of 300 seconds is suitable for most + cases. + endif # KUNIT diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index d9c781c859fd..8c01eabd4eaf 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -8,6 +8,7 @@ #include "linux/gfp_types.h" #include <kunit/test.h> #include <kunit/test-bug.h> +#include <kunit/static_stub.h> #include <linux/device.h> #include <kunit/device.h> @@ -43,7 +44,8 @@ static void kunit_test_try_catch_successful_try_no_catch(struct kunit *test) kunit_try_catch_init(try_catch, test, kunit_test_successful_try, - kunit_test_no_catch); + kunit_test_no_catch, + 300 * msecs_to_jiffies(MSEC_PER_SEC)); kunit_try_catch_run(try_catch, test); KUNIT_EXPECT_TRUE(test, ctx->function_called); @@ -75,7 +77,8 @@ static void kunit_test_try_catch_unsuccessful_try_does_catch(struct kunit *test) kunit_try_catch_init(try_catch, test, kunit_test_unsuccessful_try, - kunit_test_catch); + kunit_test_catch, + 300 * msecs_to_jiffies(MSEC_PER_SEC)); kunit_try_catch_run(try_catch, test); KUNIT_EXPECT_TRUE(test, ctx->function_called); @@ -129,7 +132,8 @@ static void kunit_test_fault_null_dereference(struct kunit *test) kunit_try_catch_init(try_catch, test, kunit_test_null_dereference, - kunit_test_catch); + kunit_test_catch, + 300 * msecs_to_jiffies(MSEC_PER_SEC)); kunit_try_catch_run(try_catch, test); KUNIT_EXPECT_EQ(test, try_catch->try_result, -EINTR); @@ -868,10 +872,53 @@ static struct kunit_suite kunit_current_test_suite = { .test_cases = kunit_current_test_cases, }; +static void kunit_stub_test(struct kunit *test) +{ + struct kunit fake_test; + const unsigned long fake_real_fn_addr = 0x1234; + const unsigned long fake_replacement_addr = 0x5678; + struct kunit_resource *res; + struct { + void *real_fn_addr; + void *replacement_addr; + } *stub_ctx; + + kunit_init_test(&fake_test, "kunit_stub_fake_test", NULL); + KUNIT_ASSERT_EQ(test, fake_test.status, 
KUNIT_SUCCESS); + KUNIT_ASSERT_EQ(test, list_count_nodes(&fake_test.resources), 0); + + __kunit_activate_static_stub(&fake_test, (void *)fake_real_fn_addr, + (void *)fake_replacement_addr); + KUNIT_ASSERT_EQ(test, fake_test.status, KUNIT_SUCCESS); + KUNIT_ASSERT_EQ(test, list_count_nodes(&fake_test.resources), 1); + + res = list_first_entry(&fake_test.resources, struct kunit_resource, node); + KUNIT_EXPECT_NOT_NULL(test, res); + + stub_ctx = res->data; + KUNIT_EXPECT_NOT_NULL(test, stub_ctx); + KUNIT_EXPECT_EQ(test, (unsigned long)stub_ctx->real_fn_addr, fake_real_fn_addr); + KUNIT_EXPECT_EQ(test, (unsigned long)stub_ctx->replacement_addr, fake_replacement_addr); + + __kunit_activate_static_stub(&fake_test, (void *)fake_real_fn_addr, NULL); + KUNIT_ASSERT_EQ(test, fake_test.status, KUNIT_SUCCESS); + KUNIT_ASSERT_EQ(test, list_count_nodes(&fake_test.resources), 0); +} + +static struct kunit_case kunit_stub_test_cases[] = { + KUNIT_CASE(kunit_stub_test), + {} +}; + +static struct kunit_suite kunit_stub_test_suite = { + .name = "kunit_stub", + .test_cases = kunit_stub_test_cases, +}; + kunit_test_suites(&kunit_try_catch_test_suite, &kunit_resource_test_suite, &kunit_log_test_suite, &kunit_status_test_suite, &kunit_current_test_suite, &kunit_device_test_suite, - &kunit_fault_test_suite); + &kunit_fault_test_suite, &kunit_stub_test_suite); MODULE_DESCRIPTION("KUnit test for core test infrastructure"); MODULE_LICENSE("GPL v2"); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 146d1b48a096..d2bfa331a2b1 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -70,6 +70,13 @@ module_param_named(enable, enable_param, bool, 0); MODULE_PARM_DESC(enable, "Enable KUnit tests"); /* + * Configure the base timeout. + */ +static unsigned long kunit_base_timeout = CONFIG_KUNIT_DEFAULT_TIMEOUT; +module_param_named(timeout, kunit_base_timeout, ulong, 0644); +MODULE_PARM_DESC(timeout, "Set the base timeout for Kunit test cases"); + +/* * KUnit statistic mode: * 0 - disabled * 1 - only when there is more than one subtest @@ -373,6 +380,40 @@ static void kunit_run_case_check_speed(struct kunit *test, duration.tv_sec, duration.tv_nsec); } +/* Returns timeout multiplier based on speed. + * DEFAULT: 1 + * KUNIT_SPEED_SLOW: 3 + * KUNIT_SPEED_VERY_SLOW: 12 + */ +static int kunit_timeout_mult(enum kunit_speed speed) +{ + switch (speed) { + case KUNIT_SPEED_SLOW: + return 3; + case KUNIT_SPEED_VERY_SLOW: + return 12; + default: + return 1; + } +} + +static unsigned long kunit_test_timeout(struct kunit_suite *suite, struct kunit_case *test_case) +{ + int mult = 1; + + /* + * The default test timeout is 300 seconds and will be adjusted by mult + * based on the test speed. The test speed will be overridden by the + * innermost test component. + */ + if (suite->attr.speed != KUNIT_SPEED_UNSET) + mult = kunit_timeout_mult(suite->attr.speed); + if (test_case->attr.speed != KUNIT_SPEED_UNSET) + mult = kunit_timeout_mult(test_case->attr.speed); + return mult * kunit_base_timeout * msecs_to_jiffies(MSEC_PER_SEC); +} + + /* * Initializes and runs test case. Does not clean up or do post validations. 
*/ @@ -527,7 +568,8 @@ static void kunit_run_case_catch_errors(struct kunit_suite *suite, kunit_try_catch_init(try_catch, test, kunit_try_run_case, - kunit_catch_run_case); + kunit_catch_run_case, + kunit_test_timeout(suite, test_case)); context.test = test; context.suite = suite; context.test_case = test_case; @@ -537,7 +579,8 @@ static void kunit_run_case_catch_errors(struct kunit_suite *suite, kunit_try_catch_init(try_catch, test, kunit_try_run_case_cleanup, - kunit_catch_run_case_cleanup); + kunit_catch_run_case_cleanup, + kunit_test_timeout(suite, test_case)); kunit_try_catch_run(try_catch, &context); /* Propagate the parameter result to the test case. */ @@ -759,7 +802,6 @@ void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites) } EXPORT_SYMBOL_GPL(__kunit_test_suites_exit); -#ifdef CONFIG_MODULES static void kunit_module_init(struct module *mod) { struct kunit_suite_set suite_set, filtered_set; @@ -847,7 +889,6 @@ static struct notifier_block kunit_mod_nb = { .notifier_call = kunit_module_notify, .priority = 0, }; -#endif KUNIT_DEFINE_ACTION_WRAPPER(kfree_action_wrapper, kfree, const void *) @@ -938,20 +979,14 @@ static int __init kunit_init(void) kunit_debugfs_init(); kunit_bus_init(); -#ifdef CONFIG_MODULES return register_module_notifier(&kunit_mod_nb); -#else - return 0; -#endif } late_initcall(kunit_init); static void __exit kunit_exit(void) { memset(&kunit_hooks, 0, sizeof(kunit_hooks)); -#ifdef CONFIG_MODULES unregister_module_notifier(&kunit_mod_nb); -#endif kunit_bus_shutdown(); diff --git a/lib/kunit/try-catch-impl.h b/lib/kunit/try-catch-impl.h index 203ba6a5e740..6f401b97cd0b 100644 --- a/lib/kunit/try-catch-impl.h +++ b/lib/kunit/try-catch-impl.h @@ -17,11 +17,13 @@ struct kunit; static inline void kunit_try_catch_init(struct kunit_try_catch *try_catch, struct kunit *test, kunit_try_catch_func_t try, - kunit_try_catch_func_t catch) + kunit_try_catch_func_t catch, + unsigned long timeout) { try_catch->test = test; try_catch->try = try; try_catch->catch = catch; + try_catch->timeout = timeout; } #endif /* _KUNIT_TRY_CATCH_IMPL_H */ diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c index 6bbe0025b079..d84a879f0a78 100644 --- a/lib/kunit/try-catch.c +++ b/lib/kunit/try-catch.c @@ -34,31 +34,6 @@ static int kunit_generic_run_threadfn_adapter(void *data) return 0; } -static unsigned long kunit_test_timeout(void) -{ - /* - * TODO(brendanhiggins@google.com): We should probably have some type of - * variable timeout here. The only question is what that timeout value - * should be. - * - * The intention has always been, at some point, to be able to label - * tests with some type of size bucket (unit/small, integration/medium, - * large/system/end-to-end, etc), where each size bucket would get a - * default timeout value kind of like what Bazel does: - * https://docs.bazel.build/versions/master/be/common-definitions.html#test.size - * There is still some debate to be had on exactly how we do this. (For - * one, we probably want to have some sort of test runner level - * timeout.) - * - * For more background on this topic, see: - * https://mike-bland.com/2011/11/01/small-medium-large.html - * - * If tests timeout due to exceeding sysctl_hung_task_timeout_secs, - * the task will be killed and an oops generated. 
- */ - return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */ -} - void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context) { struct kunit *test = try_catch->test; @@ -85,8 +60,8 @@ void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context) task_done = task_struct->vfork_done; wake_up_process(task_struct); - time_remaining = wait_for_completion_timeout(task_done, - kunit_test_timeout()); + time_remaining = wait_for_completion_timeout( + task_done, try_catch->timeout); if (time_remaining == 0) { try_catch->try_result = -ETIMEDOUT; kthread_stop(task_struct); diff --git a/lib/kunit/user_alloc.c b/lib/kunit/user_alloc.c index 46951be018be..b8cac765e620 100644 --- a/lib/kunit/user_alloc.c +++ b/lib/kunit/user_alloc.c @@ -22,8 +22,7 @@ struct kunit_vm_mmap_params { unsigned long offset; }; -/* Create and attach a new mm if it doesn't already exist. */ -static int kunit_attach_mm(void) +int kunit_attach_mm(void) { struct mm_struct *mm; @@ -49,6 +48,7 @@ static int kunit_attach_mm(void) return 0; } +EXPORT_SYMBOL_GPL(kunit_attach_mm); static int kunit_vm_mmap_init(struct kunit_resource *res, void *context) { diff --git a/lib/maple_tree.c b/lib/maple_tree.c index ef66be963798..b4ee2d29d7a9 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1053,7 +1053,7 @@ static inline void mte_set_gap(const struct maple_enode *mn, * mas_ascend() - Walk up a level of the tree. * @mas: The maple state * - * Sets the @mas->max and @mas->min to the correct values when walking up. This + * Sets the @mas->max and @mas->min for the parent node of mas->node. This * may cause several levels of walking up to find the correct min and max. * May find a dead node which will cause a premature return. * Return: 1 on dead node, 0 otherwise @@ -1098,6 +1098,12 @@ static int mas_ascend(struct ma_state *mas) min = 0; max = ULONG_MAX; + + /* + * !mas->offset implies that parent node min == mas->min. + * mas->offset > 0 implies that we need to walk up to find the + * implied pivot min. 
+ */ if (!mas->offset) { min = mas->min; set_min = true; @@ -4560,15 +4566,12 @@ again: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; - if (likely(entry)) return entry; if (!empty) { - if (mas->index <= min) { - mas->status = ma_underflow; - return NULL; - } + if (mas->index <= min) + goto underflow; goto again; } @@ -4930,7 +4933,7 @@ void *mas_walk(struct ma_state *mas) { void *entry; - if (!mas_is_active(mas) || !mas_is_start(mas)) + if (!mas_is_active(mas) && !mas_is_start(mas)) mas->status = ma_start; retry: entry = mas_state_walk(mas); @@ -5659,6 +5662,17 @@ int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) } EXPORT_SYMBOL_GPL(mas_expected_entries); +static void mas_may_activate(struct ma_state *mas) +{ + if (!mas->node) { + mas->status = ma_start; + } else if (mas->index > mas->max || mas->index < mas->min) { + mas->status = ma_start; + } else { + mas->status = ma_active; + } +} + static bool mas_next_setup(struct ma_state *mas, unsigned long max, void **entry) { @@ -5682,11 +5696,11 @@ static bool mas_next_setup(struct ma_state *mas, unsigned long max, break; case ma_overflow: /* Overflowed before, but the max changed */ - mas->status = ma_active; + mas_may_activate(mas); break; case ma_underflow: /* The user expects the mas to be one before where it is */ - mas->status = ma_active; + mas_may_activate(mas); *entry = mas_walk(mas); if (*entry) return true; @@ -5807,11 +5821,11 @@ static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry break; case ma_underflow: /* underflowed before but the min changed */ - mas->status = ma_active; + mas_may_activate(mas); break; case ma_overflow: /* User expects mas to be one after where it is */ - mas->status = ma_active; + mas_may_activate(mas); *entry = mas_walk(mas); if (*entry) return true; @@ -5976,7 +5990,7 @@ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long m return true; } - mas->status = ma_active; + mas_may_activate(mas); *entry = mas_walk(mas); if (*entry) return true; @@ -5985,7 +5999,7 @@ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long m if (unlikely(mas->last >= max)) return true; - mas->status = ma_active; + mas_may_activate(mas); *entry = mas_walk(mas); if (*entry) return true; diff --git a/lib/math/div64.c b/lib/math/div64.c index 5faa29208bdb..bf77b9843175 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -212,12 +212,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) #endif - /* make sure c is not zero, trigger exception otherwise */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdiv-by-zero" - if (unlikely(c == 0)) - return 1/0; -#pragma GCC diagnostic pop + /* make sure c is not zero, trigger runtime exception otherwise */ + if (unlikely(c == 0)) { + unsigned long zero = 0; + + OPTIMIZER_HIDE_VAR(zero); + return ~0UL/zero; + } int shift = __builtin_ctzll(c); diff --git a/lib/math/gcd.c b/lib/math/gcd.c index e3b042214d1b..62efca6787ae 100644 --- a/lib/math/gcd.c +++ b/lib/math/gcd.c @@ -11,22 +11,16 @@ * has decent hardware division. */ +DEFINE_STATIC_KEY_TRUE(efficient_ffs_key); + #if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) /* If __ffs is available, the even/odd algorithm benchmarks slower. 
*/ -/** - * gcd - calculate and return the greatest common divisor of 2 unsigned longs - * @a: first value - * @b: second value - */ -unsigned long gcd(unsigned long a, unsigned long b) +static unsigned long binary_gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; - if (!a || !b) - return r; - b >>= __ffs(b); if (b == 1) return r & -r; @@ -44,9 +38,15 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#else +#endif /* If normalization is done by loops, the even/odd algorithm is a win. */ + +/** + * gcd - calculate and return the greatest common divisor of 2 unsigned longs + * @a: first value + * @b: second value + */ unsigned long gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; @@ -54,6 +54,11 @@ unsigned long gcd(unsigned long a, unsigned long b) if (!a || !b) return r; +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + if (static_branch_likely(&efficient_ffs_key)) + return binary_gcd(a, b); +#endif + /* Isolate lsbit of r */ r &= -r; @@ -80,6 +85,4 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#endif - EXPORT_SYMBOL_GPL(gcd); diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 75ce3e134b7c..799e0e5eac26 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -18,9 +18,6 @@ #else #include <linux/module.h> #include <linux/gfp.h> -/* In .bss so it's zeroed */ -const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); -EXPORT_SYMBOL(raid6_empty_zero_page); #endif struct raid6_calls raid6_call; diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index a7c1b2bbe40d..b5e47c008b41 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -31,10 +31,10 @@ static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -72,7 +72,7 @@ static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c index 4e8095403ee2..97d598d2535c 100644 --- a/lib/raid6/recov_avx2.c +++ b/lib/raid6/recov_avx2.c @@ -28,10 +28,10 @@ static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -196,7 +196,7 @@ static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c index 
310c715db313..7986120ca444 100644 --- a/lib/raid6/recov_avx512.c +++ b/lib/raid6/recov_avx512.c @@ -37,10 +37,10 @@ static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -238,7 +238,7 @@ static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c index 94aeac85e6f7..93dc515997a1 100644 --- a/lib/raid6/recov_loongarch_simd.c +++ b/lib/raid6/recov_loongarch_simd.c @@ -42,10 +42,10 @@ static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -197,7 +197,7 @@ static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -316,10 +316,10 @@ static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -436,7 +436,7 @@ static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c index 1bfc14174d4d..70e1404c1512 100644 --- a/lib/raid6/recov_neon.c +++ b/lib/raid6/recov_neon.c @@ -36,10 +36,10 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -74,7 +74,7 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c index f29303795ccf..5d54c4b437df 100644 --- a/lib/raid6/recov_rvv.c +++ b/lib/raid6/recov_rvv.c @@ -165,10 
+165,10 @@ static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -203,7 +203,7 @@ static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c index 179eec900cea..487018f81192 100644 --- a/lib/raid6/recov_s390xc.c +++ b/lib/raid6/recov_s390xc.c @@ -6,7 +6,6 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ -#include <linux/export.h> #include <linux/raid/pq.h> static inline void xor_block(u8 *p1, u8 *p2) @@ -35,10 +34,10 @@ static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -82,7 +81,7 @@ static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index 4bfa3c6b60de..2e849185c32b 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -30,10 +30,10 @@ static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -203,7 +203,7 @@ static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index cf5609b1ca79..a9e6ffcff04b 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -8,6 +8,7 @@ #include <linux/slab.h> #include <linux/stacktrace.h> #include <linux/stackdepot.h> +#include <linux/seq_file.h> #define REF_TRACKER_STACK_ENTRIES 16 #define STACK_BUF_SIZE 1024 @@ -28,6 +29,45 @@ struct ref_tracker_dir_stats { } stacks[]; }; +#ifdef CONFIG_DEBUG_FS +#include <linux/xarray.h> + +/* + * ref_tracker_dir_init() is usually called in allocation-safe contexts, but + * the same is not true of ref_tracker_dir_exit() which can be called 
from + * anywhere an object is freed. Removing debugfs dentries is a blocking + * operation, so we defer that work to the debugfs_reap_worker. + * + * Each dentry is tracked in the appropriate xarray. When + * ref_tracker_dir_exit() is called, its entries in the xarrays are marked and + * the workqueue job is scheduled. The worker then runs and deletes any marked + * dentries asynchronously. + */ +static struct xarray debugfs_dentries; +static struct xarray debugfs_symlinks; +static struct work_struct debugfs_reap_worker; + +#define REF_TRACKER_DIR_DEAD XA_MARK_0 +static inline void ref_tracker_debugfs_mark(struct ref_tracker_dir *dir) +{ + unsigned long flags; + + xa_lock_irqsave(&debugfs_dentries, flags); + __xa_set_mark(&debugfs_dentries, (unsigned long)dir, REF_TRACKER_DIR_DEAD); + xa_unlock_irqrestore(&debugfs_dentries, flags); + + xa_lock_irqsave(&debugfs_symlinks, flags); + __xa_set_mark(&debugfs_symlinks, (unsigned long)dir, REF_TRACKER_DIR_DEAD); + xa_unlock_irqrestore(&debugfs_symlinks, flags); + + schedule_work(&debugfs_reap_worker); +} +#else +static inline void ref_tracker_debugfs_mark(struct ref_tracker_dir *dir) +{ +} +#endif + static struct ref_tracker_dir_stats * ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit) { @@ -63,21 +103,39 @@ ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit) } struct ostream { + void __ostream_printf (*func)(struct ostream *stream, char *fmt, ...); + char *prefix; char *buf; + struct seq_file *seq; int size, used; }; +static void __ostream_printf pr_ostream_log(struct ostream *stream, char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); +} + +static void __ostream_printf pr_ostream_buf(struct ostream *stream, char *fmt, ...) +{ + int ret, len = stream->size - stream->used; + va_list args; + + va_start(args, fmt); + ret = vsnprintf(stream->buf + stream->used, len, fmt, args); + va_end(args); + if (ret > 0) + stream->used += min(ret, len); +} + #define pr_ostream(stream, fmt, args...) 
\ ({ \ struct ostream *_s = (stream); \ \ - if (!_s->buf) { \ - pr_err(fmt, ##args); \ - } else { \ - int ret, len = _s->size - _s->used; \ - ret = snprintf(_s->buf + _s->used, len, pr_fmt(fmt), ##args); \ - _s->used += min(ret, len); \ - } \ + _s->func(_s, fmt, ##args); \ }) static void @@ -96,8 +154,8 @@ __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir, stats = ref_tracker_get_stats(dir, display_limit); if (IS_ERR(stats)) { - pr_ostream(s, "%s@%pK: couldn't get stats, error %pe\n", - dir->name, dir, stats); + pr_ostream(s, "%s%s@%p: couldn't get stats, error %pe\n", + s->prefix, dir->class, dir, stats); return; } @@ -107,14 +165,15 @@ __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir, stack = stats->stacks[i].stack_handle; if (sbuf && !stack_depot_snprint(stack, sbuf, STACK_BUF_SIZE, 4)) sbuf[0] = 0; - pr_ostream(s, "%s@%pK has %d/%d users at\n%s\n", dir->name, dir, - stats->stacks[i].count, stats->total, sbuf); + pr_ostream(s, "%s%s@%p has %d/%d users at\n%s\n", s->prefix, + dir->class, dir, stats->stacks[i].count, + stats->total, sbuf); skipped -= stats->stacks[i].count; } if (skipped) - pr_ostream(s, "%s@%pK skipped reports about %d/%d users.\n", - dir->name, dir, skipped, stats->total); + pr_ostream(s, "%s%s@%p skipped reports about %d/%d users.\n", + s->prefix, dir->class, dir, skipped, stats->total); kfree(sbuf); @@ -124,7 +183,8 @@ __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir, void ref_tracker_dir_print_locked(struct ref_tracker_dir *dir, unsigned int display_limit) { - struct ostream os = {}; + struct ostream os = { .func = pr_ostream_log, + .prefix = "ref_tracker: " }; __ref_tracker_dir_pr_ostream(dir, display_limit, &os); } @@ -143,7 +203,10 @@ EXPORT_SYMBOL(ref_tracker_dir_print); int ref_tracker_dir_snprint(struct ref_tracker_dir *dir, char *buf, size_t size) { - struct ostream os = { .buf = buf, .size = size }; + struct ostream os = { .func = pr_ostream_buf, + .prefix = "ref_tracker: ", + .buf = buf, + .size = size }; unsigned long flags; spin_lock_irqsave(&dir->lock, flags); @@ -161,6 +224,11 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir) bool leak = false; dir->dead = true; + /* + * The xarray entries must be marked before the dir->lock is taken to + * protect simultaneous debugfs readers. + */ + ref_tracker_debugfs_mark(dir); spin_lock_irqsave(&dir->lock, flags); list_for_each_entry_safe(tracker, n, &dir->quarantine, head) { list_del(&tracker->head); @@ -273,3 +341,194 @@ int ref_tracker_free(struct ref_tracker_dir *dir, return 0; } EXPORT_SYMBOL_GPL(ref_tracker_free); + +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> + +static struct dentry *ref_tracker_debug_dir = (struct dentry *)-ENOENT; + +static void __ostream_printf pr_ostream_seq(struct ostream *stream, char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + seq_vprintf(stream->seq, fmt, args); + va_end(args); +} + +static int ref_tracker_dir_seq_print(struct ref_tracker_dir *dir, struct seq_file *seq) +{ + struct ostream os = { .func = pr_ostream_seq, + .prefix = "", + .seq = seq }; + + __ref_tracker_dir_pr_ostream(dir, 16, &os); + + return os.used; +} + +static int ref_tracker_debugfs_show(struct seq_file *f, void *v) +{ + struct ref_tracker_dir *dir = f->private; + unsigned long index = (unsigned long)dir; + unsigned long flags; + int ret; + + /* + * "dir" may not exist at this point if ref_tracker_dir_exit() has + * already been called. Take care not to dereference it until its + * legitimacy is established. 
+ * + * The xa_lock is necessary to ensure that "dir" doesn't disappear + * before its lock can be taken. If it's in the hash and not marked + * dead, then it's safe to take dir->lock which prevents + * ref_tracker_dir_exit() from completing. Once the dir->lock is + * acquired, the xa_lock can be released. All of this must be IRQ-safe. + */ + xa_lock_irqsave(&debugfs_dentries, flags); + if (!xa_load(&debugfs_dentries, index) || + xa_get_mark(&debugfs_dentries, index, REF_TRACKER_DIR_DEAD)) { + xa_unlock_irqrestore(&debugfs_dentries, flags); + return -ENODATA; + } + + spin_lock(&dir->lock); + xa_unlock(&debugfs_dentries); + ret = ref_tracker_dir_seq_print(dir, f); + spin_unlock_irqrestore(&dir->lock, flags); + return ret; +} + +static int ref_tracker_debugfs_open(struct inode *inode, struct file *filp) +{ + struct ref_tracker_dir *dir = inode->i_private; + + return single_open(filp, ref_tracker_debugfs_show, dir); +} + +static const struct file_operations ref_tracker_debugfs_fops = { + .owner = THIS_MODULE, + .open = ref_tracker_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * ref_tracker_dir_debugfs - create debugfs file for ref_tracker_dir + * @dir: ref_tracker_dir to be associated with debugfs file + * + * In most cases, a debugfs file will be created automatically for every + * ref_tracker_dir. If the object was created before debugfs is brought up + * then that may fail. In those cases, it is safe to call this at a later + * time to create the file. + */ +void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) +{ + char name[NAME_MAX + 1]; + struct dentry *dentry; + int ret; + + /* No-op if already created */ + dentry = xa_load(&debugfs_dentries, (unsigned long)dir); + if (dentry && !xa_is_err(dentry)) + return; + + ret = snprintf(name, sizeof(name), "%s@%px", dir->class, dir); + name[sizeof(name) - 1] = '\0'; + + if (ret < sizeof(name)) { + dentry = debugfs_create_file(name, S_IFREG | 0400, + ref_tracker_debug_dir, dir, + &ref_tracker_debugfs_fops); + if (!IS_ERR(dentry)) { + void *old; + + old = xa_store_irq(&debugfs_dentries, (unsigned long)dir, + dentry, GFP_KERNEL); + + if (xa_is_err(old)) + debugfs_remove(dentry); + else + WARN_ON_ONCE(old); + } + } +} +EXPORT_SYMBOL(ref_tracker_dir_debugfs); + +void __ostream_printf ref_tracker_dir_symlink(struct ref_tracker_dir *dir, const char *fmt, ...) 
+{ + char name[NAME_MAX + 1]; + struct dentry *symlink, *dentry; + va_list args; + int ret; + + symlink = xa_load(&debugfs_symlinks, (unsigned long)dir); + dentry = xa_load(&debugfs_dentries, (unsigned long)dir); + + /* Already created?*/ + if (symlink && !xa_is_err(symlink)) + return; + + if (!dentry || xa_is_err(dentry)) + return; + + va_start(args, fmt); + ret = vsnprintf(name, sizeof(name), fmt, args); + va_end(args); + name[sizeof(name) - 1] = '\0'; + + if (ret < sizeof(name)) { + symlink = debugfs_create_symlink(name, ref_tracker_debug_dir, + dentry->d_name.name); + if (!IS_ERR(symlink)) { + void *old; + + old = xa_store_irq(&debugfs_symlinks, (unsigned long)dir, + symlink, GFP_KERNEL); + if (xa_is_err(old)) + debugfs_remove(symlink); + else + WARN_ON_ONCE(old); + } + } +} +EXPORT_SYMBOL(ref_tracker_dir_symlink); + +static void debugfs_reap_work(struct work_struct *work) +{ + struct dentry *dentry; + unsigned long index; + bool reaped; + + do { + reaped = false; + xa_for_each_marked(&debugfs_symlinks, index, dentry, REF_TRACKER_DIR_DEAD) { + xa_erase_irq(&debugfs_symlinks, index); + debugfs_remove(dentry); + reaped = true; + } + xa_for_each_marked(&debugfs_dentries, index, dentry, REF_TRACKER_DIR_DEAD) { + xa_erase_irq(&debugfs_dentries, index); + debugfs_remove(dentry); + reaped = true; + } + } while (reaped); +} + +static int __init ref_tracker_debugfs_postcore_init(void) +{ + INIT_WORK(&debugfs_reap_worker, debugfs_reap_work); + xa_init_flags(&debugfs_dentries, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&debugfs_symlinks, XA_FLAGS_LOCK_IRQ); + return 0; +} +postcore_initcall(ref_tracker_debugfs_postcore_init); + +static int __init ref_tracker_debugfs_late_init(void) +{ + ref_tracker_debug_dir = debugfs_create_dir("ref_tracker", NULL); + return 0; +} +late_initcall(ref_tracker_debugfs_late_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index a2bb7738c373..94b3f6b19538 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -22,10 +22,8 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) if (is_percpu_thread()) goto out; -#ifdef CONFIG_SMP if (current->migration_disabled) goto out; -#endif /* * It is valid to assume CPU-locality during early bootup: diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 73d7b50924ef..de0b0025af2b 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -36,11 +36,11 @@ #include <linux/memblock.h> #include <linux/kasan-enabled.h> -#define DEPOT_POOLS_CAP 8192 -/* The pool_index is offset by 1 so the first record does not have a 0 handle. */ -#define DEPOT_MAX_POOLS \ - (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \ - (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP) +/* + * The pool_index is offset by 1 so the first record does not have a 0 handle. + */ +static unsigned int stack_max_pools __read_mostly = + MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192); static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); @@ -62,7 +62,7 @@ static unsigned int stack_bucket_number_order; static unsigned int stack_hash_mask; /* Array of memory regions that store stack records. */ -static void *stack_pools[DEPOT_MAX_POOLS]; +static void **stack_pools; /* Newly allocated pool that is not yet added to stack_pools. */ static void *new_pool; /* Number of pools in stack_pools. 
*/ @@ -101,6 +101,34 @@ static int __init disable_stack_depot(char *str) } early_param("stack_depot_disable", disable_stack_depot); +static int __init parse_max_pools(char *str) +{ + const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1; + unsigned int max_pools; + int rv; + + rv = kstrtouint(str, 0, &max_pools); + if (rv) + return rv; + + if (max_pools < 1024) { + pr_err("stack_depot_max_pools below 1024, using default of %u\n", + stack_max_pools); + goto out; + } + + if (max_pools > limit) { + pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n", + limit, stack_max_pools); + goto out; + } + + stack_max_pools = max_pools; +out: + return 0; +} +early_param("stack_depot_max_pools", parse_max_pools); + void __init stack_depot_request_early_init(void) { /* Too late to request early init now. */ @@ -182,6 +210,17 @@ int __init stack_depot_early_init(void) } init_stack_table(entries); + pr_info("allocating space for %u stack pools via memblock\n", + stack_max_pools); + stack_pools = + memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE); + if (!stack_pools) { + pr_err("stack pools allocation failed, disabling\n"); + memblock_free(stack_table, entries * sizeof(struct list_head)); + stack_depot_disabled = true; + return -ENOMEM; + } + return 0; } @@ -231,6 +270,16 @@ int stack_depot_init(void) stack_hash_mask = entries - 1; init_stack_table(entries); + pr_info("allocating space for %u stack pools via kvcalloc\n", + stack_max_pools); + stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL); + if (!stack_pools) { + pr_err("stack pools allocation failed, disabling\n"); + kvfree(stack_table); + stack_depot_disabled = true; + ret = -ENOMEM; + } + out_unlock: mutex_unlock(&stack_depot_init_mutex); @@ -245,9 +294,9 @@ static bool depot_init_pool(void **prealloc) { lockdep_assert_held(&pool_lock); - if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { + if (unlikely(pools_num >= stack_max_pools)) { /* Bail out if we reached the pool limit. */ - WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */ + WARN_ON_ONCE(pools_num > stack_max_pools); /* should never happen */ WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */ WARN_ONCE(1, "Stack depot reached limit capacity"); return false; @@ -273,7 +322,7 @@ static bool depot_init_pool(void **prealloc) * NULL; do not reset to NULL if we have reached the maximum number of * pools. */ - if (pools_num < DEPOT_MAX_POOLS) + if (pools_num < stack_max_pools) WRITE_ONCE(new_pool, NULL); else WRITE_ONCE(new_pool, STACK_DEPOT_POISON); diff --git a/lib/sys_info.c b/lib/sys_info.c new file mode 100644 index 000000000000..5bf503fd7ec1 --- /dev/null +++ b/lib/sys_info.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/sched/debug.h> +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/ftrace.h> +#include <linux/sysctl.h> +#include <linux/nmi.h> + +#include <linux/sys_info.h> + +struct sys_info_name { + unsigned long bit; + const char *name; +}; + +/* + * When 'si_names' gets updated, please make sure the 'sys_info_avail' + * below is updated accordingly. + */ +static const struct sys_info_name si_names[] = { + { SYS_INFO_TASKS, "tasks" }, + { SYS_INFO_MEM, "mem" }, + { SYS_INFO_TIMERS, "timers" }, + { SYS_INFO_LOCKS, "locks" }, + { SYS_INFO_FTRACE, "ftrace" }, + { SYS_INFO_ALL_CPU_BT, "all_bt" }, + { SYS_INFO_BLOCKED_TASKS, "blocked_tasks" }, +}; + +/* Expecting string like "xxx_sys_info=tasks,mem,timers,locks,ftrace,..." 
*/ +unsigned long sys_info_parse_param(char *str) +{ + unsigned long si_bits = 0; + char *s, *name; + int i; + + s = str; + while ((name = strsep(&s, ",")) && *name) { + for (i = 0; i < ARRAY_SIZE(si_names); i++) { + if (!strcmp(name, si_names[i].name)) { + si_bits |= si_names[i].bit; + break; + } + } + } + + return si_bits; +} + +#ifdef CONFIG_SYSCTL + +static const char sys_info_avail[] __maybe_unused = "tasks,mem,timers,locks,ftrace,all_bt,blocked_tasks"; + +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos) +{ + char names[sizeof(sys_info_avail) + 1]; + struct ctl_table table; + unsigned long *si_bits_global; + + si_bits_global = ro_table->data; + + if (write) { + unsigned long si_bits; + int ret; + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + ret = proc_dostring(&table, write, buffer, lenp, ppos); + if (ret) + return ret; + + si_bits = sys_info_parse_param(names); + /* The access to the global value is not synchronized. */ + WRITE_ONCE(*si_bits_global, si_bits); + return 0; + } else { + /* for 'read' operation */ + char *delim = ""; + int i, len = 0; + + for (i = 0; i < ARRAY_SIZE(si_names); i++) { + if (*si_bits_global & si_names[i].bit) { + len += scnprintf(names + len, sizeof(names) - len, + "%s%s", delim, si_names[i].name); + delim = ","; + } + } + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + return proc_dostring(&table, write, buffer, lenp, ppos); + } +} +#endif + +void sys_info(unsigned long si_mask) +{ + if (si_mask & SYS_INFO_TASKS) + show_state(); + + if (si_mask & SYS_INFO_MEM) + show_mem(); + + if (si_mask & SYS_INFO_TIMERS) + sysrq_timer_list_show(); + + if (si_mask & SYS_INFO_LOCKS) + debug_show_all_locks(); + + if (si_mask & SYS_INFO_FTRACE) + ftrace_dump(DUMP_ALL); + + if (si_mask & SYS_INFO_ALL_CPU_BT) + trigger_all_cpu_backtrace(); + + if (si_mask & SYS_INFO_BLOCKED_TASKS) + show_state_filter(TASK_UNINTERRUPTIBLE); +} diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 5b144bc5c4ec..761725bc713c 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -330,7 +330,7 @@ static int dmirror_fault(struct dmirror *dmirror, unsigned long start, { struct mm_struct *mm = dmirror->notifier.mm; unsigned long addr; - unsigned long pfns[64]; + unsigned long pfns[32]; struct hmm_range range = { .notifier = &dmirror->notifier, .hmm_pfns = pfns, @@ -879,8 +879,8 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror, unsigned long size = cmd->npages << PAGE_SHIFT; struct mm_struct *mm = dmirror->notifier.mm; struct vm_area_struct *vma; - unsigned long src_pfns[64] = { 0 }; - unsigned long dst_pfns[64] = { 0 }; + unsigned long src_pfns[32] = { 0 }; + unsigned long dst_pfns[32] = { 0 }; struct migrate_vma args = { 0 }; unsigned long next; int ret; @@ -939,8 +939,8 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror, unsigned long size = cmd->npages << PAGE_SHIFT; struct mm_struct *mm = dmirror->notifier.mm; struct vm_area_struct *vma; - unsigned long src_pfns[64] = { 0 }; - unsigned long dst_pfns[64] = { 0 }; + unsigned long src_pfns[32] = { 0 }; + unsigned long dst_pfns[32] = { 0 }; struct dmirror_bounce bounce; struct migrate_vma args = { 0 }; unsigned long next; @@ -1144,8 +1144,8 @@ static int dmirror_snapshot(struct dmirror *dmirror, unsigned long size = cmd->npages << PAGE_SHIFT; unsigned long addr; unsigned long next; - unsigned long pfns[64]; - unsigned char perm[64]; + unsigned long pfns[32]; + unsigned char perm[32]; char __user 
*uptr; struct hmm_range range = { .hmm_pfns = pfns, diff --git a/lib/test_kho.c b/lib/test_kho.c new file mode 100644 index 000000000000..c2eb899c3b45 --- /dev/null +++ b/lib/test_kho.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test module for KHO + * Copyright (c) 2025 Microsoft Corporation. + * + * Authors: + * Saurabh Sengar <ssengar@microsoft.com> + * Mike Rapoport <rppt@kernel.org> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/libfdt.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/vmalloc.h> +#include <linux/kexec_handover.h> + +#include <net/checksum.h> + +#define KHO_TEST_MAGIC 0x4b484f21 /* KHO! */ +#define KHO_TEST_FDT "kho_test" +#define KHO_TEST_COMPAT "kho-test-v1" + +static long max_mem = (PAGE_SIZE << MAX_PAGE_ORDER) * 2; +module_param(max_mem, long, 0644); + +struct kho_test_state { + unsigned int nr_folios; + struct folio **folios; + struct folio *fdt; + __wsum csum; +}; + +static struct kho_test_state kho_test_state; + +static int kho_test_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + struct kho_test_state *state = &kho_test_state; + struct kho_serialization *ser = v; + int err = 0; + + switch (cmd) { + case KEXEC_KHO_ABORT: + return NOTIFY_DONE; + case KEXEC_KHO_FINALIZE: + /* Handled below */ + break; + default: + return NOTIFY_BAD; + } + + err |= kho_preserve_folio(state->fdt); + err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt)); + + return err ? NOTIFY_BAD : NOTIFY_DONE; +} + +static struct notifier_block kho_test_nb = { + .notifier_call = kho_test_notifier, +}; + +static int kho_test_save_data(struct kho_test_state *state, void *fdt) +{ + phys_addr_t *folios_info __free(kvfree) = NULL; + int err = 0; + + folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info), + GFP_KERNEL); + if (!folios_info) + return -ENOMEM; + + for (int i = 0; i < state->nr_folios; i++) { + struct folio *folio = state->folios[i]; + unsigned int order = folio_order(folio); + + folios_info[i] = virt_to_phys(folio_address(folio)) | order; + + err = kho_preserve_folio(folio); + if (err) + return err; + } + + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property(fdt, "folios_info", folios_info, + state->nr_folios * sizeof(*folios_info)); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + return err; +} + +static int kho_test_prepare_fdt(struct kho_test_state *state) +{ + const char compatible[] = KHO_TEST_COMPAT; + unsigned int magic = KHO_TEST_MAGIC; + ssize_t fdt_size; + int err = 0; + void *fdt; + + fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE; + state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size)); + if (!state->fdt) + return -ENOMEM; + + fdt = folio_address(state->fdt); + + err |= fdt_create(fdt, fdt_size); + err |= fdt_finish_reservemap(fdt); + + err |= fdt_begin_node(fdt, ""); + err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible)); + err |= fdt_property(fdt, "magic", &magic, sizeof(magic)); + err |= kho_test_save_data(state, fdt); + err |= fdt_end_node(fdt); + + err |= fdt_finish(fdt); + + if (err) + folio_put(state->fdt); + + return err; +} + +static int kho_test_generate_data(struct kho_test_state *state) +{ + size_t alloc_size = 0; + __wsum csum = 0; + + while (alloc_size < 
max_mem) { + int order = get_random_u32() % NR_PAGE_ORDERS; + struct folio *folio; + unsigned int size; + void *addr; + + /* cap allocation so that we won't exceed max_mem */ + if (alloc_size + (PAGE_SIZE << order) > max_mem) { + order = get_order(max_mem - alloc_size); + if (order) + order--; + } + size = PAGE_SIZE << order; + + folio = folio_alloc(GFP_KERNEL | __GFP_NORETRY, order); + if (!folio) + goto err_free_folios; + + state->folios[state->nr_folios++] = folio; + addr = folio_address(folio); + get_random_bytes(addr, size); + csum = csum_partial(addr, size, csum); + alloc_size += size; + } + + state->csum = csum; + return 0; + +err_free_folios: + for (int i = 0; i < state->nr_folios; i++) + folio_put(state->folios[i]); + return -ENOMEM; +} + +static int kho_test_save(void) +{ + struct kho_test_state *state = &kho_test_state; + struct folio **folios __free(kvfree) = NULL; + unsigned long max_nr; + int err; + + max_mem = PAGE_ALIGN(max_mem); + max_nr = max_mem >> PAGE_SHIFT; + + folios = kvmalloc_array(max_nr, sizeof(*state->folios), GFP_KERNEL); + if (!folios) + return -ENOMEM; + state->folios = folios; + + err = kho_test_generate_data(state); + if (err) + return err; + + err = kho_test_prepare_fdt(state); + if (err) + return err; + + return register_kho_notifier(&kho_test_nb); +} + +static int kho_test_restore_data(const void *fdt, int node) +{ + const unsigned int *nr_folios; + const phys_addr_t *folios_info; + const __wsum *old_csum; + __wsum csum = 0; + int len; + + node = fdt_path_offset(fdt, "/data"); + + nr_folios = fdt_getprop(fdt, node, "nr_folios", &len); + if (!nr_folios || len != sizeof(*nr_folios)) + return -EINVAL; + + old_csum = fdt_getprop(fdt, node, "csum", &len); + if (!old_csum || len != sizeof(*old_csum)) + return -EINVAL; + + folios_info = fdt_getprop(fdt, node, "folios_info", &len); + if (!folios_info || len != sizeof(*folios_info) * *nr_folios) + return -EINVAL; + + for (int i = 0; i < *nr_folios; i++) { + unsigned int order = folios_info[i] & ~PAGE_MASK; + phys_addr_t phys = folios_info[i] & PAGE_MASK; + unsigned int size = PAGE_SIZE << order; + struct folio *folio; + + folio = kho_restore_folio(phys); + if (!folio) + break; + + if (folio_order(folio) != order) + break; + + csum = csum_partial(folio_address(folio), size, csum); + folio_put(folio); + } + + if (csum != *old_csum) + return -EINVAL; + + return 0; +} + +static int kho_test_restore(phys_addr_t fdt_phys) +{ + void *fdt = phys_to_virt(fdt_phys); + const unsigned int *magic; + int node, len, err; + + node = fdt_path_offset(fdt, "/"); + if (node < 0) + return -EINVAL; + + if (fdt_node_check_compatible(fdt, node, KHO_TEST_COMPAT)) + return -EINVAL; + + magic = fdt_getprop(fdt, node, "magic", &len); + if (!magic || len != sizeof(*magic)) + return -EINVAL; + + if (*magic != KHO_TEST_MAGIC) + return -EINVAL; + + err = kho_test_restore_data(fdt, node); + if (err) + return err; + + pr_info("KHO restore succeeded\n"); + return 0; +} + +static int __init kho_test_init(void) +{ + phys_addr_t fdt_phys; + int err; + + err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); + if (!err) + return kho_test_restore(fdt_phys); + + if (err != -ENOENT) { + pr_warn("failed to retrieve %s FDT: %d\n", KHO_TEST_FDT, err); + return err; + } + + return kho_test_save(); +} +module_init(kho_test_init); + +static void kho_test_cleanup(void) +{ + for (int i = 0; i < kho_test_state.nr_folios; i++) + folio_put(kho_test_state.folios[i]); + + kvfree(kho_test_state.folios); +} + +static void __exit kho_test_exit(void) +{ + 
unregister_kho_notifier(&kho_test_nb); + kho_test_cleanup(); +} +module_exit(kho_test_exit); + +MODULE_AUTHOR("Mike Rapoport <rppt@kernel.org>"); +MODULE_DESCRIPTION("KHO test module"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 13e2a10d7554..cb3936595b0d 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -3177,6 +3177,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) void *entry, *ptr = (void *) 0x1234500; void *ptr2 = &ptr; void *ptr3 = &ptr2; + unsigned long index; /* Check MAS_ROOT First */ mtree_store_range(mt, 0, 0, ptr, GFP_KERNEL); @@ -3707,6 +3708,37 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, !mas_is_active(&mas)); mas_unlock(&mas); + mtree_destroy(mt); + + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + for (int count = 0; count < 30; count++) { + mas_set(&mas, count); + mas_store_gfp(&mas, xa_mk_value(count), GFP_KERNEL); + } + + /* Ensure mas_find works with MA_UNDERFLOW */ + mas_set(&mas, 0); + entry = mas_walk(&mas); + mas_set(&mas, 0); + mas_prev(&mas, 0); + MT_BUG_ON(mt, mas.status != ma_underflow); + MT_BUG_ON(mt, mas_find(&mas, ULONG_MAX) != entry); + + /* Restore active on mas_next */ + entry = mas_next(&mas, ULONG_MAX); + index = mas.index; + mas_prev(&mas, index); + MT_BUG_ON(mt, mas.status != ma_underflow); + MT_BUG_ON(mt, mas_next(&mas, ULONG_MAX) != entry); + + /* Ensure overflow -> active works */ + mas_prev(&mas, 0); + mas_next(&mas, index - 1); + MT_BUG_ON(mt, mas.status != ma_overflow); + MT_BUG_ON(mt, mas_next(&mas, ULONG_MAX) != entry); + + mas_unlock(&mas); } static noinline void __init alloc_cyclic_testing(struct maple_tree *mt) diff --git a/lib/test_objagg.c b/lib/test_objagg.c index 222b39fc2629..ce5c4c36a084 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -908,50 +908,22 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints, return err; } -static int test_hints_case(const struct hints_case *hints_case) +static int test_hints_case2(const struct hints_case *hints_case, + struct objagg_hints *hints, struct objagg *objagg) { struct objagg_obj *objagg_obj; - struct objagg_hints *hints; struct world world2 = {}; - struct world world = {}; struct objagg *objagg2; - struct objagg *objagg; const char *errmsg; int i; int err; - objagg = objagg_create(&delta_ops, NULL, &world); - if (IS_ERR(objagg)) - return PTR_ERR(objagg); - - for (i = 0; i < hints_case->key_ids_count; i++) { - objagg_obj = world_obj_get(&world, objagg, - hints_case->key_ids[i]); - if (IS_ERR(objagg_obj)) { - err = PTR_ERR(objagg_obj); - goto err_world_obj_get; - } - } - - pr_debug_stats(objagg); - err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); - if (err) { - pr_err("Stats: %s\n", errmsg); - goto err_check_expect_stats; - } - - hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); - if (IS_ERR(hints)) { - err = PTR_ERR(hints); - goto err_hints_get; - } - pr_debug_hints_stats(hints); err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, &errmsg); if (err) { pr_err("Hints stats: %s\n", errmsg); - goto err_check_expect_hints_stats; + return err; } objagg2 = objagg_create(&delta_ops, hints, &world2); @@ -983,7 +955,48 @@ err_world2_obj_get: world_obj_put(&world2, objagg, hints_case->key_ids[i]); i = hints_case->key_ids_count; objagg_destroy(objagg2); -err_check_expect_hints_stats: + + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct 
objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world = {}; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + err = test_hints_case2(hints_case, hints, objagg); + objagg_hints_put(hints); err_hints_get: err_check_expect_stats: diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 1b0b59549aaf..2815658ccc37 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -41,7 +41,7 @@ __param(int, nr_pages, 0, __param(bool, use_huge, false, "Use vmalloc_huge in fix_size_alloc_test"); -__param(int, run_test_mask, INT_MAX, +__param(int, run_test_mask, 7, "Set tests specified in the mask.\n\n" "\t\tid: 1, name: fix_size_alloc_test\n" "\t\tid: 2, name: full_fit_alloc_test\n" @@ -396,25 +396,27 @@ cleanup: struct test_case_desc { const char *test_name; int (*test_func)(void); + bool xfail; }; static struct test_case_desc test_case_array[] = { - { "fix_size_alloc_test", fix_size_alloc_test }, - { "full_fit_alloc_test", full_fit_alloc_test }, - { "long_busy_list_alloc_test", long_busy_list_alloc_test }, - { "random_size_alloc_test", random_size_alloc_test }, - { "fix_align_alloc_test", fix_align_alloc_test }, - { "random_size_align_alloc_test", random_size_align_alloc_test }, - { "align_shift_alloc_test", align_shift_alloc_test }, - { "pcpu_alloc_test", pcpu_alloc_test }, - { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, - { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, - { "vm_map_ram_test", vm_map_ram_test }, + { "fix_size_alloc_test", fix_size_alloc_test, }, + { "full_fit_alloc_test", full_fit_alloc_test, }, + { "long_busy_list_alloc_test", long_busy_list_alloc_test, }, + { "random_size_alloc_test", random_size_alloc_test, }, + { "fix_align_alloc_test", fix_align_alloc_test, }, + { "random_size_align_alloc_test", random_size_align_alloc_test, }, + { "align_shift_alloc_test", align_shift_alloc_test, true }, + { "pcpu_alloc_test", pcpu_alloc_test, }, + { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test, }, + { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test, }, + { "vm_map_ram_test", vm_map_ram_test, }, /* Add a new test case here. 
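 * For example, a hypothetical entry using the new xfail flag (the third
 * member of struct test_case_desc; the name and function below are
 * illustrative only, not part of this patch) would be:
 *   { "my_expected_fail_test", my_expected_fail_test, true },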
*/ }; struct test_case_data { int test_failed; + int test_xfailed; int test_passed; u64 time; }; @@ -444,7 +446,7 @@ static int test_func(void *private) { struct test_driver *t = private; int random_array[ARRAY_SIZE(test_case_array)]; - int index, i, j; + int index, i, j, ret; ktime_t kt; u64 delta; @@ -468,11 +470,14 @@ static int test_func(void *private) */ if (!((run_test_mask & (1 << index)) >> index)) continue; - kt = ktime_get(); for (j = 0; j < test_repeat_count; j++) { - if (!test_case_array[index].test_func()) + ret = test_case_array[index].test_func(); + + if (!ret && !test_case_array[index].xfail) t->data[index].test_passed++; + else if (ret && test_case_array[index].xfail) + t->data[index].test_xfailed++; else t->data[index].test_failed++; } @@ -576,10 +581,11 @@ static void do_concurrent_test(void) continue; pr_info( - "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", + "Summary: %s passed: %d failed: %d xfailed: %d repeat: %d loops: %d avg: %llu usec\n", test_case_array[j].test_name, t->data[j].test_passed, t->data[j].test_failed, + t->data[j].test_xfailed, test_repeat_count, test_loop_count, t->data[j].time); } @@ -598,7 +604,11 @@ static int __init vmalloc_test_init(void) return IS_BUILTIN(CONFIG_TEST_VMALLOC) ? 0:-EAGAIN; } +#ifdef MODULE module_init(vmalloc_test_init) +#else +late_initcall(vmalloc_test_init); +#endif MODULE_LICENSE("GPL"); MODULE_AUTHOR("Uladzislau Rezki"); diff --git a/lib/tests/Makefile b/lib/tests/Makefile index 83434b722193..fa6d728a8b5b 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -46,5 +46,6 @@ obj-$(CONFIG_STRING_KUNIT_TEST) += string_kunit.o obj-$(CONFIG_STRING_HELPERS_KUNIT_TEST) += string_helpers_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o +obj-$(CONFIG_RATELIMIT_KUNIT_TEST) += test_ratelimit.o obj-$(CONFIG_TEST_RUNTIME_MODULE) += module/ diff --git a/lib/tests/longest_symbol_kunit.c b/lib/tests/longest_symbol_kunit.c index e3c28ff1807f..9b4de3050ba7 100644 --- a/lib/tests/longest_symbol_kunit.c +++ b/lib/tests/longest_symbol_kunit.c @@ -3,8 +3,7 @@ * Test the longest symbol length. 
Execute with: * ./tools/testing/kunit/kunit.py run longest-symbol * --arch=x86_64 --kconfig_add CONFIG_KPROBES=y --kconfig_add CONFIG_MODULES=y - * --kconfig_add CONFIG_RETPOLINE=n --kconfig_add CONFIG_CFI_CLANG=n - * --kconfig_add CONFIG_MITIGATION_RETPOLINE=n + * --kconfig_add CONFIG_CPU_MITIGATIONS=n --kconfig_add CONFIG_GCOV_KERNEL=n */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/lib/tests/test_bits.c b/lib/tests/test_bits.c index 47325b41515f..ab88e50d2edf 100644 --- a/lib/tests/test_bits.c +++ b/lib/tests/test_bits.c @@ -26,6 +26,23 @@ static_assert(assert_type(u16, GENMASK_U16(15, 0)) == U16_MAX); static_assert(assert_type(u32, GENMASK_U32(31, 0)) == U32_MAX); static_assert(assert_type(u64, GENMASK_U64(63, 0)) == U64_MAX); +/* FIXME: add a test case written in asm for GENMASK() and GENMASK_ULL() */ + +static void __genmask_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ul, __GENMASK(0, 0)); + KUNIT_EXPECT_EQ(test, 3ul, __GENMASK(1, 0)); + KUNIT_EXPECT_EQ(test, 6ul, __GENMASK(2, 1)); + KUNIT_EXPECT_EQ(test, 0xFFFFFFFFul, __GENMASK(31, 0)); +} + +static void __genmask_ull_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ull, __GENMASK_ULL(0, 0)); + KUNIT_EXPECT_EQ(test, 3ull, __GENMASK_ULL(1, 0)); + KUNIT_EXPECT_EQ(test, 0x000000ffffe00000ull, __GENMASK_ULL(39, 21)); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, __GENMASK_ULL(63, 0)); +} static void genmask_test(struct kunit *test) { @@ -123,6 +140,8 @@ static void genmask_input_check_test(struct kunit *test) static struct kunit_case bits_test_cases[] = { + KUNIT_CASE(__genmask_test), + KUNIT_CASE(__genmask_ull_test), KUNIT_CASE(genmask_test), KUNIT_CASE(genmask_ull_test), KUNIT_CASE(genmask_u128_test), diff --git a/lib/tests/test_ratelimit.c b/lib/tests/test_ratelimit.c new file mode 100644 index 000000000000..bfaeca49304a --- /dev/null +++ b/lib/tests/test_ratelimit.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <kunit/test.h> + +#include <linux/ratelimit.h> +#include <linux/module.h> +#include <linux/kthread.h> +#include <linux/cpumask.h> + +/* a simple boot-time regression test */ + +#define TESTRL_INTERVAL (5 * HZ) +static DEFINE_RATELIMIT_STATE(testrl, TESTRL_INTERVAL, 3); + +#define test_ratelimited(test, expected) \ + KUNIT_ASSERT_EQ(test, ___ratelimit(&testrl, "test_ratelimit_smoke"), (expected)) + +static void test_ratelimit_smoke(struct kunit *test) +{ + // Check settings. + KUNIT_ASSERT_GE(test, TESTRL_INTERVAL, 100); + + // Test normal operation. + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, false); + + schedule_timeout_idle(TESTRL_INTERVAL / 2); + test_ratelimited(test, false); + + schedule_timeout_idle(TESTRL_INTERVAL * 3 / 4); + test_ratelimited(test, true); + + schedule_timeout_idle(2 * TESTRL_INTERVAL); + test_ratelimited(test, true); + test_ratelimited(test, true); + + schedule_timeout_idle(TESTRL_INTERVAL / 2 ); + test_ratelimited(test, true); + schedule_timeout_idle(TESTRL_INTERVAL * 3 / 4); + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, false); + + // Test disabling. + testrl.burst = 0; + test_ratelimited(test, false); + testrl.burst = 2; + testrl.interval = 0; + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, true); + + // Testing re-enabling. 
+ testrl.interval = TESTRL_INTERVAL; + test_ratelimited(test, true); + test_ratelimited(test, true); + test_ratelimited(test, false); + test_ratelimited(test, false); +} + +static struct ratelimit_state stressrl = RATELIMIT_STATE_INIT_FLAGS("stressrl", HZ / 10, 3, + RATELIMIT_MSG_ON_RELEASE); + +static int doneflag; +static const int stress_duration = 2 * HZ; + +struct stress_kthread { + unsigned long nattempts; + unsigned long nunlimited; + unsigned long nlimited; + unsigned long nmissed; + struct task_struct *tp; +}; + +static int test_ratelimit_stress_child(void *arg) +{ + struct stress_kthread *sktp = arg; + + set_user_nice(current, MAX_NICE); + WARN_ON_ONCE(!sktp->tp); + + while (!READ_ONCE(doneflag)) { + sktp->nattempts++; + if (___ratelimit(&stressrl, __func__)) + sktp->nunlimited++; + else + sktp->nlimited++; + cond_resched(); + } + + sktp->nmissed = ratelimit_state_reset_miss(&stressrl); + return 0; +} + +static void test_ratelimit_stress(struct kunit *test) +{ + int i; + const int n_stress_kthread = cpumask_weight(cpu_online_mask); + struct stress_kthread skt = { 0 }; + struct stress_kthread *sktp = kcalloc(n_stress_kthread, sizeof(*sktp), GFP_KERNEL); + + KUNIT_EXPECT_NOT_NULL_MSG(test, sktp, "Memory allocation failure"); + for (i = 0; i < n_stress_kthread; i++) { + sktp[i].tp = kthread_run(test_ratelimit_stress_child, &sktp[i], "%s/%i", + "test_ratelimit_stress_child", i); + KUNIT_EXPECT_NOT_NULL_MSG(test, sktp, "kthread creation failure"); + pr_alert("Spawned test_ratelimit_stress_child %d\n", i); + } + schedule_timeout_idle(stress_duration); + WRITE_ONCE(doneflag, 1); + for (i = 0; i < n_stress_kthread; i++) { + kthread_stop(sktp[i].tp); + skt.nattempts += sktp[i].nattempts; + skt.nunlimited += sktp[i].nunlimited; + skt.nlimited += sktp[i].nlimited; + skt.nmissed += sktp[i].nmissed; + } + KUNIT_ASSERT_EQ_MSG(test, skt.nunlimited + skt.nlimited, skt.nattempts, + "Outcomes not equal to attempts"); + KUNIT_ASSERT_EQ_MSG(test, skt.nlimited, skt.nmissed, "Misses not equal to limits"); +} + +static struct kunit_case ratelimit_test_cases[] = { + KUNIT_CASE_SLOW(test_ratelimit_smoke), + KUNIT_CASE_SLOW(test_ratelimit_stress), + {} +}; + +static struct kunit_suite ratelimit_test_suite = { + .name = "lib_ratelimit", + .test_cases = ratelimit_test_cases, +}; + +kunit_test_suites(&ratelimit_test_suite); + +MODULE_DESCRIPTION("___ratelimit() KUnit test suite"); +MODULE_LICENSE("GPL"); diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 93ef801a97ef..02ea19f67164 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -2,6 +2,7 @@ /* * Generic userspace implementations of gettimeofday() and similar. */ +#include <vdso/auxclock.h> #include <vdso/datapage.h> #include <vdso/helpers.h> @@ -71,6 +72,42 @@ static inline bool vdso_cycles_ok(u64 cycles) } #endif +static __always_inline bool vdso_clockid_valid(clockid_t clock) +{ + /* Check for negative values or invalid clocks */ + return likely((u32) clock <= CLOCK_AUX_LAST); +} + +/* + * Must not be invoked within the sequence read section as a race inside + * that loop could result in __iter_div_u64_rem() being extremely slow. 
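 * The helper below folds whole seconds out of the nanosecond value,
 * e.g. sec = 5 and ns = 1700000000 are stored as tv_sec = 6 and
 * tv_nsec = 700000000.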
+ */ +static __always_inline void vdso_set_timespec(struct __kernel_timespec *ts, u64 sec, u64 ns) +{ + ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; +} + +static __always_inline +bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock *vc, + unsigned int clkidx, u64 *sec, u64 *ns) +{ + const struct vdso_timestamp *vdso_ts = &vc->basetime[clkidx]; + u64 cycles; + + if (unlikely(!vdso_clocksource_ok(vc))) + return false; + + cycles = __arch_get_hw_counter(vc->clock_mode, vd); + if (unlikely(!vdso_cycles_ok(cycles))) + return false; + + *ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); + *sec = vdso_ts->sec; + + return true; +} + #ifdef CONFIG_TIME_NS #ifdef CONFIG_GENERIC_VDSO_DATA_STORE @@ -82,48 +119,35 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ static __always_inline -int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; const struct vdso_clock *vc = vd->clock_data; - const struct vdso_timestamp *vdso_ts; - u64 cycles, ns; u32 seq; s64 sec; + u64 ns; if (clk != CLOCK_MONOTONIC_RAW) vc = &vc[CS_HRES_COARSE]; else vc = &vc[CS_RAW]; - vdso_ts = &vc->basetime[clk]; do { seq = vdso_read_begin(vc); - if (unlikely(!vdso_clocksource_ok(vc))) - return -1; - - cycles = __arch_get_hw_counter(vc->clock_mode, vd); - if (unlikely(!vdso_cycles_ok(cycles))) - return -1; - ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); - sec = vdso_ts->sec; + if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) + return false; } while (unlikely(vdso_read_retry(vc, seq))); /* Add the namespace offset */ sec += offs->sec; ns += offs->nsec; - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. 
- */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; + vdso_set_timespec(ts, sec, ns); - return 0; + return true; } #else static __always_inline @@ -133,24 +157,23 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim } static __always_inline -int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { - return -EINVAL; + return false; } #endif static __always_inline -int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, - clockid_t clk, struct __kernel_timespec *ts) +bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, + clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; - u64 cycles, sec, ns; + u64 sec, ns; u32 seq; /* Allows to compile the high resolution parts out */ if (!__arch_vdso_hres_capable()) - return -1; + return false; do { /* @@ -172,30 +195,19 @@ int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, } smp_rmb(); - if (unlikely(!vdso_clocksource_ok(vc))) - return -1; - - cycles = __arch_get_hw_counter(vc->clock_mode, vd); - if (unlikely(!vdso_cycles_ok(cycles))) - return -1; - ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); - sec = vdso_ts->sec; + if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) + return false; } while (unlikely(vdso_read_retry(vc, seq))); - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; + vdso_set_timespec(ts, sec, ns); - return 0; + return true; } #ifdef CONFIG_TIME_NS static __always_inline -int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; @@ -217,26 +229,22 @@ int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock sec += offs->sec; nsec += offs->nsec; - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. 
- */ - ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec); - ts->tv_nsec = nsec; - return 0; + vdso_set_timespec(ts, sec, nsec); + + return true; } #else static __always_inline -int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { - return -1; + return false; } #endif static __always_inline -int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, - clockid_t clk, struct __kernel_timespec *ts) +bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; u32 seq; @@ -258,19 +266,60 @@ int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, ts->tv_nsec = vdso_ts->nsec; } while (unlikely(vdso_read_retry(vc, seq))); - return 0; + return true; +} + +static __always_inline +bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) +{ + const struct vdso_clock *vc; + u32 seq, idx; + u64 sec, ns; + + if (!IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) + return false; + + idx = clock - CLOCK_AUX; + vc = &vd->aux_clock_data[idx]; + + do { + /* + * Open coded function vdso_read_begin() to handle + * VDSO_CLOCK_TIMENS. See comment in do_hres(). + */ + while ((seq = READ_ONCE(vc->seq)) & 1) { + if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) { + vd = __arch_get_vdso_u_timens_data(vd); + vc = &vd->aux_clock_data[idx]; + /* Re-read from the real time data page */ + continue; + } + cpu_relax(); + } + smp_rmb(); + + /* Auxclock disabled? 
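 * clock_mode is left at VDSO_CLOCKMODE_NONE while the auxiliary clock
 * is not active; returning false here makes the caller fall back to
 * the clock_gettime() syscall.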
*/ + if (vc->clock_mode == VDSO_CLOCKMODE_NONE) + return false; + + if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns)) + return false; + } while (unlikely(vdso_read_retry(vc, seq))); + + vdso_set_timespec(ts, sec, ns); + + return true; } -static __always_inline int +static __always_inline bool __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { const struct vdso_clock *vc = vd->clock_data; u32 msk; - /* Check for negative values or invalid clocks */ - if (unlikely((u32) clock >= MAX_CLOCKS)) - return -1; + if (!vdso_clockid_valid(clock)) + return false; /* * Convert the clockid to a bitmask and use it to check which @@ -283,8 +332,10 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, return do_coarse(vd, &vc[CS_HRES_COARSE], clock, ts); else if (msk & VDSO_RAW) vc = &vc[CS_RAW]; + else if (msk & VDSO_AUX) + return do_aux(vd, clock, ts); else - return -1; + return false; return do_hres(vd, vc, clock, ts); } @@ -293,9 +344,11 @@ static __maybe_unused int __cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { - int ret = __cvdso_clock_gettime_common(vd, clock, ts); + bool ok; + + ok = __cvdso_clock_gettime_common(vd, clock, ts); - if (unlikely(ret)) + if (unlikely(!ok)) return clock_gettime_fallback(clock, ts); return 0; } @@ -312,18 +365,18 @@ __cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock, struct old_timespec32 *res) { struct __kernel_timespec ts; - int ret; + bool ok; - ret = __cvdso_clock_gettime_common(vd, clock, &ts); + ok = __cvdso_clock_gettime_common(vd, clock, &ts); - if (unlikely(ret)) + if (unlikely(!ok)) return clock_gettime32_fallback(clock, res); - /* For ret == 0 */ + /* For ok == true */ res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; - return ret; + return 0; } static __maybe_unused int @@ -342,7 +395,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd, if (likely(tv != NULL)) { struct __kernel_timespec ts; - if (do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) + if (!do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) return gettimeofday_fallback(tv, tz); tv->tv_sec = ts.tv_sec; @@ -396,16 +449,15 @@ static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time #ifdef VDSO_HAS_CLOCK_GETRES static __maybe_unused -int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, - struct __kernel_timespec *res) +bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, + struct __kernel_timespec *res) { const struct vdso_clock *vc = vd->clock_data; u32 msk; u64 ns; - /* Check for negative values or invalid clocks */ - if (unlikely((u32) clock >= MAX_CLOCKS)) - return -1; + if (!vdso_clockid_valid(clock)) + return false; if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) @@ -426,24 +478,28 @@ int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock * Preserves the behaviour of posix_get_coarse_res(). 
*/ ns = LOW_RES_NSEC; + } else if (msk & VDSO_AUX) { + ns = aux_clock_resolution_ns(); } else { - return -1; + return false; } if (likely(res)) { res->tv_sec = 0; res->tv_nsec = ns; } - return 0; + return true; } static __maybe_unused int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { - int ret = __cvdso_clock_getres_common(vd, clock, res); + bool ok; - if (unlikely(ret)) + ok = __cvdso_clock_getres_common(vd, clock, res); + + if (unlikely(!ok)) return clock_getres_fallback(clock, res); return 0; } @@ -460,18 +516,18 @@ __cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t cloc struct old_timespec32 *res) { struct __kernel_timespec ts; - int ret; + bool ok; - ret = __cvdso_clock_getres_common(vd, clock, &ts); + ok = __cvdso_clock_getres_common(vd, clock, &ts); - if (unlikely(ret)) + if (unlikely(!ok)) return clock_getres32_fallback(clock, res); if (likely(res)) { res->tv_sec = ts.tv_sec; res->tv_nsec = ts.tv_nsec; } - return ret; + return 0; } static __maybe_unused int diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3d85800757aa..eb0cb11d0d12 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -60,6 +60,20 @@ bool no_hash_pointers __ro_after_init; EXPORT_SYMBOL_GPL(no_hash_pointers); +/* + * Hashed pointers policy selected by "hash_pointers=..." boot param + * + * `auto` - Hashed pointers enabled unless disabled by slub_debug_enabled=true + * `always` - Hashed pointers enabled unconditionally + * `never` - Hashed pointers disabled unconditionally + */ +enum hash_pointers_policy { + HASH_PTR_AUTO = 0, + HASH_PTR_ALWAYS, + HASH_PTR_NEVER +}; +static enum hash_pointers_policy hash_pointers_mode __initdata; + noinline static unsigned long long simple_strntoull(const char *startp, char **endp, unsigned int base, size_t max_chars) { @@ -1699,10 +1713,9 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } -#pragma GCC diagnostic push -#ifndef __clang__ -#pragma GCC diagnostic ignored "-Wsuggest-attribute=format" -#endif +__diag_push(); +__diag_ignore(GCC, all, "-Wsuggest-attribute=format", + "Not a valid __printf() conversion candidate."); static char *va_format(char *buf, char *end, struct va_format *va_fmt, struct printf_spec spec) { @@ -1717,7 +1730,7 @@ static char *va_format(char *buf, char *end, struct va_format *va_fmt, return buf; } -#pragma GCC diagnostic pop +__diag_pop(); static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, @@ -2289,12 +2302,23 @@ char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr, return resource_string(buf, end, ptr, spec, fmt); } -int __init no_hash_pointers_enable(char *str) +void __init hash_pointers_finalize(bool slub_debug) { - if (no_hash_pointers) - return 0; + switch (hash_pointers_mode) { + case HASH_PTR_ALWAYS: + no_hash_pointers = false; + break; + case HASH_PTR_NEVER: + no_hash_pointers = true; + break; + case HASH_PTR_AUTO: + default: + no_hash_pointers = slub_debug; + break; + } - no_hash_pointers = true; + if (!no_hash_pointers) + return; pr_warn("**********************************************************\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); @@ -2307,11 +2331,39 @@ int __init no_hash_pointers_enable(char *str) pr_warn("** the kernel, report this immediately to your system **\n"); pr_warn("** administrator! 
**\n"); pr_warn("** **\n"); + pr_warn("** Use hash_pointers=always to force this mode off **\n"); + pr_warn("** **\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("**********************************************************\n"); +} + +static int __init hash_pointers_mode_parse(char *str) +{ + if (!str) { + pr_warn("Hash pointers mode empty; falling back to auto.\n"); + hash_pointers_mode = HASH_PTR_AUTO; + } else if (strncmp(str, "auto", 4) == 0) { + pr_info("Hash pointers mode set to auto.\n"); + hash_pointers_mode = HASH_PTR_AUTO; + } else if (strncmp(str, "never", 5) == 0) { + pr_info("Hash pointers mode set to never.\n"); + hash_pointers_mode = HASH_PTR_NEVER; + } else if (strncmp(str, "always", 6) == 0) { + pr_info("Hash pointers mode set to always.\n"); + hash_pointers_mode = HASH_PTR_ALWAYS; + } else { + pr_warn("Unknown hash_pointers mode '%s' specified; assuming auto.\n", str); + hash_pointers_mode = HASH_PTR_AUTO; + } return 0; } +early_param("hash_pointers", hash_pointers_mode_parse); + +static int __init no_hash_pointers_enable(char *str) +{ + return hash_pointers_mode_parse("never"); +} early_param("no_hash_pointers", no_hash_pointers_enable); /* diff --git a/lib/xarray.c b/lib/xarray.c index 76dde3a1cacf..ae3d80f4b4ee 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1910,6 +1910,7 @@ EXPORT_SYMBOL(xa_store_range); * @xas: XArray operation state. * * Called after xas_load, the xas should not be in an error state. + * The xas should not be pointing to a sibling entry. * * Return: A number between 0 and 63 indicating the order of the entry. */ @@ -1920,6 +1921,8 @@ int xas_get_order(struct xa_state *xas) if (!xas->xa_node) return 0; + XA_NODE_BUG_ON(xas->xa_node, xa_is_sibling(xa_entry(xas->xa, + xas->xa_node, xas->xa_offset))); for (;;) { unsigned int slot = xas->xa_offset + (1 << order); diff --git a/lib/xxhash.c b/lib/xxhash.c index b5bd567aa6b3..cf629766f376 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -267,113 +267,6 @@ void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) } EXPORT_SYMBOL(xxh64_reset); -int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) -{ - const uint8_t *p = (const uint8_t *)input; - const uint8_t *const b_end = p + len; - - if (input == NULL) - return -EINVAL; - - state->total_len_32 += (uint32_t)len; - state->large_len |= (len >= 16) | (state->total_len_32 >= 16); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); - state->memsize += (uint32_t)len; - return 0; - } - - if (state->memsize) { /* some data left from previous update */ - const uint32_t *p32 = state->mem32; - - memcpy((uint8_t *)(state->mem32) + state->memsize, input, - 16 - state->memsize); - - state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); - p32++; - state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); - p32++; - state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); - p32++; - state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); - p32++; - - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= b_end - 16) { - const uint8_t *const limit = b_end - 16; - uint32_t v1 = state->v1; - uint32_t v2 = state->v2; - uint32_t v3 = state->v3; - uint32_t v4 = state->v4; - - do { - v1 = xxh32_round(v1, get_unaligned_le32(p)); - p += 4; - v2 = xxh32_round(v2, get_unaligned_le32(p)); - p += 4; - v3 = xxh32_round(v3, get_unaligned_le32(p)); - p += 4; - v4 = xxh32_round(v4, get_unaligned_le32(p)); - p += 4; - 
} while (p <= limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < b_end) { - memcpy(state->mem32, p, (size_t)(b_end-p)); - state->memsize = (uint32_t)(b_end-p); - } - - return 0; -} -EXPORT_SYMBOL(xxh32_update); - -uint32_t xxh32_digest(const struct xxh32_state *state) -{ - const uint8_t *p = (const uint8_t *)state->mem32; - const uint8_t *const b_end = (const uint8_t *)(state->mem32) + - state->memsize; - uint32_t h32; - - if (state->large_len) { - h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + - xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - while (p + 4 <= b_end) { - h32 += get_unaligned_le32(p) * PRIME32_3; - h32 = xxh_rotl32(h32, 17) * PRIME32_4; - p += 4; - } - - while (p < b_end) { - h32 += (*p) * PRIME32_5; - h32 = xxh_rotl32(h32, 11) * PRIME32_1; - p++; - } - - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} -EXPORT_SYMBOL(xxh32_digest); - int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; |
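With the streaming xxh32_update()/xxh32_digest() pair removed above, 32-bit hashing callers are left with the one-shot interface, assuming xxh32() itself remains available (it is not touched by the hunks shown here). A minimal sketch of such a caller; the function name is illustrative:

	#include <linux/xxhash.h>

	/* Hash a complete buffer in one call; 0 is an arbitrary seed. */
	static u32 buf_checksum(const void *buf, size_t len)
	{
		return xxh32(buf, len, 0);
	}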
