From 1e6b17b4237dacb02e9cfeaed35d889bbc9e8a84 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Wed, 4 Jun 2025 09:45:33 +0530 Subject: xarray: add a BUG_ON() to ensure caller is not sibling Suppose xas is pointing somewhere near the end of the multi-entry batch. Then it may happen that the computed slot already falls beyond the batch, thus breaking the loop due to !xa_is_sibling(), and computing the wrong order. For example, suppose we have a shift-6 node having an order-9 entry => 8 - 1 = 7 siblings, so assume the slots are at offset 0 till 7 in this node. If xas->xa_offset is 6, then the code will compute order as 1 + xas->xa_node->shift = 7. Therefore, the order computation must start from the beginning of the multi-slot entries, that is, the non-sibling entry. Thus ensure that the caller is aware of this by triggering a BUG when the entry is a sibling entry. Note that this BUG_ON() is only active while running selftests, so there is no overhead in a running kernel. Link: https://lkml.kernel.org/r/20250604041533.91198-1-dev.jain@arm.com Signed-off-by: Dev Jain Acked-by: Zi Yan Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Ryan Roberts Signed-off-by: Andrew Morton --- lib/xarray.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/xarray.c b/lib/xarray.c index 76dde3a1cacf..ae3d80f4b4ee 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1910,6 +1910,7 @@ EXPORT_SYMBOL(xa_store_range); * @xas: XArray operation state. * * Called after xas_load, the xas should not be in an error state. + * The xas should not be pointing to a sibling entry. * * Return: A number between 0 and 63 indicating the order of the entry. 
*/ @@ -1920,6 +1921,8 @@ int xas_get_order(struct xa_state *xas) if (!xas->xa_node) return 0; + XA_NODE_BUG_ON(xas->xa_node, xa_is_sibling(xa_entry(xas->xa, + xas->xa_node, xas->xa_offset))); for (;;) { unsigned int slot = xas->xa_offset + (1 << order); -- cgit v1.2.3 From 6046a3bed1c2b028e692f7606e3450d1c93e8fdd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jun 2025 11:21:50 +0200 Subject: lib/test_hmm: reduce stack usage The various test ioctl handlers use arrays of 64 integers that add up to 1KiB of stack data, which in turn leads to exceeding the warning limit in some configurations: lib/test_hmm.c:935:12: error: stack frame size (1408) exceeds limit (1280) in 'dmirror_migrate_to_device' [-Werror,-Wframe-larger-than] Use half the size for these arrays, in order to stay under the warning limits. The code can already deal with arbitrary lengths, but this may be a little less efficient. Link: https://lkml.kernel.org/r/20250610092159.2639515-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Alistair Popple Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Jeff Johnson Cc: Jerome Glisse Cc: Thorsten Blum Signed-off-by: Andrew Morton --- lib/test_hmm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 5b144bc5c4ec..761725bc713c 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -330,7 +330,7 @@ static int dmirror_fault(struct dmirror *dmirror, unsigned long start, { struct mm_struct *mm = dmirror->notifier.mm; unsigned long addr; - unsigned long pfns[64]; + unsigned long pfns[32]; struct hmm_range range = { .notifier = &dmirror->notifier, .hmm_pfns = pfns, @@ -879,8 +879,8 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror, unsigned long size = cmd->npages << PAGE_SHIFT; struct mm_struct *mm = dmirror->notifier.mm; struct vm_area_struct *vma; - unsigned long src_pfns[64] = { 0 }; - unsigned long dst_pfns[64] = { 0 }; + unsigned long src_pfns[32] = { 0 }; + 
unsigned long dst_pfns[32] = { 0 }; struct migrate_vma args = { 0 }; unsigned long next; int ret; @@ -939,8 +939,8 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror, unsigned long size = cmd->npages << PAGE_SHIFT; struct mm_struct *mm = dmirror->notifier.mm; struct vm_area_struct *vma; - unsigned long src_pfns[64] = { 0 }; - unsigned long dst_pfns[64] = { 0 }; + unsigned long src_pfns[32] = { 0 }; + unsigned long dst_pfns[32] = { 0 }; struct dmirror_bounce bounce; struct migrate_vma args = { 0 }; unsigned long next; @@ -1144,8 +1144,8 @@ static int dmirror_snapshot(struct dmirror *dmirror, unsigned long size = cmd->npages << PAGE_SHIFT; unsigned long addr; unsigned long next; - unsigned long pfns[64]; - unsigned char perm[64]; + unsigned long pfns[32]; + unsigned char perm[32]; char __user *uptr; struct hmm_range range = { .hmm_pfns = pfns, -- cgit v1.2.3 From b0da7709c28c35e0a51d4b1b350c9028358dfb14 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Mon, 9 Jun 2025 14:42:00 +0800 Subject: alloc_tag: add sequence number for module and iterator The codetag iterator uses an (id, address) pair to guarantee its validity. But both the id and the address can be reused: there is a theoretical possibility that, when a module is inserted right after another module is removed, kmalloc returns the same address that was kfree'd by the previous module and the IDR reuses the key that was just removed. Add a sequence number to codetag_module and codetag_iterator; the sequence number is strictly incremented whenever a module is loaded. An iterator is valid if and only if its sequence number matches codetag_module's. 
Link: https://lkml.kernel.org/r/20250609064200.112639-1-00107082@163.com Signed-off-by: David Wang <00107082@163.com> Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Tim Chen Signed-off-by: Andrew Morton --- lib/codetag.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/codetag.c b/lib/codetag.c index 650d54d7e14d..545911cebd25 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -11,8 +11,14 @@ struct codetag_type { struct list_head link; unsigned int count; struct idr mod_idr; - struct rw_semaphore mod_lock; /* protects mod_idr */ + /* + * protects mod_idr, next_mod_seq, + * iter->mod_seq and cmod->mod_seq + */ + struct rw_semaphore mod_lock; struct codetag_type_desc desc; + /* generates unique sequence number for module load */ + unsigned long next_mod_seq; }; struct codetag_range { @@ -23,6 +29,7 @@ struct codetag_range { struct codetag_module { struct module *mod; struct codetag_range range; + unsigned long mod_seq; }; static DEFINE_MUTEX(codetag_lock); @@ -48,6 +55,7 @@ struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype) .cmod = NULL, .mod_id = 0, .ct = NULL, + .mod_seq = 0, }; return iter; @@ -91,11 +99,13 @@ struct codetag *codetag_next_ct(struct codetag_iterator *iter) if (!cmod) break; - if (cmod != iter->cmod) { + if (!iter->cmod || iter->mod_seq != cmod->mod_seq) { iter->cmod = cmod; + iter->mod_seq = cmod->mod_seq; ct = get_first_module_ct(cmod); - } else + } else { ct = get_next_module_ct(iter); + } if (ct) break; @@ -191,6 +201,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) cmod->range = range; down_write(&cttype->mod_lock); + cmod->mod_seq = ++cttype->next_mod_seq; mod_id = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); if (mod_id >= 0) { if (cttype->desc.module_load) { -- cgit v1.2.3 From 9f44df50fee4d2f6cb374177244ccfa9f0a5cc95 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Mon, 9 Jun 2025 14:44:08 +0800 
Subject: alloc_tag: keep codetag iterator active between read() When reading /proc/allocinfo, seq_file invokes the start/stop callbacks for each read syscall. In the start callback, memory is allocated to store the iterator, and the iterator starts from the beginning and walks linearly to the current read position. seq_file read() takes at most 4096 bytes, even if read with a larger user space buffer, meaning that reading out all of /proc/allocinfo needs tens of read syscalls. For example, a 306036-byte allocinfo file needs 76 reads: $ sudo cat /proc/allocinfo | wc 3964 16678 306036 $ sudo strace -T -e read cat /proc/allocinfo ... read(3, " 4096 1 arch/x86/k"..., 131072) = 4063 <0.000062> ... read(3, " 0 0 sound/core"..., 131072) = 4021 <0.000150> ... For those n=3964 lines, each read takes about m=3964/76=52 lines. Since the iterator restarts from the beginning for each read(), it would move forward m steps on 1st read 2*m steps on 2nd read 3*m steps on 3rd read ... n steps on last read As the read() calls go on, those linear seek steps make read() calls slower and slower. Adding those up, the codetag iterator moves about O(n*n/m) steps, making data structure traversal take a significant part of the whole reading. Profiling when stress reading /proc/allocinfo confirms it: vfs_read(99.959% 1677299/1677995) proc_reg_read_iter(99.856% 1674881/1677299) seq_read_iter(99.959% 1674191/1674881) allocinfo_start(75.664% 1266755/1674191) codetag_next_ct(79.217% 1003487/1266755) <--- srso_return_thunk(1.264% 16011/1266755) __kmalloc_cache_noprof(0.102% 1296/1266755) ... allocinfo_show(21.287% 356378/1674191) allocinfo_next(1.530% 25621/1674191) codetag_next_ct() takes the major part. Private data allocated at open() time can be used to keep the iterator alive across read() calls, and avoid the memory allocation and iterator reset for each read(). This way, only O(1) memory allocations and O(n) iteration steps are needed, and `time` shows performance improvement from ~7ms to ~4ms. 
Profiling with the change: vfs_read(99.865% 1581073/1583214) proc_reg_read_iter(99.485% 1572934/1581073) seq_read_iter(99.846% 1570519/1572934) allocinfo_show(87.428% 1373074/1570519) seq_buf_printf(83.695% 1149196/1373074) seq_buf_putc(1.917% 26321/1373074) _find_next_bit(1.531% 21023/1373074) ... codetag_to_text(0.490% 6727/1373074) ... allocinfo_next(6.275% 98543/1570519) ... allocinfo_start(0.369% 5790/1570519) ... Now seq_buf_printf() takes major part. Link: https://lkml.kernel.org/r/20250609064408.112783-1-00107082@163.com Signed-off-by: David Wang <00107082@163.com> Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Tim Chen Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 3a74d63a959e..36f07dc95069 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -46,21 +46,16 @@ struct allocinfo_private { static void *allocinfo_start(struct seq_file *m, loff_t *pos) { struct allocinfo_private *priv; - struct codetag *ct; loff_t node = *pos; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - m->private = priv; - if (!priv) - return NULL; - - priv->print_header = (node == 0); + priv = (struct allocinfo_private *)m->private; codetag_lock_module_list(alloc_tag_cttype, true); - priv->iter = codetag_get_ct_iter(alloc_tag_cttype); - while ((ct = codetag_next_ct(&priv->iter)) != NULL && node) - node--; - - return ct ? priv : NULL; + if (node == 0) { + priv->print_header = true; + priv->iter = codetag_get_ct_iter(alloc_tag_cttype); + codetag_next_ct(&priv->iter); + } + return priv->iter.ct ? 
priv : NULL; } static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos) @@ -77,12 +72,7 @@ static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos) static void allocinfo_stop(struct seq_file *m, void *arg) { - struct allocinfo_private *priv = (struct allocinfo_private *)m->private; - - if (priv) { - codetag_lock_module_list(alloc_tag_cttype, false); - kfree(priv); - } + codetag_lock_module_list(alloc_tag_cttype, false); } static void print_allocinfo_header(struct seq_buf *buf) @@ -817,7 +807,8 @@ static int __init alloc_tag_init(void) return 0; } - if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) { + if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op, + sizeof(struct allocinfo_private), NULL)) { pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME); shutdown_mem_profiling(false); return -ENOMEM; -- cgit v1.2.3 From 59b5ed409d03bc8b7bb153d78afcd7cea9d7bbfa Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Wed, 18 Jun 2025 09:58:09 +0800 Subject: mm/percpu: conditionally define _shared_alloc_tag via CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU Recently discovered this entry while checking kallsyms on ARM64: ffff800083e509c0 D _shared_alloc_tag If ARCH_NEEDS_WEAK_PER_CPU is not defined (it is only defined for s390 and alpha architectures), there's no need to statically define the percpu variable _shared_alloc_tag. Therefore, we need to implement isolation for this purpose. When building the core kernel code for s390 or alpha architectures, ARCH_NEEDS_WEAK_PER_CPU remains undefined (as it is gated by #if defined(MODULE)). However, when building modules for these architectures, the macro is explicitly defined. Therefore, we remove all instances of ARCH_NEEDS_WEAK_PER_CPU from the code and introduce CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU to replace the relevant logic. We can now conditionally define the percpu variable _shared_alloc_tag based on CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU. 
This allows architectures (such as s390/alpha) that require weak definitions for percpu variables in modules to include the definition, while others can omit it via compile-time exclusion. Link: https://lkml.kernel.org/r/20250618015809.1235761-1-hao.ge@linux.dev Signed-off-by: Hao Ge Suggested-by: Suren Baghdasaryan Acked-by: Alexander Gordeev [s390] Acked-by: Mike Rapoport (Microsoft) Cc: Chistoph Lameter Cc: Christian Borntraeger Cc: David Hildenbrand Cc: Dennis Zhou Cc: Heiko Carstens Cc: Kent Overstreet Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matt Turner Cc: Richard Henderson Cc: Sven Schnelle Cc: Tejun Heo Cc: Vasily Gorbik Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 36f07dc95069..41ccfb035b7b 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -25,8 +25,10 @@ static bool mem_profiling_support; static struct codetag_type *alloc_tag_cttype; +#ifdef CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); EXPORT_SYMBOL(_shared_alloc_tag); +#endif DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, mem_alloc_profiling_key); -- cgit v1.2.3 From 592b939b59b43a817ce6d79900793982d452bb5d Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Tue, 24 Jun 2025 13:37:48 +0530 Subject: maple tree: use goto label to simplify code Use the underflow goto label to set the status to ma_underflow and return NULL, as is being done elsewhere. [akpm@linux-foundation.org: add newline, per Liam (and remove one, per akpm)] Link: https://lkml.kernel.org/r/20250624080748.4855-1-dev.jain@arm.com Signed-off-by: Dev Jain Reviewed-by: Liam R. 
Howlett Reviewed-by: Wei Yang Signed-off-by: Andrew Morton --- lib/maple_tree.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 00524e55a21e..34b84b14985e 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4560,15 +4560,12 @@ again: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; - if (likely(entry)) return entry; if (!empty) { - if (mas->index <= min) { - mas->status = ma_underflow; - return NULL; - } + if (mas->index <= min) + goto underflow; goto again; } -- cgit v1.2.3 From 2e728505494b21b874fa87fce233c63b43d74434 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Mon, 23 Jun 2025 20:40:34 +0200 Subject: lib/test_vmalloc.c: use late_initcall() if built-in for init ordering When the vmalloc test code is compiled as a built-in, use late_initcall() instead of module_init() to defer a vmalloc test execution until most subsystems are up and running. It avoids interfering with components that may not yet be initialized at module_init() time. For example, there was a recent report of memory profiling infrastructure not being ready early enough leading to kernel crash. By using late_initcall() in the built-in case, we ensure the tests are run at a safer point during a boot sequence. Link: https://lkml.kernel.org/r/20250623184035.581229-1-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Baoquan He Cc: Harry Yoo Cc: Suren Baghdasaryan Cc: David Wang <00107082@163.com> Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 1b0b59549aaf..7264781750c9 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -598,7 +598,11 @@ static int __init vmalloc_test_init(void) return IS_BUILTIN(CONFIG_TEST_VMALLOC) ? 
0:-EAGAIN; } +#ifdef MODULE module_init(vmalloc_test_init) +#else +late_initcall(vmalloc_test_init); +#endif MODULE_LICENSE("GPL"); MODULE_AUTHOR("Uladzislau Rezki"); -- cgit v1.2.3 From d8e77a0b636485364d70b86addf0c76bf9bccc4f Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Mon, 23 Jun 2025 20:40:35 +0200 Subject: lib/test_vmalloc.c: restrict default test mask to avoid test warnings When the vmalloc test is built into the kernel, it runs automatically during the boot. The current-default "run_test_mask" includes all test cases, including those which are designed to fail and which trigger kernel warnings. These kernel splats can be misinterpreted as actual kernel bugs, leading to false alarms and unnecessary reports. To address this, limit the default test mask to only the first few tests which are expected to pass cleanly. These tests are safe and should not generate any warnings unless there is a real bug. Users who wish to explicitly run specific test cases have to pass the run_test_mask as a boot parameter or at module load time. Link: https://lkml.kernel.org/r/20250623184035.581229-2-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Baoquan He Cc: Harry Yoo Cc: Suren Baghdasaryan Cc: David Wang <00107082@163.com> Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 7264781750c9..c1966cf72ab8 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -41,7 +41,7 @@ __param(int, nr_pages, 0, __param(bool, use_huge, false, "Use vmalloc_huge in fix_size_alloc_test"); -__param(int, run_test_mask, INT_MAX, +__param(int, run_test_mask, 7, "Set tests specified in the mask.\n\n" "\t\tid: 1, name: fix_size_alloc_test\n" "\t\tid: 2, name: full_fit_alloc_test\n" -- cgit v1.2.3 From 12f1d7931283106306a8822c5279013b8a0f1242 Mon Sep 17 00:00:00 2001 From: "Liam R. 
Howlett" Date: Tue, 24 Jun 2025 11:48:22 -0400 Subject: maple_tree: fix status setup on restore to active During the initial call with a maple state, an error status may be set before a valid node is populated into the maple state node. Subsequent calls with the maple state may restore the state into an active state with no node set. This was masked by the mas_walk() always resetting the status to ma_start and result in an extra walk in this rare scenario. Don't restore the state to active unless there is a value in the structs node. This also allows mas_walk() to be fixed to use the active state without exposing an issue. User visible results are marginal performance improvements when an active state can be restored and used instead of rewalking the tree. Stable is not Cc'ed because the existing code is stable and the performance gains are not worth the risk. Link: https://lore.kernel.org/all/20250611011253.19515-1-richard.weiyang@gmail.com/ Link: https://lore.kernel.org/all/20250407231354.11771-1-richard.weiyang@gmail.com/ Link: https://lore.kernel.org/all/202506191556.6bfc7b93-lkp@intel.com/ Link: https://lkml.kernel.org/r/20250624154823.52221-1-Liam.Howlett@oracle.com Fixes: a8091f039c1e ("maple_tree: add MAS_UNDERFLOW and MAS_OVERFLOW states") Signed-off-by: Liam R. 
Howlett Reported-by: Wei Yang Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202506191556.6bfc7b93-lkp@intel.com Reviewed-by: Wei Yang Signed-off-by: Andrew Morton --- lib/maple_tree.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 34b84b14985e..7601c7c2bc09 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4927,7 +4927,7 @@ void *mas_walk(struct ma_state *mas) { void *entry; - if (!mas_is_active(mas) || !mas_is_start(mas)) + if (!mas_is_active(mas) && !mas_is_start(mas)) mas->status = ma_start; retry: entry = mas_state_walk(mas); @@ -5655,6 +5655,17 @@ int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) } EXPORT_SYMBOL_GPL(mas_expected_entries); +static void mas_may_activate(struct ma_state *mas) +{ + if (!mas->node) { + mas->status = ma_start; + } else if (mas->index > mas->max || mas->index < mas->min) { + mas->status = ma_start; + } else { + mas->status = ma_active; + } +} + static bool mas_next_setup(struct ma_state *mas, unsigned long max, void **entry) { @@ -5678,11 +5689,11 @@ static bool mas_next_setup(struct ma_state *mas, unsigned long max, break; case ma_overflow: /* Overflowed before, but the max changed */ - mas->status = ma_active; + mas_may_activate(mas); break; case ma_underflow: /* The user expects the mas to be one before where it is */ - mas->status = ma_active; + mas_may_activate(mas); *entry = mas_walk(mas); if (*entry) return true; @@ -5803,11 +5814,11 @@ static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry break; case ma_underflow: /* underflowed before but the min changed */ - mas->status = ma_active; + mas_may_activate(mas); break; case ma_overflow: /* User expects mas to be one after where it is */ - mas->status = ma_active; + mas_may_activate(mas); *entry = mas_walk(mas); if (*entry) return true; @@ -5972,7 +5983,7 @@ static __always_inline bool 
mas_find_setup(struct ma_state *mas, unsigned long m return true; } - mas->status = ma_active; + mas_may_activate(mas); *entry = mas_walk(mas); if (*entry) return true; @@ -5981,7 +5992,7 @@ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long m if (unlikely(mas->last >= max)) return true; - mas->status = ma_active; + mas_may_activate(mas); *entry = mas_walk(mas); if (*entry) return true; -- cgit v1.2.3 From 1f0bce2fa8c6bfd65cf78ad6ef6e0948fc55c7bb Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 24 Jun 2025 11:48:23 -0400 Subject: maple_tree: add testing for restoring maple state to active Restoring maple status to ma_active on overflow/underflow when mas->node was NULL could have happened in the past, but was masked by a bug in mas_walk(). Add test cases that triggered the bug when the node was mas->node prior to fixing the maple state setup. Add a few extra tests around restoring the active maple status. Link: https://lore.kernel.org/all/202506191556.6bfc7b93-lkp@intel.com/ Link: https://lkml.kernel.org/r/20250624154823.52221-2-Liam.Howlett@oracle.com Signed-off-by: Liam R. 
Howlett Reviewed-by: Wei Yang Signed-off-by: Andrew Morton --- lib/test_maple_tree.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'lib') diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 13e2a10d7554..cb3936595b0d 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -3177,6 +3177,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) void *entry, *ptr = (void *) 0x1234500; void *ptr2 = &ptr; void *ptr3 = &ptr2; + unsigned long index; /* Check MAS_ROOT First */ mtree_store_range(mt, 0, 0, ptr, GFP_KERNEL); @@ -3706,6 +3707,37 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x1fff); MT_BUG_ON(mt, !mas_is_active(&mas)); + mas_unlock(&mas); + mtree_destroy(mt); + + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mas_lock(&mas); + for (int count = 0; count < 30; count++) { + mas_set(&mas, count); + mas_store_gfp(&mas, xa_mk_value(count), GFP_KERNEL); + } + + /* Ensure mas_find works with MA_UNDERFLOW */ + mas_set(&mas, 0); + entry = mas_walk(&mas); + mas_set(&mas, 0); + mas_prev(&mas, 0); + MT_BUG_ON(mt, mas.status != ma_underflow); + MT_BUG_ON(mt, mas_find(&mas, ULONG_MAX) != entry); + + /* Restore active on mas_next */ + entry = mas_next(&mas, ULONG_MAX); + index = mas.index; + mas_prev(&mas, index); + MT_BUG_ON(mt, mas.status != ma_underflow); + MT_BUG_ON(mt, mas_next(&mas, ULONG_MAX) != entry); + + /* Ensure overflow -> active works */ + mas_prev(&mas, 0); + mas_next(&mas, index - 1); + MT_BUG_ON(mt, mas.status != ma_overflow); + MT_BUG_ON(mt, mas_next(&mas, ULONG_MAX) != entry); + mas_unlock(&mas); } -- cgit v1.2.3 From 526f36f3f47b9ad29ffb1bf668b7f295287ee11b Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Thu, 3 Jul 2025 12:03:38 +0530 Subject: maple tree: add some comments Add comments explaining the fields for maple_metadata, since "end" is ambiguous and "gap" can be confused as the largest gap, whereas it is actually the offset 
of the largest gap. Add comment for mas_ascend() to explain whose min and max we are trying to find. Explain that, for example, if we are already on offset zero, then the parent min is mas->min, otherwise we need to walk up to find the implied pivot min. Link: https://lkml.kernel.org/r/20250703063338.51509-1-dev.jain@arm.com Signed-off-by: Dev Jain Reviewed-by: Liam R. Howlett Cc: Wei Yang Signed-off-by: Andrew Morton --- lib/maple_tree.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 0e85e92c5375..b4ee2d29d7a9 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1053,7 +1053,7 @@ static inline void mte_set_gap(const struct maple_enode *mn, * mas_ascend() - Walk up a level of the tree. * @mas: The maple state * - * Sets the @mas->max and @mas->min to the correct values when walking up. This + * Sets the @mas->max and @mas->min for the parent node of mas->node. This * may cause several levels of walking up to find the correct min and max. * May find a dead node which will cause a premature return. * Return: 1 on dead node, 0 otherwise @@ -1098,6 +1098,12 @@ static int mas_ascend(struct ma_state *mas) min = 0; max = ULONG_MAX; + + /* + * !mas->offset implies that parent node min == mas->min. + * mas->offset > 0 implies that we need to walk up to find the + * implied pivot min. + */ if (!mas->offset) { min = mas->min; set_min = true; -- cgit v1.2.3 From ee58e38489772f356c1ac79e0724183497e43249 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Wed, 2 Jul 2025 06:43:19 +0000 Subject: lib/test_vmalloc.c: introduce xfail for failing tests The test align_shift_alloc_test is expected to fail. Reporting the test as failed makes it look like a genuine failure. Introduce widely used xfail semantics to address the issue. Note: a warn_alloc dump similar to the one below is still expected: Call Trace: dump_stack_lvl+0x64/0x80 warn_alloc+0x137/0x1b0 ? 
__get_vm_area_node+0x134/0x140 Snippet of dmesg after change: Summary: random_size_align_alloc_test passed: 1 failed: 0 xfailed: 0 .. Summary: align_shift_alloc_test passed: 0 failed: 0 xfailed: 1 .. Summary: pcpu_alloc_test passed: 1 failed: 0 xfailed: 0 .. Link: https://lkml.kernel.org/r/20250702064319.885-1-raghavendra.kt@amd.com Signed-off-by: Raghavendra K T Reviewed-by: "Uladzislau Rezki (Sony)" Cc: Dev Jain Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index c1966cf72ab8..2815658ccc37 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -396,25 +396,27 @@ cleanup: struct test_case_desc { const char *test_name; int (*test_func)(void); + bool xfail; }; static struct test_case_desc test_case_array[] = { - { "fix_size_alloc_test", fix_size_alloc_test }, - { "full_fit_alloc_test", full_fit_alloc_test }, - { "long_busy_list_alloc_test", long_busy_list_alloc_test }, - { "random_size_alloc_test", random_size_alloc_test }, - { "fix_align_alloc_test", fix_align_alloc_test }, - { "random_size_align_alloc_test", random_size_align_alloc_test }, - { "align_shift_alloc_test", align_shift_alloc_test }, - { "pcpu_alloc_test", pcpu_alloc_test }, - { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, - { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, - { "vm_map_ram_test", vm_map_ram_test }, + { "fix_size_alloc_test", fix_size_alloc_test, }, + { "full_fit_alloc_test", full_fit_alloc_test, }, + { "long_busy_list_alloc_test", long_busy_list_alloc_test, }, + { "random_size_alloc_test", random_size_alloc_test, }, + { "fix_align_alloc_test", fix_align_alloc_test, }, + { "random_size_align_alloc_test", random_size_align_alloc_test, }, + { "align_shift_alloc_test", align_shift_alloc_test, true }, + { "pcpu_alloc_test", pcpu_alloc_test, }, + { "kvfree_rcu_1_arg_vmalloc_test", 
kvfree_rcu_1_arg_vmalloc_test, }, + { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test, }, + { "vm_map_ram_test", vm_map_ram_test, }, /* Add a new test case here. */ }; struct test_case_data { int test_failed; + int test_xfailed; int test_passed; u64 time; }; @@ -444,7 +446,7 @@ static int test_func(void *private) { struct test_driver *t = private; int random_array[ARRAY_SIZE(test_case_array)]; - int index, i, j; + int index, i, j, ret; ktime_t kt; u64 delta; @@ -468,11 +470,14 @@ static int test_func(void *private) */ if (!((run_test_mask & (1 << index)) >> index)) continue; - kt = ktime_get(); for (j = 0; j < test_repeat_count; j++) { - if (!test_case_array[index].test_func()) + ret = test_case_array[index].test_func(); + + if (!ret && !test_case_array[index].xfail) t->data[index].test_passed++; + else if (ret && test_case_array[index].xfail) + t->data[index].test_xfailed++; else t->data[index].test_failed++; } @@ -576,10 +581,11 @@ static void do_concurrent_test(void) continue; pr_info( - "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", + "Summary: %s passed: %d failed: %d xfailed: %d repeat: %d loops: %d avg: %llu usec\n", test_case_array[j].test_name, t->data[j].test_passed, t->data[j].test_failed, + t->data[j].test_xfailed, test_repeat_count, test_loop_count, t->data[j].time); } -- cgit v1.2.3