From 1e6b17b4237dacb02e9cfeaed35d889bbc9e8a84 Mon Sep 17 00:00:00 2001
From: Dev Jain <dev.jain@arm.com>
Date: Wed, 4 Jun 2025 09:45:33 +0530
Subject: xarray: add a BUG_ON() to ensure caller is not sibling

Suppose xas is pointing somewhere near the end of the multi-entry batch.
Then it may happen that the computed slot already falls beyond the batch,
thus breaking the loop due to !xa_is_sibling(), and computing the wrong
order.

For example, suppose we have a shift-6 node having an order-9 entry => 8 -
1 = 7 siblings, so assume the slots are at offset 0 till 7 in this node.
If xas->xa_offset is 6, then the code will compute order as 1 +
xas->xa_node->shift = 7.  Therefore, the order computation must start from
the beginning of the multi-slot entries, that is, the non-sibling entry.

Thus ensure that the caller is aware of this by triggering a BUG when the
entry is a sibling entry.  Note that this BUG_ON() is only active while
running selftests, so there is no overhead in a running kernel.

Link: https://lkml.kernel.org/r/20250604041533.91198-1-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain@arm.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/xarray.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'lib')

diff --git a/lib/xarray.c b/lib/xarray.c
index 76dde3a1cacf..ae3d80f4b4ee 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1910,6 +1910,7 @@ EXPORT_SYMBOL(xa_store_range);
  * @xas: XArray operation state.
  *
  * Called after xas_load, the xas should not be in an error state.
+ * The xas should not be pointing to a sibling entry.
  *
  * Return: A number between 0 and 63 indicating the order of the entry.
  */
@@ -1920,6 +1921,8 @@ int xas_get_order(struct xa_state *xas)
 	if (!xas->xa_node)
 		return 0;
 
+	XA_NODE_BUG_ON(xas->xa_node, xa_is_sibling(xa_entry(xas->xa,
+		       xas->xa_node, xas->xa_offset)));
 	for (;;) {
 		unsigned int slot = xas->xa_offset + (1 << order);
 
-- 
cgit v1.2.3


From 6046a3bed1c2b028e692f7606e3450d1c93e8fdd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Jun 2025 11:21:50 +0200
Subject: lib/test_hmm: reduce stack usage

The various test ioctl handlers use arrays of 64 integers that add up to
1KiB of stack data, which in turn leads to exceeding the warning limit in
some configurations:

lib/test_hmm.c:935:12: error: stack frame size (1408) exceeds limit (1280)
in 'dmirror_migrate_to_device' [-Werror,-Wframe-larger-than]

Use half the size for these arrays, in order to stay under the warning
limits.  The code can already deal with arbitrary lengths, but this may be
a little less efficient.

Link: https://lkml.kernel.org/r/20250610092159.2639515-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/test_hmm.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 5b144bc5c4ec..761725bc713c 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -330,7 +330,7 @@ static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
 {
 	struct mm_struct *mm = dmirror->notifier.mm;
 	unsigned long addr;
-	unsigned long pfns[64];
+	unsigned long pfns[32];
 	struct hmm_range range = {
 		.notifier = &dmirror->notifier,
 		.hmm_pfns = pfns,
@@ -879,8 +879,8 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
 	unsigned long size = cmd->npages << PAGE_SHIFT;
 	struct mm_struct *mm = dmirror->notifier.mm;
 	struct vm_area_struct *vma;
-	unsigned long src_pfns[64] = { 0 };
-	unsigned long dst_pfns[64] = { 0 };
+	unsigned long src_pfns[32] = { 0 };
+	unsigned long dst_pfns[32] = { 0 };
 	struct migrate_vma args = { 0 };
 	unsigned long next;
 	int ret;
@@ -939,8 +939,8 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
 	unsigned long size = cmd->npages << PAGE_SHIFT;
 	struct mm_struct *mm = dmirror->notifier.mm;
 	struct vm_area_struct *vma;
-	unsigned long src_pfns[64] = { 0 };
-	unsigned long dst_pfns[64] = { 0 };
+	unsigned long src_pfns[32] = { 0 };
+	unsigned long dst_pfns[32] = { 0 };
 	struct dmirror_bounce bounce;
 	struct migrate_vma args = { 0 };
 	unsigned long next;
@@ -1144,8 +1144,8 @@ static int dmirror_snapshot(struct dmirror *dmirror,
 	unsigned long size = cmd->npages << PAGE_SHIFT;
 	unsigned long addr;
 	unsigned long next;
-	unsigned long pfns[64];
-	unsigned char perm[64];
+	unsigned long pfns[32];
+	unsigned char perm[32];
 	char __user *uptr;
 	struct hmm_range range = {
 		.hmm_pfns = pfns,
-- 
cgit v1.2.3


From b0da7709c28c35e0a51d4b1b350c9028358dfb14 Mon Sep 17 00:00:00 2001
From: David Wang <00107082@163.com>
Date: Mon, 9 Jun 2025 14:42:00 +0800
Subject: alloc_tag: add sequence number for module and iterator

Codetag iterator use <id,address> pair to guarantee the validness.  But
both id and address can be reused, there is theoretical possibility when
module inserted right after another module removed, kmalloc returns an
address same as the address kfree by previous module and IDR key reuses
the key recently removed.

Add a sequence number to codetag_module and code_iterator, the sequence
number is strickly incremented whenever a module is loaded.  An iterator
is valid if and only if its sequence number match codetag_module's.

Link: https://lkml.kernel.org/r/20250609064200.112639-1-00107082@163.com
Signed-off-by: David Wang <00107082@163.com>
Acked-by: Suren Baghdasaryan <surenb@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/codetag.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/codetag.c b/lib/codetag.c
index 650d54d7e14d..545911cebd25 100644
--- a/lib/codetag.c
+++ b/lib/codetag.c
@@ -11,8 +11,14 @@ struct codetag_type {
 	struct list_head link;
 	unsigned int count;
 	struct idr mod_idr;
-	struct rw_semaphore mod_lock; /* protects mod_idr */
+	/*
+	 * protects mod_idr, next_mod_seq,
+	 * iter->mod_seq and cmod->mod_seq
+	 */
+	struct rw_semaphore mod_lock;
 	struct codetag_type_desc desc;
+	/* generates unique sequence number for module load */
+	unsigned long next_mod_seq;
 };
 
 struct codetag_range {
@@ -23,6 +29,7 @@ struct codetag_range {
 struct codetag_module {
 	struct module *mod;
 	struct codetag_range range;
+	unsigned long mod_seq;
 };
 
 static DEFINE_MUTEX(codetag_lock);
@@ -48,6 +55,7 @@ struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype)
 		.cmod = NULL,
 		.mod_id = 0,
 		.ct = NULL,
+		.mod_seq = 0,
 	};
 
 	return iter;
@@ -91,11 +99,13 @@ struct codetag *codetag_next_ct(struct codetag_iterator *iter)
 		if (!cmod)
 			break;
 
-		if (cmod != iter->cmod) {
+		if (!iter->cmod || iter->mod_seq != cmod->mod_seq) {
 			iter->cmod = cmod;
+			iter->mod_seq = cmod->mod_seq;
 			ct = get_first_module_ct(cmod);
-		} else
+		} else {
 			ct = get_next_module_ct(iter);
+		}
 
 		if (ct)
 			break;
@@ -191,6 +201,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
 	cmod->range = range;
 
 	down_write(&cttype->mod_lock);
+	cmod->mod_seq = ++cttype->next_mod_seq;
 	mod_id = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL);
 	if (mod_id >= 0) {
 		if (cttype->desc.module_load) {
-- 
cgit v1.2.3


From 9f44df50fee4d2f6cb374177244ccfa9f0a5cc95 Mon Sep 17 00:00:00 2001
From: David Wang <00107082@163.com>
Date: Mon, 9 Jun 2025 14:44:08 +0800
Subject: alloc_tag: keep codetag iterator active between read()

When reading /proc/allocinfo, for each read syscall, seq_file would invoke
start/stop callbacks.  In start callback, a memory is alloced to store
iterator and the iterator would start from beginning to walk linearly to
current read position.

seq_file read() takes at most 4096 bytes, even if read with a larger user
space buffer, meaning read out all of /proc/allocinfo, tens of read
syscalls are needed.  For example, a 306036 bytes allocinfo files need 76
reads:

 $ sudo cat /proc/allocinfo  | wc
    3964   16678  306036
 $ sudo strace -T -e read cat /proc/allocinfo
 ...
 read(3, "        4096        1 arch/x86/k"..., 131072) = 4063 <0.000062>
 ...
 read(3, "           0        0 sound/core"..., 131072) = 4021 <0.000150>
 ...
For those n=3964 lines, each read takes about m=3964/76=52 lines,
since iterator restart from beginning for each read(),
it would move forward
   m  steps on 1st read
 2*m  steps on 2nd read
 3*m  steps on 3rd read
 ...
   n  steps on last read
As read() along, those linear seek steps make read() calls slower and
slower.  Adding those up, codetag iterator moves about O(n*n/m) steps,
making data structure traversal take significant part of the whole
reading.  Profiling when stress reading /proc/allocinfo confirms it:

 vfs_read(99.959% 1677299/1677995)
     proc_reg_read_iter(99.856% 1674881/1677299)
         seq_read_iter(99.959% 1674191/1674881)
             allocinfo_start(75.664% 1266755/1674191)
                 codetag_next_ct(79.217% 1003487/1266755)  <---
                 srso_return_thunk(1.264% 16011/1266755)
                 __kmalloc_cache_noprof(0.102% 1296/1266755)
                 ...
             allocinfo_show(21.287% 356378/1674191)
             allocinfo_next(1.530% 25621/1674191)
codetag_next_ct() takes major part.

A private data alloced at open() time can be used to carry iterator alive
across read() calls, and avoid the memory allocation and iterator reset
for each read().  This way, only O(1) memory allocation and O(n) steps
iterating, and `time` shows performance improvement from ~7ms to ~4ms.
Profiling with the change:

 vfs_read(99.865% 1581073/1583214)
     proc_reg_read_iter(99.485% 1572934/1581073)
         seq_read_iter(99.846% 1570519/1572934)
             allocinfo_show(87.428% 1373074/1570519)
                 seq_buf_printf(83.695% 1149196/1373074)
                 seq_buf_putc(1.917% 26321/1373074)
                 _find_next_bit(1.531% 21023/1373074)
                 ...
                 codetag_to_text(0.490% 6727/1373074)
                 ...
             allocinfo_next(6.275% 98543/1570519)
             ...
             allocinfo_start(0.369% 5790/1570519)
             ...
Now seq_buf_printf() takes major part.

Link: https://lkml.kernel.org/r/20250609064408.112783-1-00107082@163.com
Signed-off-by: David Wang <00107082@163.com>
Acked-by: Suren Baghdasaryan <surenb@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/alloc_tag.c | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

(limited to 'lib')

diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 3a74d63a959e..36f07dc95069 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -46,21 +46,16 @@ struct allocinfo_private {
 static void *allocinfo_start(struct seq_file *m, loff_t *pos)
 {
 	struct allocinfo_private *priv;
-	struct codetag *ct;
 	loff_t node = *pos;
 
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	m->private = priv;
-	if (!priv)
-		return NULL;
-
-	priv->print_header = (node == 0);
+	priv = (struct allocinfo_private *)m->private;
 	codetag_lock_module_list(alloc_tag_cttype, true);
-	priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
-	while ((ct = codetag_next_ct(&priv->iter)) != NULL && node)
-		node--;
-
-	return ct ? priv : NULL;
+	if (node == 0) {
+		priv->print_header = true;
+		priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
+		codetag_next_ct(&priv->iter);
+	}
+	return priv->iter.ct ? priv : NULL;
 }
 
 static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
@@ -77,12 +72,7 @@ static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
 
 static void allocinfo_stop(struct seq_file *m, void *arg)
 {
-	struct allocinfo_private *priv = (struct allocinfo_private *)m->private;
-
-	if (priv) {
-		codetag_lock_module_list(alloc_tag_cttype, false);
-		kfree(priv);
-	}
+	codetag_lock_module_list(alloc_tag_cttype, false);
 }
 
 static void print_allocinfo_header(struct seq_buf *buf)
@@ -817,7 +807,8 @@ static int __init alloc_tag_init(void)
 		return 0;
 	}
 
-	if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) {
+	if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op,
+				     sizeof(struct allocinfo_private), NULL)) {
 		pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
 		shutdown_mem_profiling(false);
 		return -ENOMEM;
-- 
cgit v1.2.3


From 59b5ed409d03bc8b7bb153d78afcd7cea9d7bbfa Mon Sep 17 00:00:00 2001
From: Hao Ge <gehao@kylinos.cn>
Date: Wed, 18 Jun 2025 09:58:09 +0800
Subject: mm/percpu: conditionally define _shared_alloc_tag via
 CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU

Recently discovered this entry while checking kallsyms on ARM64:
ffff800083e509c0 D _shared_alloc_tag

If ARCH_NEEDS_WEAK_PER_CPU is not defined(it is only defined for s390 and
alpha architectures), there's no need to statically define the percpu
variable _shared_alloc_tag.

Therefore, we need to implement isolation for this purpose.

When building the core kernel code for s390 or alpha architectures,
ARCH_NEEDS_WEAK_PER_CPU remains undefined (as it is gated by #if
defined(MODULE)).  However, when building modules for these architectures,
the macro is explicitly defined.

Therefore, we remove all instances of ARCH_NEEDS_WEAK_PER_CPU from the
code and introduced CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU to replace the
relevant logic.  We can now conditionally define the perpcu variable
_shared_alloc_tag based on CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU.  This
allows architectures (such as s390/alpha) that require weak definitions
for percpu variables in modules to include the definition, while others
can omit it via compile-time exclusion.

Link: https://lkml.kernel.org/r/20250618015809.1235761-1-hao.ge@linux.dev
Signed-off-by: Hao Ge <gehao@kylinos.cn>
Suggested-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>	[s390]
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Chistoph Lameter <cl@linux.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/alloc_tag.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib')

diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 36f07dc95069..41ccfb035b7b 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -25,8 +25,10 @@ static bool mem_profiling_support;
 
 static struct codetag_type *alloc_tag_cttype;
 
+#ifdef CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU
 DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
 EXPORT_SYMBOL(_shared_alloc_tag);
+#endif
 
 DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
 			mem_alloc_profiling_key);
-- 
cgit v1.2.3


From 592b939b59b43a817ce6d79900793982d452bb5d Mon Sep 17 00:00:00 2001
From: Dev Jain <dev.jain@arm.com>
Date: Tue, 24 Jun 2025 13:37:48 +0530
Subject: maple tree: use goto label to simplify code

Use the underflow goto label to set the status to ma_underflow and return
NULL, as is being done elsewhere.

[akpm@linux-foundation.org: add newline, per Liam (and remove one, per akpm)]
Link: https://lkml.kernel.org/r/20250624080748.4855-1-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/maple_tree.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 00524e55a21e..34b84b14985e 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4560,15 +4560,12 @@ again:
 	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
 		goto retry;
 
-
 	if (likely(entry))
 		return entry;
 
 	if (!empty) {
-		if (mas->index <= min) {
-			mas->status = ma_underflow;
-			return NULL;
-		}
+		if (mas->index <= min)
+			goto underflow;
 
 		goto again;
 	}
-- 
cgit v1.2.3


From 2e728505494b21b874fa87fce233c63b43d74434 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Mon, 23 Jun 2025 20:40:34 +0200
Subject: lib/test_vmalloc.c: use late_initcall() if built-in for init ordering

When the vmalloc test code is compiled as a built-in, use late_initcall()
instead of module_init() to defer a vmalloc test execution until most
subsystems are up and running.

It avoids interfering with components that may not yet be initialized at
module_init() time.  For example, there was a recent report of memory
profiling infrastructure not being ready early enough leading to kernel
crash.

By using late_initcall() in the built-in case, we ensure the tests are run
at a safer point during a boot sequence.

Link: https://lkml.kernel.org/r/20250623184035.581229-1-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Baoquan He <bhe@redhat.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: David Wang <00107082@163.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/test_vmalloc.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib')

diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 1b0b59549aaf..7264781750c9 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -598,7 +598,11 @@ static int __init vmalloc_test_init(void)
 	return IS_BUILTIN(CONFIG_TEST_VMALLOC) ? 0:-EAGAIN;
 }
 
+#ifdef MODULE
 module_init(vmalloc_test_init)
+#else
+late_initcall(vmalloc_test_init);
+#endif
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Uladzislau Rezki");
-- 
cgit v1.2.3


From d8e77a0b636485364d70b86addf0c76bf9bccc4f Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Mon, 23 Jun 2025 20:40:35 +0200
Subject: lib/test_vmalloc.c: restrict default test mask to avoid test warnings

When the vmalloc test is built into the kernel, it runs automatically
during the boot.  The current-default "run_test_mask" includes all test
cases, including those which are designed to fail and which trigger kernel
warnings.

These kernel splats can be misinterpreted as actual kernel bugs, leading
to false alarms and unnecessary reports.

To address this, limit the default test mask to only the first few tests
which are expected to pass cleanly.  These tests are safe and should not
generate any warnings unless there is a real bug.

Users who wish to explicitly run specific test cases have to pass the
run_test_mask as a boot parameter or at module load time.

Link: https://lkml.kernel.org/r/20250623184035.581229-2-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Baoquan He <bhe@redhat.com>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: David Wang <00107082@163.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/test_vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 7264781750c9..c1966cf72ab8 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -41,7 +41,7 @@ __param(int, nr_pages, 0,
 __param(bool, use_huge, false,
 	"Use vmalloc_huge in fix_size_alloc_test");
 
-__param(int, run_test_mask, INT_MAX,
+__param(int, run_test_mask, 7,
 	"Set tests specified in the mask.\n\n"
 		"\t\tid: 1,    name: fix_size_alloc_test\n"
 		"\t\tid: 2,    name: full_fit_alloc_test\n"
-- 
cgit v1.2.3


From 12f1d7931283106306a8822c5279013b8a0f1242 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Tue, 24 Jun 2025 11:48:22 -0400
Subject: maple_tree: fix status setup on restore to active

During the initial call with a maple state, an error status may be set
before a valid node is populated into the maple state node.  Subsequent
calls with the maple state may restore the state into an active state with
no node set.  This was masked by the mas_walk() always resetting the
status to ma_start and result in an extra walk in this rare scenario.

Don't restore the state to active unless there is a value in the structs
node.  This also allows mas_walk() to be fixed to use the active state
without exposing an issue.

User visible results are marginal performance improvements when an active
state can be restored and used instead of rewalking the tree.

Stable is not Cc'ed because the existing code is stable and the
performance gains are not worth the risk.

Link: https://lore.kernel.org/all/20250611011253.19515-1-richard.weiyang@gmail.com/
Link: https://lore.kernel.org/all/20250407231354.11771-1-richard.weiyang@gmail.com/
Link: https://lore.kernel.org/all/202506191556.6bfc7b93-lkp@intel.com/
Link: https://lkml.kernel.org/r/20250624154823.52221-1-Liam.Howlett@oracle.com
Fixes: a8091f039c1e ("maple_tree: add MAS_UNDERFLOW and MAS_OVERFLOW states")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reported-by: Wei Yang <richard.weiyang@gmail.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202506191556.6bfc7b93-lkp@intel.com
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/maple_tree.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 34b84b14985e..7601c7c2bc09 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4927,7 +4927,7 @@ void *mas_walk(struct ma_state *mas)
 {
 	void *entry;
 
-	if (!mas_is_active(mas) || !mas_is_start(mas))
+	if (!mas_is_active(mas) && !mas_is_start(mas))
 		mas->status = ma_start;
 retry:
 	entry = mas_state_walk(mas);
@@ -5655,6 +5655,17 @@ int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
 }
 EXPORT_SYMBOL_GPL(mas_expected_entries);
 
+static void mas_may_activate(struct ma_state *mas)
+{
+	if (!mas->node) {
+		mas->status = ma_start;
+	} else if (mas->index > mas->max || mas->index < mas->min) {
+		mas->status = ma_start;
+	} else {
+		mas->status = ma_active;
+	}
+}
+
 static bool mas_next_setup(struct ma_state *mas, unsigned long max,
 		void **entry)
 {
@@ -5678,11 +5689,11 @@ static bool mas_next_setup(struct ma_state *mas, unsigned long max,
 		break;
 	case ma_overflow:
 		/* Overflowed before, but the max changed */
-		mas->status = ma_active;
+		mas_may_activate(mas);
 		break;
 	case ma_underflow:
 		/* The user expects the mas to be one before where it is */
-		mas->status = ma_active;
+		mas_may_activate(mas);
 		*entry = mas_walk(mas);
 		if (*entry)
 			return true;
@@ -5803,11 +5814,11 @@ static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry
 		break;
 	case ma_underflow:
 		/* underflowed before but the min changed */
-		mas->status = ma_active;
+		mas_may_activate(mas);
 		break;
 	case ma_overflow:
 		/* User expects mas to be one after where it is */
-		mas->status = ma_active;
+		mas_may_activate(mas);
 		*entry = mas_walk(mas);
 		if (*entry)
 			return true;
@@ -5972,7 +5983,7 @@ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long m
 			return true;
 		}
 
-		mas->status = ma_active;
+		mas_may_activate(mas);
 		*entry = mas_walk(mas);
 		if (*entry)
 			return true;
@@ -5981,7 +5992,7 @@ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long m
 		if (unlikely(mas->last >= max))
 			return true;
 
-		mas->status = ma_active;
+		mas_may_activate(mas);
 		*entry = mas_walk(mas);
 		if (*entry)
 			return true;
-- 
cgit v1.2.3


From 1f0bce2fa8c6bfd65cf78ad6ef6e0948fc55c7bb Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Tue, 24 Jun 2025 11:48:23 -0400
Subject: maple_tree: add testing for restoring maple state to active

Restoring maple status to ma_active on overflow/underflow when mas->node
was NULL could have happened in the past, but was masked by a bug in
mas_walk().  Add test cases that triggered the bug when the node was
mas->node prior to fixing the maple state setup.

Add a few extra tests around restoring the active maple status.

Link: https://lore.kernel.org/all/202506191556.6bfc7b93-lkp@intel.com/
Link: https://lkml.kernel.org/r/20250624154823.52221-2-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/test_maple_tree.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'lib')

diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index 13e2a10d7554..cb3936595b0d 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -3177,6 +3177,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
 	void *entry, *ptr = (void *) 0x1234500;
 	void *ptr2 = &ptr;
 	void *ptr3 = &ptr2;
+	unsigned long index;
 
 	/* Check MAS_ROOT First */
 	mtree_store_range(mt, 0, 0, ptr, GFP_KERNEL);
@@ -3706,6 +3707,37 @@ static noinline void __init check_state_handling(struct maple_tree *mt)
 	MT_BUG_ON(mt, mas.last != 0x1fff);
 	MT_BUG_ON(mt, !mas_is_active(&mas));
 
+	mas_unlock(&mas);
+	mtree_destroy(mt);
+
+	mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+	mas_lock(&mas);
+	for (int count = 0; count < 30; count++) {
+		mas_set(&mas, count);
+		mas_store_gfp(&mas, xa_mk_value(count), GFP_KERNEL);
+	}
+
+	/* Ensure mas_find works with MA_UNDERFLOW */
+	mas_set(&mas, 0);
+	entry = mas_walk(&mas);
+	mas_set(&mas, 0);
+	mas_prev(&mas, 0);
+	MT_BUG_ON(mt, mas.status != ma_underflow);
+	MT_BUG_ON(mt, mas_find(&mas, ULONG_MAX) != entry);
+
+	/* Restore active on mas_next */
+	entry = mas_next(&mas, ULONG_MAX);
+	index = mas.index;
+	mas_prev(&mas, index);
+	MT_BUG_ON(mt, mas.status != ma_underflow);
+	MT_BUG_ON(mt, mas_next(&mas, ULONG_MAX) != entry);
+
+	/* Ensure overflow -> active works */
+	mas_prev(&mas, 0);
+	mas_next(&mas, index - 1);
+	MT_BUG_ON(mt, mas.status != ma_overflow);
+	MT_BUG_ON(mt, mas_next(&mas, ULONG_MAX) != entry);
+
 	mas_unlock(&mas);
 }
 
-- 
cgit v1.2.3


From 526f36f3f47b9ad29ffb1bf668b7f295287ee11b Mon Sep 17 00:00:00 2001
From: Dev Jain <dev.jain@arm.com>
Date: Thu, 3 Jul 2025 12:03:38 +0530
Subject: maple tree: add some comments

Add comments explaining the fields for maple_metadata, since "end" is
ambiguous and "gap" can be confused as the largest gap, whereas it is
actually the offset of the largest gap.

Add comment for mas_ascend() to explain, whose min and max we are trying
to find.  Explain that, for example, if we are already on offset zero,
then the parent min is mas->min, otherwise we need to walk up to find the
implied pivot min.

Link: https://lkml.kernel.org/r/20250703063338.51509-1-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/maple_tree.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 0e85e92c5375..b4ee2d29d7a9 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -1053,7 +1053,7 @@ static inline void mte_set_gap(const struct maple_enode *mn,
  * mas_ascend() - Walk up a level of the tree.
  * @mas: The maple state
  *
- * Sets the @mas->max and @mas->min to the correct values when walking up.  This
+ * Sets the @mas->max and @mas->min for the parent node of mas->node.  This
  * may cause several levels of walking up to find the correct min and max.
  * May find a dead node which will cause a premature return.
  * Return: 1 on dead node, 0 otherwise
@@ -1098,6 +1098,12 @@ static int mas_ascend(struct ma_state *mas)
 
 	min = 0;
 	max = ULONG_MAX;
+
+	/*
+	 * !mas->offset implies that parent node min == mas->min.
+	 * mas->offset > 0 implies that we need to walk up to find the
+	 * implied pivot min.
+	 */
 	if (!mas->offset) {
 		min = mas->min;
 		set_min = true;
-- 
cgit v1.2.3


From ee58e38489772f356c1ac79e0724183497e43249 Mon Sep 17 00:00:00 2001
From: Raghavendra K T <raghavendra.kt@amd.com>
Date: Wed, 2 Jul 2025 06:43:19 +0000
Subject: lib/test_vmalloc.c: introduce xfail for failing tests

The test align_shift_alloc_test is expected to fail.  Reporting the test
as fail confuses to be a genuine failure.  Introduce widely used xfail
sematics to address the issue.

Note: a warn_alloc dump similar to below is still expected:

 Call Trace:
  <TASK>
  dump_stack_lvl+0x64/0x80
  warn_alloc+0x137/0x1b0
  ? __get_vm_area_node+0x134/0x140

Snippet of dmesg after change:

Summary: random_size_align_alloc_test passed: 1 failed: 0 xfailed: 0 ..
Summary: align_shift_alloc_test passed: 0 failed: 0 xfailed: 1 ..
Summary: pcpu_alloc_test passed: 1 failed: 0 xfailed: 0 ..

Link: https://lkml.kernel.org/r/20250702064319.885-1-raghavendra.kt@amd.com
Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
Reviewed-by: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: Dev Jain <dev.jain@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/test_vmalloc.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

(limited to 'lib')

diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index c1966cf72ab8..2815658ccc37 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -396,25 +396,27 @@ cleanup:
 struct test_case_desc {
 	const char *test_name;
 	int (*test_func)(void);
+	bool xfail;
 };
 
 static struct test_case_desc test_case_array[] = {
-	{ "fix_size_alloc_test", fix_size_alloc_test },
-	{ "full_fit_alloc_test", full_fit_alloc_test },
-	{ "long_busy_list_alloc_test", long_busy_list_alloc_test },
-	{ "random_size_alloc_test", random_size_alloc_test },
-	{ "fix_align_alloc_test", fix_align_alloc_test },
-	{ "random_size_align_alloc_test", random_size_align_alloc_test },
-	{ "align_shift_alloc_test", align_shift_alloc_test },
-	{ "pcpu_alloc_test", pcpu_alloc_test },
-	{ "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test },
-	{ "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test },
-	{ "vm_map_ram_test", vm_map_ram_test },
+	{ "fix_size_alloc_test", fix_size_alloc_test, },
+	{ "full_fit_alloc_test", full_fit_alloc_test, },
+	{ "long_busy_list_alloc_test", long_busy_list_alloc_test, },
+	{ "random_size_alloc_test", random_size_alloc_test, },
+	{ "fix_align_alloc_test", fix_align_alloc_test, },
+	{ "random_size_align_alloc_test", random_size_align_alloc_test, },
+	{ "align_shift_alloc_test", align_shift_alloc_test, true },
+	{ "pcpu_alloc_test", pcpu_alloc_test, },
+	{ "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test, },
+	{ "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test, },
+	{ "vm_map_ram_test", vm_map_ram_test, },
 	/* Add a new test case here. */
 };
 
 struct test_case_data {
 	int test_failed;
+	int test_xfailed;
 	int test_passed;
 	u64 time;
 };
@@ -444,7 +446,7 @@ static int test_func(void *private)
 {
 	struct test_driver *t = private;
 	int random_array[ARRAY_SIZE(test_case_array)];
-	int index, i, j;
+	int index, i, j, ret;
 	ktime_t kt;
 	u64 delta;
 
@@ -468,11 +470,14 @@ static int test_func(void *private)
 		 */
 		if (!((run_test_mask & (1 << index)) >> index))
 			continue;
-
 		kt = ktime_get();
 		for (j = 0; j < test_repeat_count; j++) {
-			if (!test_case_array[index].test_func())
+			ret = test_case_array[index].test_func();
+
+			if (!ret && !test_case_array[index].xfail)
 				t->data[index].test_passed++;
+			else if (ret && test_case_array[index].xfail)
+				t->data[index].test_xfailed++;
 			else
 				t->data[index].test_failed++;
 		}
@@ -576,10 +581,11 @@ static void do_concurrent_test(void)
 				continue;
 
 			pr_info(
-				"Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
+				"Summary: %s passed: %d failed: %d xfailed: %d repeat: %d loops: %d avg: %llu usec\n",
 				test_case_array[j].test_name,
 				t->data[j].test_passed,
 				t->data[j].test_failed,
+				t->data[j].test_xfailed,
 				test_repeat_count, test_loop_count,
 				t->data[j].time);
 		}
-- 
cgit v1.2.3