summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVlastimil Babka (SUSE) <vbabka@kernel.org>2026-03-02 10:55:37 +0100
committerVlastimil Babka <vbabka@suse.cz>2026-03-04 11:03:54 +0100
commitfb1091febd668398aa84c161b8d9a1834321e021 (patch)
tree04eb5b9671c11cc0bc20cfd4023878482cb83df0
parent48647d3f9a644d1e81af6558102d43cdb260597b (diff)
mm/slab: allow sheaf refill if blocking is not allowed
Ming Lei reported [1] a regression in the ublk null target benchmark due to sheaves. The profile shows that the alloc_from_pcs() fastpath fails and allocations fall back to ___slab_alloc(). It also shows the allocations happen through mempool_alloc(). The strategy of mempool_alloc() is to call the underlying allocator (here slab) without __GFP_DIRECT_RECLAIM first. This does not play well with __pcs_replace_empty_main() checking for gfpflags_allow_blocking() to decide if it should refill an empty sheaf or fallback to the slowpath, so we end up falling back. We could change the mempool strategy but there might be other paths doing the same ting. So instead allow sheaf refill when blocking is not allowed, changing the condition to gfpflags_allow_spinning(). The original condition was unnecessarily restrictive. Note this doesn't fully resolve the regression [1] as another component of that are memoryless nodes, which is to be addressed separately. Reported-by: Ming Lei <ming.lei@redhat.com> Fixes: e47c897a2949 ("slab: add sheaves to most caches") Link: https://lore.kernel.org/all/aZ0SbIqaIkwoW2mB@fedora/ [1] Link: https://patch.msgid.link/20260302095536.34062-2-vbabka@kernel.org Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
-rw-r--r--mm/slub.c22
1 files changed, 10 insertions, 12 deletions
diff --git a/mm/slub.c b/mm/slub.c
index b1e9f16ba435..20cb4f3b636d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4567,7 +4567,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
struct slab_sheaf *empty = NULL;
struct slab_sheaf *full;
struct node_barn *barn;
- bool can_alloc;
+ bool allow_spin;
lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
@@ -4588,8 +4588,9 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
return NULL;
}
- full = barn_replace_empty_sheaf(barn, pcs->main,
- gfpflags_allow_spinning(gfp));
+ allow_spin = gfpflags_allow_spinning(gfp);
+
+ full = barn_replace_empty_sheaf(barn, pcs->main, allow_spin);
if (full) {
stat(s, BARN_GET);
@@ -4599,9 +4600,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
stat(s, BARN_GET_FAIL);
- can_alloc = gfpflags_allow_blocking(gfp);
-
- if (can_alloc) {
+ if (allow_spin) {
if (pcs->spare) {
empty = pcs->spare;
pcs->spare = NULL;
@@ -4611,8 +4610,9 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
}
local_unlock(&s->cpu_sheaves->lock);
+ pcs = NULL;
- if (!can_alloc)
+ if (!allow_spin)
return NULL;
if (empty) {
@@ -4632,11 +4632,8 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
if (!full)
return NULL;
- /*
- * we can reach here only when gfpflags_allow_blocking
- * so this must not be an irq
- */
- local_lock(&s->cpu_sheaves->lock);
+ if (!local_trylock(&s->cpu_sheaves->lock))
+ goto barn_put;
pcs = this_cpu_ptr(s->cpu_sheaves);
/*
@@ -4667,6 +4664,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
return pcs;
}
+barn_put:
barn_put_full_sheaf(barn, full);
stat(s, BARN_PUT);