-rw-r--r--	include/linux/memcontrol.h |  3
-rw-r--r--	include/linux/rcupdate.h   |  4
-rw-r--r--	mm/kmemleak.c              | 22
-rw-r--r--	mm/slub.c                  | 80
4 files changed, 76 insertions, 33 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1baee139999f..52bfe4157623 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -359,8 +359,7 @@ enum objext_flags {
* MEMCG_DATA_OBJEXTS.
*/
OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
- /* slabobj_ext vector allocated with kmalloc_nolock() */
- OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
+ __OBJEXTS_FLAG_UNUSED = __FIRST_OBJEXT_FLAG,
/* the next bit after the last actual flag */
__NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
};
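
The flag removed above rode in the low bits of slab->obj_exts; the mm/slub.c hunks below make it unnecessary by passing allow_spin explicitly to the free path. As a hedged sketch of how such flag bits share the word with the vector pointer (assuming the kernel's OBJEXTS_FLAGS_MASK definition; the real decode helper is slab_obj_exts() in mm/slab.h):

/* Sketch only: recover the slabobj_ext vector from the tagged word. */
static inline struct slabobj_ext *sketch_obj_exts(unsigned long obj_exts)
{
	/* mask off MEMCG_DATA_OBJEXTS and the other low-bit flags */
	return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK);
}
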
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7729fef249e1..04f3f86a4145 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1074,8 +1074,8 @@ static inline void rcu_read_unlock_migrate(void)
* either fall back to use of call_rcu() or rearrange the structure to
* position the rcu_head structure into the first 4096 bytes.
*
- * The object to be freed can be allocated either by kmalloc() or
- * kmem_cache_alloc().
+ * The object to be freed can be allocated either by kmalloc(),
+ * kmalloc_nolock(), or kmem_cache_alloc().
*
* Note that the allowable offset might decrease in the future.
*
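
For illustration, a hedged sketch of an object that satisfies the documented constraint: the rcu_head lies within the first 4096 bytes, and the memory may come from kmalloc_nolock() (its signature is assumed here to be kmalloc_nolock(size, gfp_flags, node); the struct and helpers are hypothetical):

struct demo_node {
	struct rcu_head rcu;	/* well inside the first 4096 bytes */
	long payload;
};

static struct demo_node *demo_alloc(void)
{
	/* allocation path that must not spin on locks (signature assumed) */
	return kmalloc_nolock(sizeof(struct demo_node), 0, NUMA_NO_NODE);
}

static void demo_free(struct demo_node *n)
{
	kfree_rcu(n, rcu);	/* frees after a grace period */
}
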
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index fe33f2edfe07..d79acf5c5100 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -837,13 +837,12 @@ static void delete_object_full(unsigned long ptr, unsigned int objflags)
struct kmemleak_object *object;
object = find_and_remove_object(ptr, 0, objflags);
- if (!object) {
-#ifdef DEBUG
- kmemleak_warn("Freeing unknown object at 0x%08lx\n",
- ptr);
-#endif
+ if (!object)
+ /*
+ * kmalloc_nolock() -> kfree() calls kmemleak_free()
+ * without kmemleak_alloc().
+ */
return;
- }
__delete_object(object);
}
@@ -926,13 +925,12 @@ static void paint_ptr(unsigned long ptr, int color, unsigned int objflags)
struct kmemleak_object *object;
object = __find_and_get_object(ptr, 0, objflags);
- if (!object) {
- kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n",
- ptr,
- (color == KMEMLEAK_GREY) ? "Grey" :
- (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
+ if (!object)
+ /*
+ * kmalloc_nolock() -> kfree_rcu() calls kmemleak_ignore()
+ * without kmemleak_alloc().
+ */
return;
- }
paint_it(object, color);
put_object(object);
}
diff --git a/mm/slub.c b/mm/slub.c
index 42df791279d9..865bc050f654 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -43,6 +43,7 @@
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
#include <linux/random.h>
+#include <linux/prandom.h>
#include <kunit/test.h>
#include <kunit/test-bug.h>
#include <linux/sort.h>
@@ -2189,8 +2190,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
virt_to_slab(vec)->slab_cache == s);
new_exts = (unsigned long)vec;
- if (unlikely(!allow_spin))
- new_exts |= OBJEXTS_NOSPIN_ALLOC;
#ifdef CONFIG_MEMCG
new_exts |= MEMCG_DATA_OBJEXTS;
#endif
@@ -2228,7 +2227,7 @@ retry:
return 0;
}
-static inline void free_slab_obj_exts(struct slab *slab)
+static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
{
struct slabobj_ext *obj_exts;
@@ -2256,10 +2255,10 @@ static inline void free_slab_obj_exts(struct slab *slab)
* the extension for obj_exts is expected to be NULL.
*/
mark_objexts_empty(obj_exts);
- if (unlikely(READ_ONCE(slab->obj_exts) & OBJEXTS_NOSPIN_ALLOC))
- kfree_nolock(obj_exts);
- else
+ if (allow_spin)
kfree(obj_exts);
+ else
+ kfree_nolock(obj_exts);
slab->obj_exts = 0;
}
@@ -2323,7 +2322,7 @@ static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
return 0;
}
-static inline void free_slab_obj_exts(struct slab *slab)
+static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
{
}
@@ -2584,6 +2583,24 @@ struct rcu_delayed_free {
* Returns true if freeing of the object can proceed, false if its reuse
* was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
* to KFENCE.
+ *
+ * For objects allocated via kmalloc_nolock(), only a subset of alloc hooks
+ * are invoked, so some free hooks must handle asymmetric hook calls.
+ *
+ * Alloc hooks called for kmalloc_nolock():
+ * - kmsan_slab_alloc()
+ * - kasan_slab_alloc()
+ * - memcg_slab_post_alloc_hook()
+ * - alloc_tagging_slab_alloc_hook()
+ *
+ * Free hooks that must handle missing corresponding alloc hooks:
+ * - kmemleak_free_recursive()
+ * - kfence_free()
+ *
+ * Free hooks that have no alloc hook counterpart and are thus safe to call:
+ * - debug_check_no_locks_freed()
+ * - debug_check_no_obj_freed()
+ * - __kcsan_check_access()
*/
static __always_inline
bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
@@ -3311,8 +3328,11 @@ static void *next_freelist_entry(struct kmem_cache *s,
return (char *)start + idx;
}
+static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);
+
/* Shuffle the single linked freelist based on a random pre-computed sequence */
-static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
+static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
+ bool allow_spin)
{
void *start;
void *cur;
@@ -3323,7 +3343,19 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
return false;
freelist_count = oo_objects(s->oo);
- pos = get_random_u32_below(freelist_count);
+ if (allow_spin) {
+ pos = get_random_u32_below(freelist_count);
+ } else {
+ struct rnd_state *state;
+
+ /*
+ * An interrupt or NMI handler may preempt us and advance the
+ * state in the middle, but that is safe.
+ */
+ state = &get_cpu_var(slab_rnd_state);
+ pos = prandom_u32_state(state) % freelist_count;
+ put_cpu_var(slab_rnd_state);
+ }
page_limit = slab->objects * s->size;
start = fixup_red_left(s, slab_address(slab));
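
The lockless branch above replaces get_random_u32_below() with a per-CPU prandom state (seeded via prandom_init_once() in kmem_cache_init_late() further down), so no locks are taken while picking the starting position. A standalone sketch of the same pattern, with illustrative names:

#include <linux/percpu.h>
#include <linux/prandom.h>

static DEFINE_PER_CPU(struct rnd_state, demo_rnd);

/* seed once from an init path that is allowed to sleep */
static void demo_rnd_init(void)
{
	prandom_init_once(&demo_rnd);
}

/* pick an index in [0, bound) without taking any locks */
static unsigned int demo_pick(unsigned int bound)
{
	struct rnd_state *state = &get_cpu_var(demo_rnd);
	unsigned int idx = prandom_u32_state(state) % bound;

	put_cpu_var(demo_rnd);
	return idx;
}
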
@@ -3350,7 +3382,8 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
return 0;
}
static inline void init_freelist_randomization(void) { }
-static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
+static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
+ bool allow_spin)
{
return false;
}
@@ -3369,14 +3402,14 @@ static __always_inline void account_slab(struct slab *slab, int order,
}
static __always_inline void unaccount_slab(struct slab *slab, int order,
- struct kmem_cache *s)
+ struct kmem_cache *s, bool allow_spin)
{
/*
* The slab object extensions should now be freed regardless of
* whether mem_alloc_profiling_enabled() or not because profiling
* might have been disabled after slab->obj_exts got allocated.
*/
- free_slab_obj_exts(slab);
+ free_slab_obj_exts(slab, allow_spin);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
@@ -3441,7 +3474,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
alloc_slab_obj_exts_early(s, slab);
account_slab(slab, oo_order(oo), s, flags);
- shuffle = shuffle_freelist(s, slab);
+ shuffle = shuffle_freelist(s, slab, allow_spin);
if (!shuffle) {
start = fixup_red_left(s, start);
@@ -3480,7 +3513,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab, bool allow_spin
page->mapping = NULL;
__ClearPageSlab(page);
mm_account_reclaimed_pages(pages);
- unaccount_slab(slab, order, s);
+ unaccount_slab(slab, order, s, allow_spin);
if (allow_spin)
free_frozen_pages(page, order);
else
@@ -3791,6 +3824,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
struct zone *zone;
enum zone_type highest_zoneidx = gfp_zone(pc->flags);
unsigned int cpuset_mems_cookie;
+ bool allow_spin = gfpflags_allow_spinning(pc->flags);
/*
* The defrag ratio allows a configuration of the tradeoffs between
@@ -3815,7 +3849,15 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
return NULL;
do {
- cpuset_mems_cookie = read_mems_allowed_begin();
+ /*
+ * read_mems_allowed_begin() accesses current->mems_allowed_seq,
+ * a seqcount_spinlock_t that is not NMI-safe. Skip both the
+ * read and the retry below when the GFP flags indicate that
+ * spinning is not allowed.
+ */
+ if (allow_spin)
+ cpuset_mems_cookie = read_mems_allowed_begin();
+
zonelist = node_zonelist(mempolicy_slab_node(), pc->flags);
for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
struct kmem_cache_node *n;
@@ -3839,7 +3881,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
}
}
}
- } while (read_mems_allowed_retry(cpuset_mems_cookie));
+ } while (allow_spin && read_mems_allowed_retry(cpuset_mems_cookie));
#endif /* CONFIG_NUMA */
return NULL;
}
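
For context, a hedged sketch of the ordinary cpuset retry pattern that the hunk above now bypasses when spinning is not allowed; read_mems_allowed_begin() and read_mems_allowed_retry() are the same helpers used in the diff, and the loop body is elided:

#include <linux/cpuset.h>

static void demo_mems_allowed_walk(void)
{
	unsigned int cookie;

	do {
		/* samples current->mems_allowed_seq (seqcount behind a spinlock) */
		cookie = read_mems_allowed_begin();
		/* ... walk the zonelist; a concurrent cpuset update may race ... */
	} while (read_mems_allowed_retry(cookie));	/* retry if it changed */
}
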
@@ -6372,7 +6414,7 @@ void kvfree_rcu_cb(struct rcu_head *head)
/**
* kfree - free previously allocated memory
- * @object: pointer returned by kmalloc() or kmem_cache_alloc()
+ * @object: pointer returned by kmalloc(), kmalloc_nolock(), or kmem_cache_alloc()
*
* If @object is NULL, no operation is performed.
*/
@@ -6391,6 +6433,7 @@ void kfree(const void *object)
page = virt_to_page(object);
slab = page_slab(page);
if (!slab) {
+ /* kmalloc_nolock() doesn't support large kmalloc */
free_large_kmalloc(page, (void *)object);
return;
}
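
A hedged usage sketch of the pairing documented above, again assuming kmalloc_nolock(size, gfp_flags, node); the struct and function are hypothetical:

struct demo_item {
	int val;
};

static void demo_nolock_roundtrip(void)
{
	/* small allocation: kmalloc_nolock() doesn't support large kmalloc */
	struct demo_item *it = kmalloc_nolock(sizeof(*it), 0, NUMA_NO_NODE);

	if (!it)
		return;
	it->val = 1;
	kfree(it);	/* plain kfree() handles kmalloc_nolock() memory */
}
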
@@ -8337,6 +8380,9 @@ void __init kmem_cache_init_late(void)
flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM | WQ_PERCPU,
0);
WARN_ON(!flushwq);
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+ prandom_init_once(&slab_rnd_state);
+#endif
}
int do_kmem_cache_create(struct kmem_cache *s, const char *name,