summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/bpf.h4
-rw-r--r--include/linux/bpf_local_storage.h2
-rw-r--r--include/linux/bpf_mem_alloc.h6
-rw-r--r--kernel/bpf/arena.c2
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/bloom_filter.c2
-rw-r--r--kernel/bpf/bpf_insn_array.c2
-rw-r--r--kernel/bpf/bpf_local_storage.c75
-rw-r--r--kernel/bpf/hashtab.c86
-rw-r--r--kernel/bpf/local_storage.c2
-rw-r--r--kernel/bpf/lpm_trie.c2
-rw-r--r--kernel/bpf/memalloc.c58
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr_race.c218
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_race.c197
15 files changed, 603 insertions, 57 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b78b53198a2e..05b34a6355b0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -124,7 +124,7 @@ struct bpf_map_ops {
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
- int (*map_check_btf)(const struct bpf_map *map,
+ int (*map_check_btf)(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
@@ -656,7 +656,7 @@ static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
map->ops->map_seq_show_elem;
}
-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 85efa9772530..8157e8da61d4 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -176,7 +176,7 @@ u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
void bpf_local_storage_map_free(struct bpf_map *map,
struct bpf_local_storage_cache *cache);
-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+int bpf_local_storage_map_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index e45162ef59bb..4ce0d27f8ea2 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -14,6 +14,8 @@ struct bpf_mem_alloc {
struct obj_cgroup *objcg;
bool percpu;
struct work_struct work;
+ void (*dtor_ctx_free)(void *ctx);
+ void *dtor_ctx;
};
/* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects.
@@ -32,6 +34,10 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
/* The percpu allocation with a specific unit size. */
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
+void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma,
+ void (*dtor)(void *obj, void *ctx),
+ void (*dtor_ctx_free)(void *ctx),
+ void *ctx);
/* Check the allocation size for kmalloc equivalent allocator */
int bpf_mem_alloc_check_size(bool percpu, size_t size);
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 144f30e740e8..f355cf1c1a16 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -303,7 +303,7 @@ static long arena_map_update_elem(struct bpf_map *map, void *key,
return -EOPNOTSUPP;
}
-static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf,
+static int arena_map_check_btf(struct bpf_map *map, const struct btf *btf,
const struct btf_type *key_type, const struct btf_type *value_type)
{
return 0;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 26763df6134a..33de68c95ad8 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -548,7 +548,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
-static int array_map_check_btf(const struct bpf_map *map,
+static int array_map_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index 35e1ddca74d2..b73336c976b7 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -180,7 +180,7 @@ static long bloom_map_update_elem(struct bpf_map *map, void *key,
return -EINVAL;
}
-static int bloom_map_check_btf(const struct bpf_map *map,
+static int bloom_map_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/bpf_insn_array.c b/kernel/bpf/bpf_insn_array.c
index c0286f25ca3c..a2f84afe6f7c 100644
--- a/kernel/bpf/bpf_insn_array.c
+++ b/kernel/bpf/bpf_insn_array.c
@@ -98,7 +98,7 @@ static long insn_array_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
}
-static int insn_array_check_btf(const struct bpf_map *map,
+static int insn_array_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index b28f07d3a0db..9c96a4477f81 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -107,14 +107,12 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage *local_storage;
- /* If RCU Tasks Trace grace period implies RCU grace period, do
- * kfree(), else do kfree_rcu().
+ /*
+ * RCU Tasks Trace grace period implies RCU grace period, do
+ * kfree() directly.
*/
local_storage = container_of(rcu, struct bpf_local_storage, rcu);
- if (rcu_trace_implies_rcu_gp())
- kfree(local_storage);
- else
- kfree_rcu(local_storage, rcu);
+ kfree(local_storage);
}
/* Handle use_kmalloc_nolock == false */
@@ -138,10 +136,11 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
{
- if (rcu_trace_implies_rcu_gp())
- bpf_local_storage_free_rcu(rcu);
- else
- call_rcu(rcu, bpf_local_storage_free_rcu);
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, so run
+	 * the RCU callback directly instead of chaining another call_rcu().
+	 */
+	bpf_local_storage_free_rcu(rcu);
}
static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
@@ -164,16 +163,29 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
bpf_local_storage_free_trace_rcu);
}
-/* rcu tasks trace callback for use_kmalloc_nolock == false */
-static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
+/* rcu callback for use_kmalloc_nolock == false */
+static void __bpf_selem_free_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_map *smap;
selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
- if (rcu_trace_implies_rcu_gp())
- kfree(selem);
- else
- kfree_rcu(selem, rcu);
+ /* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */
+ smap = rcu_dereference_check(SDATA(selem)->smap, 1);
+
+ if (smap)
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ kfree(selem);
+}
+
+/* rcu tasks trace callback for use_kmalloc_nolock == false */
+static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
+{
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, so run
+	 * the RCU callback (free fields + kfree) directly.
+	 */
+	__bpf_selem_free_rcu(rcu);
}
/* Handle use_kmalloc_nolock == false */
@@ -181,7 +193,7 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
bool vanilla_rcu)
{
if (vanilla_rcu)
- kfree_rcu(selem, rcu);
+ call_rcu(&selem->rcu, __bpf_selem_free_rcu);
else
call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
}
@@ -195,37 +207,29 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
/* The bpf_local_storage_map_free will wait for rcu_barrier */
smap = rcu_dereference_check(SDATA(selem)->smap, 1);
- if (smap) {
- migrate_disable();
+ if (smap)
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- migrate_enable();
- }
kfree_nolock(selem);
}
static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
- if (rcu_trace_implies_rcu_gp())
- bpf_selem_free_rcu(rcu);
- else
- call_rcu(rcu, bpf_selem_free_rcu);
+	/*
+	 * RCU Tasks Trace grace period implies RCU grace period, so run
+	 * the RCU callback (free fields + kfree_nolock) directly.
+	 */
+	bpf_selem_free_rcu(rcu);
}
void bpf_selem_free(struct bpf_local_storage_elem *selem,
bool reuse_now)
{
- struct bpf_local_storage_map *smap;
-
- smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
-
if (!selem->use_kmalloc_nolock) {
/*
* No uptr will be unpin even when reuse_now == false since uptr
* is only supported in task local storage, where
* smap->use_kmalloc_nolock == true.
*/
- if (smap)
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
__bpf_selem_free(selem, reuse_now);
return;
}
@@ -797,7 +801,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
return 0;
}
-int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+int bpf_local_storage_map_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
@@ -958,10 +962,9 @@ restart:
*/
synchronize_rcu();
- if (smap->use_kmalloc_nolock) {
- rcu_barrier_tasks_trace();
- rcu_barrier();
- }
+ /* smap remains in use regardless of kmalloc_nolock, so wait unconditionally. */
+ rcu_barrier_tasks_trace();
+ rcu_barrier();
kvfree(smap->buckets);
bpf_map_area_free(smap);
}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3b9d297a53be..bc6bc8bb871d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -125,6 +125,11 @@ struct htab_elem {
char key[] __aligned(8);
};
+struct htab_btf_record {
+ struct btf_record *record;
+ u32 key_size;
+};
+
static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
@@ -457,6 +462,83 @@ static int htab_map_alloc_check(union bpf_attr *attr)
return 0;
}
+static void htab_mem_dtor(void *obj, void *ctx)
+{
+ struct htab_btf_record *hrec = ctx;
+ struct htab_elem *elem = obj;
+ void *map_value;
+
+ if (IS_ERR_OR_NULL(hrec->record))
+ return;
+
+ map_value = htab_elem_value(elem, hrec->key_size);
+ bpf_obj_free_fields(hrec->record, map_value);
+}
+
+static void htab_pcpu_mem_dtor(void *obj, void *ctx)
+{
+ void __percpu *pptr = *(void __percpu **)obj;
+ struct htab_btf_record *hrec = ctx;
+ int cpu;
+
+ if (IS_ERR_OR_NULL(hrec->record))
+ return;
+
+ for_each_possible_cpu(cpu)
+ bpf_obj_free_fields(hrec->record, per_cpu_ptr(pptr, cpu));
+}
+
+static void htab_dtor_ctx_free(void *ctx)
+{
+ struct htab_btf_record *hrec = ctx;
+
+ btf_record_free(hrec->record);
+ kfree(ctx);
+}
+
+static int htab_set_dtor(struct bpf_htab *htab, void (*dtor)(void *, void *))
+{
+ u32 key_size = htab->map.key_size;
+ struct bpf_mem_alloc *ma;
+ struct htab_btf_record *hrec;
+ int err;
+
+ /* No need for dtors. */
+ if (IS_ERR_OR_NULL(htab->map.record))
+ return 0;
+
+ hrec = kzalloc(sizeof(*hrec), GFP_KERNEL);
+ if (!hrec)
+ return -ENOMEM;
+ hrec->key_size = key_size;
+ hrec->record = btf_record_dup(htab->map.record);
+ if (IS_ERR(hrec->record)) {
+ err = PTR_ERR(hrec->record);
+ kfree(hrec);
+ return err;
+ }
+ ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma;
+ bpf_mem_alloc_set_dtor(ma, dtor, htab_dtor_ctx_free, hrec);
+ return 0;
+}
+
+static int htab_map_check_btf(struct bpf_map *map, const struct btf *btf,
+ const struct btf_type *key_type, const struct btf_type *value_type)
+{
+ struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+ if (htab_is_prealloc(htab))
+ return 0;
+ /*
+ * We must set the dtor using this callback, as map's BTF record is not
+ * populated in htab_map_alloc(), so it will always appear as NULL.
+ */
+ if (htab_is_percpu(htab))
+ return htab_set_dtor(htab, htab_pcpu_mem_dtor);
+ else
+ return htab_set_dtor(htab, htab_mem_dtor);
+}
+
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -2281,6 +2363,7 @@ const struct bpf_map_ops htab_map_ops = {
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_check_btf = htab_map_check_btf,
.map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab),
.map_btf_id = &htab_map_btf_ids[0],
@@ -2303,6 +2386,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_check_btf = htab_map_check_btf,
.map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_lru),
.map_btf_id = &htab_map_btf_ids[0],
@@ -2482,6 +2566,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_check_btf = htab_map_check_btf,
.map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_percpu),
.map_btf_id = &htab_map_btf_ids[0],
@@ -2502,6 +2587,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_check_btf = htab_map_check_btf,
.map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_lru_percpu),
.map_btf_id = &htab_map_btf_ids[0],
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 1ccbf28b2ad9..8fca0c64f7b1 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -364,7 +364,7 @@ static long cgroup_storage_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
}
-static int cgroup_storage_check_btf(const struct bpf_map *map,
+static int cgroup_storage_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1adeb4d3b8cf..0f57608b385d 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -751,7 +751,7 @@ free_stack:
return err;
}
-static int trie_check_btf(const struct bpf_map *map,
+static int trie_check_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index bd45dda9dc35..682a9f34214b 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -102,6 +102,8 @@ struct bpf_mem_cache {
int percpu_size;
bool draining;
struct bpf_mem_cache *tgt;
+ void (*dtor)(void *obj, void *ctx);
+ void *dtor_ctx;
/* list of objects to be freed after RCU GP */
struct llist_head free_by_rcu;
@@ -260,12 +262,14 @@ static void free_one(void *obj, bool percpu)
kfree(obj);
}
-static int free_all(struct llist_node *llnode, bool percpu)
+static int free_all(struct bpf_mem_cache *c, struct llist_node *llnode, bool percpu)
{
struct llist_node *pos, *t;
int cnt = 0;
llist_for_each_safe(pos, t, llnode) {
+ if (c->dtor)
+ c->dtor((void *)pos + LLIST_NODE_SZ, c->dtor_ctx);
free_one(pos, percpu);
cnt++;
}
@@ -276,7 +280,7 @@ static void __free_rcu(struct rcu_head *head)
{
struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace);
- free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
+ free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size);
atomic_set(&c->call_rcu_ttrace_in_progress, 0);
}
@@ -308,7 +312,7 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c)
if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) {
if (unlikely(READ_ONCE(c->draining))) {
llnode = llist_del_all(&c->free_by_rcu_ttrace);
- free_all(llnode, !!c->percpu_size);
+ free_all(c, llnode, !!c->percpu_size);
}
return;
}
@@ -417,7 +421,7 @@ static void check_free_by_rcu(struct bpf_mem_cache *c)
dec_active(c, &flags);
if (unlikely(READ_ONCE(c->draining))) {
- free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
+ free_all(c, llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
atomic_set(&c->call_rcu_in_progress, 0);
} else {
call_rcu_hurry(&c->rcu, __free_by_rcu);
@@ -635,13 +639,13 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
* Except for waiting_for_gp_ttrace list, there are no concurrent operations
* on these lists, so it is safe to use __llist_del_all().
*/
- free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu);
- free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu);
- free_all(__llist_del_all(&c->free_llist), percpu);
- free_all(__llist_del_all(&c->free_llist_extra), percpu);
- free_all(__llist_del_all(&c->free_by_rcu), percpu);
- free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu);
- free_all(llist_del_all(&c->waiting_for_gp), percpu);
+ free_all(c, llist_del_all(&c->free_by_rcu_ttrace), percpu);
+ free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), percpu);
+ free_all(c, __llist_del_all(&c->free_llist), percpu);
+ free_all(c, __llist_del_all(&c->free_llist_extra), percpu);
+ free_all(c, __llist_del_all(&c->free_by_rcu), percpu);
+ free_all(c, __llist_del_all(&c->free_llist_extra_rcu), percpu);
+ free_all(c, llist_del_all(&c->waiting_for_gp), percpu);
}
static void check_mem_cache(struct bpf_mem_cache *c)
@@ -680,6 +684,9 @@ static void check_leaked_objs(struct bpf_mem_alloc *ma)
static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
{
+ /* We can free dtor ctx only once all callbacks are done using it. */
+ if (ma->dtor_ctx_free)
+ ma->dtor_ctx_free(ma->dtor_ctx);
check_leaked_objs(ma);
free_percpu(ma->cache);
free_percpu(ma->caches);
@@ -1014,3 +1021,32 @@ int bpf_mem_alloc_check_size(bool percpu, size_t size)
return 0;
}
+
+void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, void (*dtor)(void *obj, void *ctx),
+ void (*dtor_ctx_free)(void *ctx), void *ctx)
+{
+ struct bpf_mem_caches *cc;
+ struct bpf_mem_cache *c;
+ int cpu, i;
+
+ ma->dtor_ctx_free = dtor_ctx_free;
+ ma->dtor_ctx = ctx;
+
+ if (ma->cache) {
+ for_each_possible_cpu(cpu) {
+ c = per_cpu_ptr(ma->cache, cpu);
+ c->dtor = dtor;
+ c->dtor_ctx = ctx;
+ }
+ }
+ if (ma->caches) {
+ for_each_possible_cpu(cpu) {
+ cc = per_cpu_ptr(ma->caches, cpu);
+ for (i = 0; i < NUM_CACHES; i++) {
+ c = &cc->cache[i];
+ c->dtor = dtor;
+ c->dtor_ctx = ctx;
+ }
+ }
+ }
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0378e83b4099..274039e36465 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1234,7 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
}
EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);
-int map_check_no_btf(const struct bpf_map *map,
+int map_check_no_btf(struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type)
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c
new file mode 100644
index 000000000000..506ed55e8528
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr_race.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_kptr_race.skel.h"
+
+static int get_map_id(int map_fd)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+
+ if (!ASSERT_OK(bpf_map_get_info_by_fd(map_fd, &info, &len), "get_map_info"))
+ return -1;
+ return info.id;
+}
+
+static int read_refs(struct map_kptr_race *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ int ret;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.count_ref), &opts);
+ if (!ASSERT_OK(ret, "count_ref run"))
+ return -1;
+ if (!ASSERT_OK(opts.retval, "count_ref retval"))
+ return -1;
+ return skel->bss->num_of_refs;
+}
+
+static void test_htab_leak(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct map_kptr_race *skel, *watcher;
+ int ret, map_id;
+
+ skel = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_htab_leak), &opts);
+ if (!ASSERT_OK(ret, "test_htab_leak run"))
+ goto out_skel;
+ if (!ASSERT_OK(opts.retval, "test_htab_leak retval"))
+ goto out_skel;
+
+ map_id = get_map_id(bpf_map__fd(skel->maps.race_hash_map));
+ if (!ASSERT_GE(map_id, 0, "map_id"))
+ goto out_skel;
+
+ watcher = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(watcher, "watcher open_and_load"))
+ goto out_skel;
+
+ watcher->bss->target_map_id = map_id;
+ watcher->links.map_put = bpf_program__attach(watcher->progs.map_put);
+ if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry"))
+ goto out_watcher;
+ watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free);
+ if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit"))
+ goto out_watcher;
+
+ map_kptr_race__destroy(skel);
+ skel = NULL;
+
+ kern_sync_rcu();
+
+ while (!READ_ONCE(watcher->bss->map_freed))
+ sched_yield();
+
+ ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed");
+ ASSERT_EQ(read_refs(watcher), 2, "htab refcount");
+
+out_watcher:
+ map_kptr_race__destroy(watcher);
+out_skel:
+ map_kptr_race__destroy(skel);
+}
+
+static void test_percpu_htab_leak(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct map_kptr_race *skel, *watcher;
+ int ret, map_id;
+
+ skel = map_kptr_race__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+ if (skel->rodata->nr_cpus > 16)
+ skel->rodata->nr_cpus = 16;
+
+ ret = map_kptr_race__load(skel);
+ if (!ASSERT_OK(ret, "load"))
+ goto out_skel;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_percpu_htab_leak), &opts);
+ if (!ASSERT_OK(ret, "test_percpu_htab_leak run"))
+ goto out_skel;
+ if (!ASSERT_OK(opts.retval, "test_percpu_htab_leak retval"))
+ goto out_skel;
+
+ map_id = get_map_id(bpf_map__fd(skel->maps.race_percpu_hash_map));
+ if (!ASSERT_GE(map_id, 0, "map_id"))
+ goto out_skel;
+
+ watcher = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(watcher, "watcher open_and_load"))
+ goto out_skel;
+
+ watcher->bss->target_map_id = map_id;
+ watcher->links.map_put = bpf_program__attach(watcher->progs.map_put);
+ if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry"))
+ goto out_watcher;
+ watcher->links.htab_map_free = bpf_program__attach(watcher->progs.htab_map_free);
+ if (!ASSERT_OK_PTR(watcher->links.htab_map_free, "attach fexit"))
+ goto out_watcher;
+
+ map_kptr_race__destroy(skel);
+ skel = NULL;
+
+ kern_sync_rcu();
+
+ while (!READ_ONCE(watcher->bss->map_freed))
+ sched_yield();
+
+ ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed");
+ ASSERT_EQ(read_refs(watcher), 2, "percpu_htab refcount");
+
+out_watcher:
+ map_kptr_race__destroy(watcher);
+out_skel:
+ map_kptr_race__destroy(skel);
+}
+
+static void test_sk_ls_leak(void)
+{
+ struct map_kptr_race *skel, *watcher;
+ int listen_fd = -1, client_fd = -1, map_id;
+
+ skel = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (!ASSERT_OK(map_kptr_race__attach(skel), "attach"))
+ goto out_skel;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(listen_fd, 0, "start_server"))
+ goto out_skel;
+
+ client_fd = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto out_skel;
+
+ if (!ASSERT_EQ(skel->bss->sk_ls_leak_done, 1, "sk_ls_leak_done"))
+ goto out_skel;
+
+ close(client_fd);
+ client_fd = -1;
+ close(listen_fd);
+ listen_fd = -1;
+
+ map_id = get_map_id(bpf_map__fd(skel->maps.race_sk_ls_map));
+ if (!ASSERT_GE(map_id, 0, "map_id"))
+ goto out_skel;
+
+ watcher = map_kptr_race__open_and_load();
+ if (!ASSERT_OK_PTR(watcher, "watcher open_and_load"))
+ goto out_skel;
+
+ watcher->bss->target_map_id = map_id;
+ watcher->links.map_put = bpf_program__attach(watcher->progs.map_put);
+ if (!ASSERT_OK_PTR(watcher->links.map_put, "attach fentry"))
+ goto out_watcher;
+ watcher->links.sk_map_free = bpf_program__attach(watcher->progs.sk_map_free);
+ if (!ASSERT_OK_PTR(watcher->links.sk_map_free, "attach fexit"))
+ goto out_watcher;
+
+ map_kptr_race__destroy(skel);
+ skel = NULL;
+
+ kern_sync_rcu();
+
+ while (!READ_ONCE(watcher->bss->map_freed))
+ sched_yield();
+
+ ASSERT_EQ(watcher->bss->map_freed, 1, "map_freed");
+ ASSERT_EQ(read_refs(watcher), 2, "sk_ls refcount");
+
+out_watcher:
+ map_kptr_race__destroy(watcher);
+out_skel:
+ if (client_fd >= 0)
+ close(client_fd);
+ if (listen_fd >= 0)
+ close(listen_fd);
+ map_kptr_race__destroy(skel);
+}
+
+void serial_test_map_kptr_race(void)
+{
+ if (test__start_subtest("htab_leak"))
+ test_htab_leak();
+ if (test__start_subtest("percpu_htab_leak"))
+ test_percpu_htab_leak();
+ if (test__start_subtest("sk_ls_leak"))
+ test_sk_ls_leak();
+}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_race.c b/tools/testing/selftests/bpf/progs/map_kptr_race.c
new file mode 100644
index 000000000000..f6f136cd8f60
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr_race.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+struct map_value {
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} race_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} race_percpu_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} race_sk_ls_map SEC(".maps");
+
+int num_of_refs;
+int sk_ls_leak_done;
+int target_map_id;
+int map_freed;
+const volatile int nr_cpus;
+
+SEC("tc")
+int test_htab_leak(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *p, *old;
+ struct map_value val = {};
+ struct map_value *v;
+ int key = 0;
+
+ if (bpf_map_update_elem(&race_hash_map, &key, &val, BPF_ANY))
+ return 1;
+
+ v = bpf_map_lookup_elem(&race_hash_map, &key);
+ if (!v)
+ return 2;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 3;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+
+ bpf_map_delete_elem(&race_hash_map, &key);
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 4;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+
+ return 0;
+}
+
+static int fill_percpu_kptr(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p, *old;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 1;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+ return 0;
+}
+
+SEC("tc")
+int test_percpu_htab_leak(struct __sk_buff *skb)
+{
+ struct map_value *v, *arr[16] = {};
+ struct map_value val = {};
+ int key = 0;
+ int err = 0;
+
+ if (bpf_map_update_elem(&race_percpu_hash_map, &key, &val, BPF_ANY))
+ return 1;
+
+ for (int i = 0; i < nr_cpus; i++) {
+ v = bpf_map_lookup_percpu_elem(&race_percpu_hash_map, &key, i);
+ if (!v)
+ return 2;
+ arr[i] = v;
+ }
+
+ bpf_map_delete_elem(&race_percpu_hash_map, &key);
+
+ for (int i = 0; i < nr_cpus; i++) {
+ v = arr[i];
+ err = fill_percpu_kptr(v);
+ if (err)
+ return 3;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/inet_sock_set_state")
+int BPF_PROG(test_sk_ls_leak, struct sock *sk, int oldstate, int newstate)
+{
+ struct prog_test_ref_kfunc *p, *old;
+ struct map_value *v;
+
+ if (newstate != BPF_TCP_SYN_SENT)
+ return 0;
+
+ if (sk_ls_leak_done)
+ return 0;
+
+ v = bpf_sk_storage_get(&race_sk_ls_map, sk, NULL,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 0;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 0;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+
+ bpf_sk_storage_delete(&race_sk_ls_map, sk);
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 0;
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+
+ sk_ls_leak_done = 1;
+ return 0;
+}
+
+long target_map_ptr;
+
+SEC("fentry/bpf_map_put")
+int BPF_PROG(map_put, struct bpf_map *map)
+{
+ if (target_map_id && map->id == (u32)target_map_id)
+ target_map_ptr = (long)map;
+ return 0;
+}
+
+SEC("fexit/htab_map_free")
+int BPF_PROG(htab_map_free, struct bpf_map *map)
+{
+ if (target_map_ptr && (long)map == target_map_ptr)
+ map_freed = 1;
+ return 0;
+}
+
+SEC("fexit/bpf_sk_storage_map_free")
+int BPF_PROG(sk_map_free, struct bpf_map *map)
+{
+ if (target_map_ptr && (long)map == target_map_ptr)
+ map_freed = 1;
+ return 0;
+}
+
+SEC("syscall")
+int count_ref(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long arg = 0;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 1;
+
+ num_of_refs = p->cnt.refs.counter;
+
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";