summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
author Justin Suess <utilityemal77@gmail.com> 2026-06-09 22:25:44 +0200
committer Alexei Starovoitov <ast@kernel.org> 2026-06-09 21:23:11 -0700
commit a3a81d247651218e47153f2d2afd7aee236726fd (patch)
tree 243bddd5e5085f96f65fdd1a596f0773b75f6928 /kernel/bpf
parent 94c8d1c21be40a845357854f98ec07e21bb14bc9 (diff)
bpf: Cancel special fields on map value recycle
Map update and delete paths currently call bpf_obj_free_fields() when a value is being replaced or recycled. That makes field destruction depend on the context of the update/delete operation. For tracing programs this can include NMI context, where referenced kptr destructors, uptr unpinning, and graph root destruction are not generally safe. Introduce bpf_obj_cancel_fields() for the reusable-value path. It only performs NMI-safe cleanup for timer, workqueue, and task_work fields. Fields that need full destruction are left attached to the recycled value and are destroyed by the final cleanup path instead. Switch array and hashtab update/delete/recycle paths to this cancel helper. Keep bpf_obj_free_fields() for final map destruction and for bpf_mem_alloc destructors. Preallocated hashtabs do not have allocator destructors, so teardown continues to walk the normal and extra elements and fully destroy their fields. This deliberately relaxes the eager-free semantics of map update/delete for special fields. Programs that relied on a recycled map slot becoming empty immediately after update/delete were relying on behavior that cannot be implemented safely from every BPF execution context without offloading arbitrary destructors. There is a chance this change breaks programs making assumptions regarding the eager freeing of fields. If so, a future change can restrict the cancellation-only semantics to the case where irqs_disabled() is true. However, theoretically, map values that get reused eagerly already have weaker guarantees as parallel users can recreate freed fields before the new element becomes visible again. Fixes: 14a324f6a67e ("bpf: Wire up freeing of referenced kptr") Signed-off-by: Justin Suess <utilityemal77@gmail.com> Co-developed-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Link: https://lore.kernel.org/r/20260609202548.3571690-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- kernel/bpf/arraymap.c 8
-rw-r--r-- kernel/bpf/hashtab.c 32
-rw-r--r-- kernel/bpf/syscall.c 5
3 files changed, 27 insertions, 18 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e6271a2bf6d6..248b4818178c 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -384,7 +384,7 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
copy_map_value(map, val, value);
- bpf_obj_free_fields(array->map.record, val);
+ bpf_obj_cancel_fields(map, val);
} else {
val = array->value +
(u64)array->elem_size * (index & array->index_mask);
@@ -392,7 +392,7 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
copy_map_value_locked(map, val, value, false);
else
copy_map_value(map, val, value);
- bpf_obj_free_fields(array->map.record, val);
+ bpf_obj_cancel_fields(map, val);
}
return 0;
}
@@ -432,14 +432,14 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
cpu = map_flags >> 32;
ptr = per_cpu_ptr(pptr, cpu);
copy_map_value(map, ptr, value);
- bpf_obj_free_fields(array->map.record, ptr);
+ bpf_obj_cancel_fields(map, ptr);
goto unlock;
}
for_each_possible_cpu(cpu) {
ptr = per_cpu_ptr(pptr, cpu);
val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
copy_map_value(map, ptr, val);
- bpf_obj_free_fields(array->map.record, ptr);
+ bpf_obj_cancel_fields(map, ptr);
}
unlock:
rcu_read_unlock();
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b4366cad3cfa..9f394e1aa2e8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -243,6 +243,10 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab)
if (IS_ERR_OR_NULL(htab->map.record))
return;
+ /*
+ * Preallocated maps do not have a bpf_mem_alloc destructor, so fully
+ * destroy every element, including the extra elements.
+ */
if (htab_has_extra_elems(htab))
num_entries += num_possible_cpus();
for (i = 0; i < num_entries; i++) {
@@ -833,8 +837,8 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
return insn - insn_buf;
}
-static void check_and_free_fields(struct bpf_htab *htab,
- struct htab_elem *elem)
+static void check_and_cancel_fields(struct bpf_htab *htab,
+ struct htab_elem *elem)
{
if (IS_ERR_OR_NULL(htab->map.record))
return;
@@ -844,11 +848,11 @@ static void check_and_free_fields(struct bpf_htab *htab,
int cpu;
for_each_possible_cpu(cpu)
- bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
+ bpf_obj_cancel_fields(&htab->map, per_cpu_ptr(pptr, cpu));
} else {
void *map_value = htab_elem_value(elem, htab->map.key_size);
- bpf_obj_free_fields(htab->map.record, map_value);
+ bpf_obj_cancel_fields(&htab->map, map_value);
}
}
@@ -883,7 +887,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
htab_unlock_bucket(b, flags);
if (l == tgt_l)
- check_and_free_fields(htab, l);
+ check_and_cancel_fields(htab, l);
return l == tgt_l;
}
@@ -948,7 +952,7 @@ find_first_elem:
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
- check_and_free_fields(htab, l);
+ check_and_cancel_fields(htab, l);
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
@@ -1001,7 +1005,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
if (htab_is_prealloc(htab)) {
bpf_map_dec_elem_count(&htab->map);
- check_and_free_fields(htab, l);
+ check_and_cancel_fields(htab, l);
pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
dec_elem_count(htab);
@@ -1018,7 +1022,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
/* copy true value_size bytes */
ptr = this_cpu_ptr(pptr);
copy_map_value(&htab->map, ptr, value);
- bpf_obj_free_fields(htab->map.record, ptr);
+ bpf_obj_cancel_fields(&htab->map, ptr);
} else {
u32 size = round_up(htab->map.value_size, 8);
void *val;
@@ -1028,7 +1032,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
cpu = map_flags >> 32;
ptr = per_cpu_ptr(pptr, cpu);
copy_map_value(&htab->map, ptr, value);
- bpf_obj_free_fields(htab->map.record, ptr);
+ bpf_obj_cancel_fields(&htab->map, ptr);
return;
}
@@ -1036,7 +1040,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
ptr = per_cpu_ptr(pptr, cpu);
val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
copy_map_value(&htab->map, ptr, val);
- bpf_obj_free_fields(htab->map.record, ptr);
+ bpf_obj_cancel_fields(&htab->map, ptr);
}
}
}
@@ -1252,11 +1256,11 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (l_old) {
hlist_nulls_del_rcu(&l_old->hash_node);
- /* l_old has already been stashed in htab->extra_elems, free
- * its special fields before it is available for reuse.
+ /* l_old has already been stashed in htab->extra_elems, cancel
+ * its reusable special fields before it is available for reuse.
*/
if (htab_is_prealloc(htab))
- check_and_free_fields(htab, l_old);
+ check_and_cancel_fields(htab, l_old);
}
htab_unlock_bucket(b, flags);
if (l_old && !htab_is_prealloc(htab))
@@ -1269,7 +1273,7 @@ err:
static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
- check_and_free_fields(htab, elem);
+ check_and_cancel_fields(htab, elem);
bpf_map_dec_elem_count(&htab->map);
bpf_lru_push_free(&htab->lru, &elem->lru_node);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d4188a992bd8..7ed949f70f82 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -808,6 +808,11 @@ void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
bpf_task_work_cancel_and_free(obj + rec->task_work_off);
}
+void bpf_obj_cancel_fields(struct bpf_map *map, void *obj)
+{
+ bpf_map_free_internal_structs(map, obj);
+}
+
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
const struct btf_field *fields;