summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
authorAmery Hung <ameryhung@gmail.com>2025-12-03 15:37:44 -0800
committerAndrii Nakryiko <andrii@kernel.org>2025-12-05 16:17:57 -0800
commitb5709f6d26d65f6bb9711f4b5f98469fd507cb5b (patch)
treeb9a0535c5b65be14a46d194aef46c63b20620694 /kernel/bpf
parent1588c81b9f215170bd596984691eda2f05a84a1f (diff)
bpf: Support associating BPF program with struct_ops
Add a new BPF command BPF_PROG_ASSOC_STRUCT_OPS to allow associating a BPF program with a struct_ops map. This command takes a file descriptor of a struct_ops map and a BPF program and set prog->aux->st_ops_assoc to the kdata of the struct_ops map. The command does not accept a struct_ops program nor a non-struct_ops map. Programs of a struct_ops map is automatically associated with the map during map update. If a program is shared between two struct_ops maps, prog->aux->st_ops_assoc will be poisoned to indicate that the associated struct_ops is ambiguous. The pointer, once poisoned, cannot be reset since we have lost track of associated struct_ops. For other program types, the associated struct_ops map, once set, cannot be changed later. This restriction may be lifted in the future if there is a use case. A kernel helper bpf_prog_get_assoc_struct_ops() can be used to retrieve the associated struct_ops pointer. The returned pointer, if not NULL, is guaranteed to be valid and point to a fully updated struct_ops struct. For struct_ops program reused in multiple struct_ops map, the return will be NULL. prog->aux->st_ops_assoc is protected by bumping the refcount for non-struct_ops programs and RCU for struct_ops programs. Since it would be inefficient to track programs associated with a struct_ops map, every non-struct_ops program will bump the refcount of the map to make sure st_ops_assoc stays valid. For a struct_ops program, it is protected by RCU as map_free will wait for an RCU grace period before disassociating the program with the map. The helper must be called in BPF program context or RCU read-side critical section. struct_ops implementers should note that the struct_ops returned may not be initialized nor attached yet. The struct_ops implementer will be responsible for tracking and checking the state of the associated struct_ops map if the use case expects an initialized or attached struct_ops. Signed-off-by: Amery Hung <ameryhung@gmail.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Martin KaFai Lau <martin.lau@kernel.org> Link: https://lore.kernel.org/bpf/20251203233748.668365-3-ameryhung@gmail.com
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/bpf_struct_ops.c88
-rw-r--r--kernel/bpf/core.c3
-rw-r--r--kernel/bpf/syscall.c46
3 files changed, 137 insertions, 0 deletions
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 278490683d28..c43346cb3d76 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -533,6 +533,17 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
}
}
+static void bpf_struct_ops_map_dissoc_progs(struct bpf_struct_ops_map *st_map)
+{
+ u32 i;
+
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->links[i])
+ break;
+ bpf_prog_disassoc_struct_ops(st_map->links[i]->prog);
+ }
+}
+
static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map)
{
int i;
@@ -801,6 +812,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto reset_unlock;
}
+ /* Poison pointer on error instead of return for backward compatibility */
+ bpf_prog_assoc_struct_ops(prog, &st_map->map);
+
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
bpf_prog_put(prog);
@@ -980,6 +994,8 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
if (btf_is_module(st_map->btf))
module_put(st_map->st_ops_desc->st_ops->owner);
+ bpf_struct_ops_map_dissoc_progs(st_map);
+
bpf_struct_ops_map_del_ksyms(st_map);
/* The struct_ops's function may switch to another struct_ops.
@@ -1396,6 +1412,78 @@ err_out:
return err;
}
+int bpf_prog_assoc_struct_ops(struct bpf_prog *prog, struct bpf_map *map)
+{
+ struct bpf_map *st_ops_assoc;
+
+ guard(mutex)(&prog->aux->st_ops_assoc_mutex);
+
+ st_ops_assoc = rcu_dereference_protected(prog->aux->st_ops_assoc,
+ lockdep_is_held(&prog->aux->st_ops_assoc_mutex));
+ if (st_ops_assoc && st_ops_assoc == map)
+ return 0;
+
+ if (st_ops_assoc) {
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ return -EBUSY;
+
+ rcu_assign_pointer(prog->aux->st_ops_assoc, BPF_PTR_POISON);
+ } else {
+ /*
+ * struct_ops map does not track associated non-struct_ops programs.
+ * Bump the refcount to make sure st_ops_assoc is always valid.
+ */
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ bpf_map_inc(map);
+
+ rcu_assign_pointer(prog->aux->st_ops_assoc, map);
+ }
+
+ return 0;
+}
+
+void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog)
+{
+ struct bpf_map *st_ops_assoc;
+
+ guard(mutex)(&prog->aux->st_ops_assoc_mutex);
+
+ st_ops_assoc = rcu_dereference_protected(prog->aux->st_ops_assoc,
+ lockdep_is_held(&prog->aux->st_ops_assoc_mutex));
+ if (!st_ops_assoc || st_ops_assoc == BPF_PTR_POISON)
+ return;
+
+ if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
+ bpf_map_put(st_ops_assoc);
+
+ RCU_INIT_POINTER(prog->aux->st_ops_assoc, NULL);
+}
+
+/*
+ * Get a reference to the struct_ops struct (i.e., kdata) associated with a
+ * program. Should only be called in BPF program context (e.g., in a kfunc).
+ *
+ * If the returned pointer is not NULL, it must points to a valid struct_ops.
+ * The struct_ops map is not guaranteed to be initialized nor attached.
+ * Kernel struct_ops implementers are responsible for tracking and checking
+ * the state of the struct_ops if the use case requires an initialized or
+ * attached struct_ops.
+ */
+void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux)
+{
+ struct bpf_struct_ops_map *st_map;
+ struct bpf_map *st_ops_assoc;
+
+ st_ops_assoc = rcu_dereference_check(aux->st_ops_assoc, bpf_rcu_lock_held());
+ if (!st_ops_assoc || st_ops_assoc == BPF_PTR_POISON)
+ return NULL;
+
+ st_map = (struct bpf_struct_ops_map *)st_ops_assoc;
+
+ return &st_map->kvalue.data;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_get_assoc_struct_ops);
+
void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c8ae6ab31651..67226145a4db 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -136,6 +136,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
mutex_init(&fp->aux->used_maps_mutex);
mutex_init(&fp->aux->ext_mutex);
mutex_init(&fp->aux->dst_mutex);
+ mutex_init(&fp->aux->st_ops_assoc_mutex);
#ifdef CONFIG_BPF_SYSCALL
bpf_prog_stream_init(fp);
@@ -286,6 +287,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
if (fp->aux) {
mutex_destroy(&fp->aux->used_maps_mutex);
mutex_destroy(&fp->aux->dst_mutex);
+ mutex_destroy(&fp->aux->st_ops_assoc_mutex);
kfree(fp->aux->poke_tab);
kfree(fp->aux);
}
@@ -2896,6 +2898,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
#endif
bpf_free_used_maps(aux);
bpf_free_used_btfs(aux);
+ bpf_prog_disassoc_struct_ops(aux->prog);
if (bpf_prog_is_dev_bound(aux))
bpf_prog_dev_bound_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6589acc89ef8..3080cc48bfc3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6122,6 +6122,49 @@ static int prog_stream_read(union bpf_attr *attr)
return ret;
}
+#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd
+
+static int prog_assoc_struct_ops(union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ int ret;
+
+ if (CHECK_ATTR(BPF_PROG_ASSOC_STRUCT_OPS))
+ return -EINVAL;
+
+ if (attr->prog_assoc_struct_ops.flags)
+ return -EINVAL;
+
+ prog = bpf_prog_get(attr->prog_assoc_struct_ops.prog_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+ ret = -EINVAL;
+ goto put_prog;
+ }
+
+ map = bpf_map_get(attr->prog_assoc_struct_ops.map_fd);
+ if (IS_ERR(map)) {
+ ret = PTR_ERR(map);
+ goto put_prog;
+ }
+
+ if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
+ ret = -EINVAL;
+ goto put_map;
+ }
+
+ ret = bpf_prog_assoc_struct_ops(prog, map);
+
+put_map:
+ bpf_map_put(map);
+put_prog:
+ bpf_prog_put(prog);
+ return ret;
+}
+
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
@@ -6261,6 +6304,9 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_STREAM_READ_BY_FD:
err = prog_stream_read(&attr);
break;
+ case BPF_PROG_ASSOC_STRUCT_OPS:
+ err = prog_assoc_struct_ops(&attr);
+ break;
default:
err = -EINVAL;
break;