From f233d4855918547f19c5bff95223706d1c836b7c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 7 Oct 2025 22:03:48 +0000 Subject: bpf: Refactor storage_get_func_atomic to generic non_sleepable flag Rename the storage_get_func_atomic flag to a more generic non_sleepable flag that tracks whether a helper or kfunc may be called from a non-sleepable context. This makes the flag more broadly applicable beyond just storage_get helpers. See [0] for more context. The flag is now set unconditionally for all helpers and kfuncs when: - RCU critical section is active. - Preemption is disabled. - IRQs are disabled. - In a non-sleepable context within a sleepable program (e.g., timer callbacks), which is indicated by !in_sleepable(). Previously, the flag was only set for storage_get helpers in these contexts. With this change, it can be used by any code that needs to differentiate between sleepable and non-sleepable contexts at the per-instruction level. The existing usage in do_misc_fixups() for storage_get helpers is preserved by checking is_storage_get_function() before using the flag. [0]: https://lore.kernel.org/bpf/CAP01T76cbaNi4p-y8E0sjE2NXSra2S=Uja8G4hSQDu_SbXxREQ@mail.gmail.com Cc: Mykyta Yatsenko Signed-off-by: Kumar Kartikeya Dwivedi Acked-by: Eduard Zingerman Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20251007220349.3852807-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 4c497e839526..b57222a25a4a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -548,7 +548,7 @@ struct bpf_insn_aux_data { bool nospec_result; /* result is unsafe under speculation, nospec must follow */ bool zext_dst; /* this insn zero extends dst reg */ bool needs_zext; /* alu op needs to clear upper bits */ - bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ + bool non_sleepable; /* helper/kfunc may be called from non-sleepable context */ bool is_iter_next; /* bpf_iter__next() kfunc call */ bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ -- cgit v1.2.3 From 4c97c4b149a019a3b318dc6ea3dc96efe0ee1f39 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 10 Oct 2025 17:46:06 +0100 Subject: bpf: Extract internal structs validation logic into helpers The arraymap and hashtab duplicate the logic that checks for and frees internal structs (timer, workqueue, task_work) based on BTF record flags. Centralize this by introducing two helpers: * bpf_map_has_internal_structs(map) Returns true if the map value contains any of internal structs: BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK. * bpf_map_free_internal_structs(map, obj) Frees the internal structs for a single value object. Convert arraymap and both the prealloc/malloc hashtab paths to use the new generic functions. This keeps the functionality for when/how to free these special fields in one place and makes it easier to add support for new internal structs in the future without touching every map implementation. Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251010164606.147298-3-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a98c83346134..f87fb203aaae 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -663,6 +663,13 @@ int map_check_no_btf(const struct bpf_map *map, bool bpf_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1); +static inline bool bpf_map_has_internal_structs(struct bpf_map *map) +{ + return btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK); +} + +void bpf_map_free_internal_structs(struct bpf_map *map, void *obj); + extern const struct bpf_map_ops bpf_map_offload_ops; /* bpf_type_flag contains a set of flags that are applicable to the values of -- cgit v1.2.3 From 48a97ffc6c826640907d13b199e29008f4fe2c15 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 14 Oct 2025 13:14:03 -0700 Subject: bpf: Consistently use bpf_rcu_lock_held() everywhere We have many places which open-code what's now is bpf_rcu_lock_held() macro, so replace all those places with a clean and short macro invocation. For that, move bpf_rcu_lock_held() macro into include/linux/bpf.h. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20251014201403.4104511-1-andrii@kernel.org --- include/linux/bpf.h | 3 +++ include/linux/bpf_local_storage.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f87fb203aaae..86afd9ac6848 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2381,6 +2381,9 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, bool bpf_jit_bypass_spec_v1(void); bool bpf_jit_bypass_spec_v4(void); +#define bpf_rcu_lock_held() \ + (rcu_read_lock_held() || rcu_read_lock_trace_held() || rcu_read_lock_bh_held()) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index ab7244d8108f..782f58feea35 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -18,9 +18,6 @@ #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 -#define bpf_rcu_lock_held() \ - (rcu_read_lock_held() || rcu_read_lock_trace_held() || \ - rcu_read_lock_bh_held()) struct bpf_local_storage_map_bucket { struct hlist_head list; raw_spinlock_t lock; -- cgit v1.2.3 From f7d72d0b3f438b881dba16c7c00493f16e41a821 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Sun, 19 Oct 2025 20:21:30 +0000 Subject: bpf: save the start of functions in bpf_prog_aux Introduce a new subprog_start field in bpf_prog_aux. This field may be used by JIT compilers wanting to know the real absolute xlated offset of the function being jitted. The func_info[func_id] may have served this purpose, but func_info may be NULL, so JIT compilers can't rely on it. Signed-off-by: Anton Protopopov Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251019202145.3944697-3-a.s.protopopov@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 204f9c759a41..3bda915cd7a8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1623,6 +1623,7 @@ struct bpf_prog_aux { u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; + u32 subprog_start; struct btf *attach_btf; struct bpf_ctx_arg_aux *ctx_arg_info; void __percpu *priv_stack_ptr; -- cgit v1.2.3 From 44481e4925327d833f2e37c8741406e4cabfe054 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Sun, 19 Oct 2025 20:21:31 +0000 Subject: bpf: generalize and export map_get_next_key for arrays The kernel/bpf/array.c file defines the array_map_get_next_key() function which finds the next key for array maps. It actually doesn't use any map fields besides the generic max_entries field. Generalize it, and export as bpf_array_get_next_key() such that it can be re-used by other array-like maps. Signed-off-by: Anton Protopopov Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251019202145.3944697-4-a.s.protopopov@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3bda915cd7a8..e53cda0aabb6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2107,6 +2107,12 @@ struct bpf_array { }; }; +/* + * The bpf_array_get_next_key() function may be used for all array-like + * maps, i.e., maps with u32 keys with range [0 ,..., max_entries) + */ +int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key); + #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 -- cgit v1.2.3 From 2f69c5685427308d2f312646779313f3677536bc Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Sun, 19 Oct 2025 20:21:37 +0000 Subject: bpf: make bpf_insn_successors to return a pointer The bpf_insn_successors() function is used to return successors to a BPF instruction. So far, an instruction could have 0, 1 or 2 successors. Prepare the verifier code to introduction of instructions with more than 2 successors (namely, indirect jumps). To do this, introduce a new struct, struct bpf_iarray, containing an array of bpf instruction indexes and make bpf_insn_successors to return a pointer of that type. The storage for all instructions is allocated in the env->succ, which holds an array of size 2, to be used for all instructions. Signed-off-by: Anton Protopopov Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251019202145.3944697-10-a.s.protopopov@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b57222a25a4a..c6eb68b6389c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -509,6 +509,15 @@ struct bpf_map_ptr_state { #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ BPF_ALU_SANITIZE_DST) +/* + * An array of BPF instructions. + * Primary usage: return value of bpf_insn_successors. + */ +struct bpf_iarray { + int cnt; + u32 items[]; +}; + struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ @@ -828,6 +837,7 @@ struct bpf_verifier_env { /* array of pointers to bpf_scc_info indexed by SCC id */ struct bpf_scc_info **scc_info; u32 scc_cnt; + struct bpf_iarray *succ; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) @@ -1050,7 +1060,7 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off); int bpf_jmp_offset(struct bpf_insn *insn); -int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]); +struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx); void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); -- cgit v1.2.3 From 531b87d865eb9e625c2e46ec8f06a65a6157ee45 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:45 +0000 Subject: bpf: widen dynptr size/offset to 64 bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dynptr currently caps size and offset at 24 bits, which isn’t sufficient for file-backed use cases; even 32 bits can be limiting. Refactor dynptr helpers/kfuncs to use 64-bit size and offset, ensuring consistency across the APIs. This change does not affect internals of xdp, skb or other dynptrs, which continue to behave as before. Also it does not break binary compatibility. The widening enables large-file access support via dynptr, implemented in the next patches. Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251026203853.135105-3-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e53cda0aabb6..907c69295293 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1387,19 +1387,19 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_SKB_META, }; -int bpf_dynptr_check_size(u32 size); -u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); -const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); -void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); +int bpf_dynptr_check_size(u64 size); +u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); -int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, - void *src, u32 len, u64 flags); -void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, + void *src, u64 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk); -static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len) { - u32 size = __bpf_dynptr_size(ptr); + u64 size = __bpf_dynptr_size(ptr); if (len > size || offset > size - len) return -E2BIG; -- cgit v1.2.3 From 76e4fed847124690f7344a43d01dbcd7b2925353 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:46 +0000 Subject: lib: move freader into buildid.h Move struct freader and prototypes of the functions operating on it into the buildid.h. This allows reusing freader outside buildid, e.g. for file dynptr support added later. Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20251026203853.135105-4-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/buildid.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 014a88c41073..831c1b4b626c 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -18,4 +18,29 @@ void init_vmlinux_build_id(void); static inline void init_vmlinux_build_id(void) { } #endif +struct freader { + void *buf; + u32 buf_sz; + int err; + union { + struct { + struct file *file; + struct folio *folio; + void *addr; + loff_t folio_off; + bool may_fault; + }; + struct { + const char *data; + u64 data_sz; + }; + }; +}; + +void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, + struct file *file, bool may_fault); +void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz); +const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz); +void freader_cleanup(struct freader *r); + #endif -- cgit v1.2.3 From 8d8771dc03e48300e80b43744dd3c320ccaf746a Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:49 +0000 Subject: bpf: add plumbing for file-backed dynptr Add the necessary verifier plumbing for the new file-backed dynptr type. Introduce two kfuncs for its lifecycle management: * bpf_dynptr_from_file() for initialization * bpf_dynptr_file_discard() for destruction Currently there is no mechanism for kfunc to release dynptr, this patch add one: * Dynptr release function sets meta->release_regno * Call unmark_stack_slots_dynptr() if meta->release_regno is set and dynptr ref_obj_id is set as well. Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251026203853.135105-7-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 907c69295293..14f800773997 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -792,12 +792,15 @@ enum bpf_type_flag { /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to file */ + DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1385,6 +1388,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_XDP, /* Points to skb_metadata_end()-skb_metadata_len() */ BPF_DYNPTR_TYPE_SKB_META, + /* Underlying data is a file */ + BPF_DYNPTR_TYPE_FILE, }; int bpf_dynptr_check_size(u64 size); -- cgit v1.2.3 From 2c52e8943a437af6093d8b0f0920f1764f0e5f64 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:52 +0000 Subject: bpf: dispatch to sleepable file dynptr File dynptr reads may sleep when the requested folios are not in the page cache. To avoid sleeping in non-sleepable contexts while still supporting valid sleepable use, given that dynptrs are non-sleepable by default, enable sleeping only when bpf_dynptr_from_file() is invoked from a sleepable context. This change: * Introduces a sleepable constructor: bpf_dynptr_from_file_sleepable() * Override non-sleepable constructor with sleepable if it's always called in sleepable context Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251026203853.135105-10-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 14f800773997..a47d67db3be5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -670,6 +670,9 @@ static inline bool bpf_map_has_internal_structs(struct bpf_map *map) void bpf_map_free_internal_structs(struct bpf_map *map, void *obj); +int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, + struct bpf_dynptr *ptr__uninit); + extern const struct bpf_map_ops bpf_map_offload_ops; /* bpf_type_flag contains a set of flags that are applicable to the values of -- cgit v1.2.3 From b4ce5923e780d6896d4aaf19de5a27652b8bf1ea Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Wed, 5 Nov 2025 09:03:59 +0000 Subject: bpf, x86: add new map type: instructions array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On bpf(BPF_PROG_LOAD) syscall user-supplied BPF programs are translated by the verifier into "xlated" BPF programs. During this process the original instructions offsets might be adjusted and/or individual instructions might be replaced by new sets of instructions, or deleted. Add a new BPF map type which is aimed to keep track of how, for a given program, the original instructions were relocated during the verification. Also, besides keeping track of the original -> xlated mapping, make x86 JIT to build the xlated -> jitted mapping for every instruction listed in an instruction array. This is required for every future application of instruction arrays: static keys, indirect jumps and indirect calls. A map of the BPF_MAP_TYPE_INSN_ARRAY type must be created with a u32 keys and value of size 8. The values have different semantics for userspace and for BPF space. For userspace a value consists of two u32 values – xlated and jitted offsets. For BPF side the value is a real pointer to a jitted instruction. On map creation/initialization, before loading the program, each element of the map should be initialized to point to an instruction offset within the program. Before the program load such maps should be made frozen. After the program verification xlated and jitted offsets can be read via the bpf(2) syscall. If a tracked instruction is removed by the verifier, then the xlated offset is set to (u32)-1 which is considered to be too big for a valid BPF program offset. One such a map can, obviously, be used to track one and only one BPF program. If the verification process was unsuccessful, then the same map can be re-used to verify the program with a different log level. However, if the program was loaded fine, then such a map, being frozen in any case, can't be reused by other programs even after the program release. Example. Consider the following original and xlated programs: Original prog: Xlated prog: 0: r1 = 0x0 0: r1 = 0 1: *(u32 *)(r10 - 0x4) = r1 1: *(u32 *)(r10 -4) = r1 2: r2 = r10 2: r2 = r10 3: r2 += -0x4 3: r2 += -4 4: r1 = 0x0 ll 4: r1 = map[id:88] 6: call 0x1 6: r1 += 272 7: r0 = *(u32 *)(r2 +0) 8: if r0 >= 0x1 goto pc+3 9: r0 <<= 3 10: r0 += r1 11: goto pc+1 12: r0 = 0 7: r6 = r0 13: r6 = r0 8: if r6 == 0x0 goto +0x2 14: if r6 == 0x0 goto pc+4 9: call 0x76 15: r0 = 0xffffffff8d2079c0 17: r0 = *(u64 *)(r0 +0) 10: *(u64 *)(r6 + 0x0) = r0 18: *(u64 *)(r6 +0) = r0 11: r0 = 0x0 19: r0 = 0x0 12: exit 20: exit An instruction array map, containing, e.g., instructions [0,4,7,12] will be translated by the verifier to [0,4,13,20]. A map with index 5 (the middle of 16-byte instruction) or indexes greater than 12 (outside the program boundaries) would be rejected. The functionality provided by this patch will be extended in consequent patches to implement BPF Static Keys, indirect jumps, and indirect calls. Signed-off-by: Anton Protopopov Reviewed-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251105090410.1250500-2-a.s.protopopov@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 15 +++++++++++++++ include/linux/bpf_types.h | 1 + include/linux/bpf_verifier.h | 2 ++ 3 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a47d67db3be5..9d41a6affcef 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3797,4 +3797,19 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char * const char **linep, int *nump); struct bpf_prog *bpf_prog_find_from_stack(void); +int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog); +int bpf_insn_array_ready(struct bpf_map *map); +void bpf_insn_array_release(struct bpf_map *map); +void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len); +void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len); + +#ifdef CONFIG_BPF_SYSCALL +void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image); +#else +static inline void +bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image) +{ +} +#endif + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fa78f49d4a9a..b13de31e163f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -133,6 +133,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_INSN_ARRAY, insn_array_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c6eb68b6389c..6b820d8d77af 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -754,8 +754,10 @@ struct bpf_verifier_env { struct list_head free_list; /* list of struct bpf_verifier_state_list */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */ + struct bpf_map *insn_array_maps[MAX_USED_MAPS]; /* array of INSN_ARRAY map's to be relocated */ u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ + u32 insn_array_map_cnt; /* number of used maps of type BPF_MAP_TYPE_INSN_ARRAY */ u32 id_gen; /* used to generate unique reg IDs */ u32 hidden_subprog_cnt; /* number of hidden subprogs */ int exception_callback_subprog; -- cgit v1.2.3 From 493d9e0d608339a32f568504d5fd411a261bb0af Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Wed, 5 Nov 2025 09:04:06 +0000 Subject: bpf, x86: add support for indirect jumps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for a new instruction BPF_JMP|BPF_X|BPF_JA, SRC=0, DST=Rx, off=0, imm=0 which does an indirect jump to a location stored in Rx. The register Rx should have type PTR_TO_INSN. This new type assures that the Rx register contains a value (or a range of values) loaded from a correct jump table – map of type instruction array. For example, for a C switch LLVM will generate the following code: 0: r3 = r1 # "switch (r3)" 1: if r3 > 0x13 goto +0x666 # check r3 boundaries 2: r3 <<= 0x3 # adjust to an index in array of addresses 3: r1 = 0xbeef ll # r1 is PTR_TO_MAP_VALUE, r1->map_ptr=M 5: r1 += r3 # r1 inherits boundaries from r3 6: r1 = *(u64 *)(r1 + 0x0) # r1 now has type INSN_TO_PTR 7: gotox r1 # jit will generate proper code Here the gotox instruction corresponds to one particular map. This is possible however to have a gotox instruction which can be loaded from different maps, e.g. 0: r1 &= 0x1 1: r2 <<= 0x3 2: r3 = 0x0 ll # load from map M_1 4: r3 += r2 5: if r1 == 0x0 goto +0x4 6: r1 <<= 0x3 7: r3 = 0x0 ll # load from map M_2 9: r3 += r1 A: r1 = *(u64 *)(r3 + 0x0) B: gotox r1 # jump to target loaded from M_1 or M_2 During check_cfg stage the verifier will collect all the maps which point to inside the subprog being verified. When building the config, the high 16 bytes of the insn_state are used, so this patch (theoretically) supports jump tables of up to 2^16 slots. During the later stage, in check_indirect_jump, it is checked that the register Rx was loaded from a particular instruction array. Signed-off-by: Anton Protopopov Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251105090410.1250500-9-a.s.protopopov@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/linux/bpf_verifier.h | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9d41a6affcef..09d5dc541d1c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1001,6 +1001,7 @@ enum bpf_reg_type { PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_INSN, /* reg points to a bpf program instruction */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ __BPF_REG_TYPE_MAX, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6b820d8d77af..5441341f1ab9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -527,6 +527,7 @@ struct bpf_insn_aux_data { struct { u32 map_index; /* index into used_maps[] */ u32 map_off; /* offset from value base address */ + struct bpf_iarray *jt; /* jump table for gotox instruction */ }; struct { enum bpf_reg_type reg_type; /* type of pseudo_btf_id */ @@ -840,6 +841,7 @@ struct bpf_verifier_env { struct bpf_scc_info **scc_info; u32 scc_cnt; struct bpf_iarray *succ; + struct bpf_iarray *gotox_tmp_buf; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) @@ -1050,6 +1052,13 @@ static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_ return !(off % BPF_REG_SIZE); } +static inline bool insn_is_gotox(struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_JMP && + BPF_OP(insn->code) == BPF_JA && + BPF_SRC(insn->code) == BPF_X; +} + const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); -- cgit v1.2.3 From 7dc211c1159d991db609bdf4b0fb9033c04adcbc Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Sat, 15 Nov 2025 10:23:43 +0000 Subject: bpf: Fix invalid prog->stats access when update_effective_progs fails Syzkaller triggers an invalid memory access issue following fault injection in update_effective_progs. The issue can be described as follows: __cgroup_bpf_detach update_effective_progs compute_effective_progs bpf_prog_array_alloc <-- fault inject purge_effective_progs /* change to dummy_bpf_prog */ array->items[index] = &dummy_bpf_prog.prog ---softirq start--- __do_softirq ... __cgroup_bpf_run_filter_skb __bpf_prog_run_save_cb bpf_prog_run stats = this_cpu_ptr(prog->stats) /* invalid memory access */ flags = u64_stats_update_begin_irqsave(&stats->syncp) ---softirq end--- static_branch_dec(&cgroup_bpf_enabled_key[atype]) The reason is that fault injection caused update_effective_progs to fail and then changed the original prog into dummy_bpf_prog.prog in purge_effective_progs. Then a softirq came, and accessing the members of dummy_bpf_prog.prog in the softirq triggers invalid mem access. To fix it, skip updating stats when stats is NULL. Fixes: 492ecee892c2 ("bpf: enable program stats") Signed-off-by: Pu Lehui Link: https://lore.kernel.org/r/20251115102343.2200727-1-pulehui@huaweicloud.com Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 973233b82dc1..569de3b14279 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -712,11 +712,13 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, ret = dfunc(ctx, prog->insnsi, prog->bpf_func); duration = sched_clock() - start; - stats = this_cpu_ptr(prog->stats); - flags = u64_stats_update_begin_irqsave(&stats->syncp); - u64_stats_inc(&stats->cnt); - u64_stats_add(&stats->nsecs, duration); - u64_stats_update_end_irqrestore(&stats->syncp, flags); + if (likely(prog->stats)) { + stats = this_cpu_ptr(prog->stats); + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, duration); + u64_stats_update_end_irqrestore(&stats->syncp, flags); + } } else { ret = dfunc(ctx, prog->insnsi, prog->bpf_func); } -- cgit v1.2.3 From 0e854e55356908386605714e66f98c3985d9e266 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Fri, 14 Nov 2025 12:13:23 -0800 Subject: bpf: Always charge/uncharge memory when allocating/unlinking storage elements Since commit a96a44aba556 ("bpf: bpf_sk_storage: Fix invalid wait context lockdep report"), {charge,uncharge}_mem are always true when allocating a bpf_local_storage_elem or unlinking a bpf_local_storage_elem from local storage, so drop these arguments. No functional change. Signed-off-by: Amery Hung Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20251114201329.3275875-2-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 782f58feea35..3663eabcc3ff 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -184,7 +184,7 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, - bool charge_mem, bool swap_uptrs, gfp_t gfp_flags); + bool swap_uptrs, gfp_t gfp_flags); void bpf_selem_free(struct bpf_local_storage_elem *selem, struct bpf_local_storage_map *smap, -- cgit v1.2.3 From e76a33e1c7186526c2c133af73ea70da9275e1ba Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Fri, 14 Nov 2025 12:13:24 -0800 Subject: bpf: Remove smap argument from bpf_selem_free() Since selem already saves a pointer to smap, use it instead of an additional argument in bpf_selem_free(). This requires moving the SDATA(selem)->smap assignment from bpf_selem_link_map() to bpf_selem_alloc() since bpf_selem_free() may be called without the selem being linked to smap in bpf_local_storage_update(). Signed-off-by: Amery Hung Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20251114201329.3275875-3-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 3663eabcc3ff..4ab137e75f33 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -187,7 +187,6 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, bool swap_uptrs, gfp_t gfp_flags); void bpf_selem_free(struct bpf_local_storage_elem *selem, - struct bpf_local_storage_map *smap, bool reuse_now); int -- cgit v1.2.3 From 39a460c4253e4a437b6b372f462c0c043026784d Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Fri, 14 Nov 2025 12:13:25 -0800 Subject: bpf: Save memory alloction info in bpf_local_storage Save the memory allocation method used for bpf_local_storage in the struct explicitly so that we don't need to go through the hassle to find out the info. When a later patch replaces BPF memory allocator with kmalloc_noloc(), bpf_local_storage_free() will no longer need smap->storage_ma to return the memory and completely remove the dependency on smap in bpf_local_storage_free(). Signed-off-by: Amery Hung Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20251114201329.3275875-4-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 4ab137e75f33..7fef0cec8340 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -97,6 +97,7 @@ struct bpf_local_storage { */ struct rcu_head rcu; raw_spinlock_t lock; /* Protect adding/removing from the "list" */ + bool bpf_ma; }; /* U16_MAX is much more than enough for sk local storage -- cgit v1.2.3 From f484f4a3e058b5641670ebaeb301c06589848521 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Fri, 14 Nov 2025 12:13:26 -0800 Subject: bpf: Replace bpf memory allocator with kmalloc_nolock() in local storage Replace bpf memory allocator with kmalloc_nolock() to reduce memory wastage due to preallocation. In bpf_selem_free(), an selem now needs to wait for a RCU grace period before being freed when reuse_now == true. Therefore, rcu_barrier() should be always be called in bpf_local_storage_map_free(). In bpf_local_storage_free(), since smap->storage_ma is no longer needed to return the memory, the function is now independent from smap. Remove the outdated comment in bpf_local_storage_alloc(). We already free selem after an RCU grace period in bpf_local_storage_update() when bpf_local_storage_alloc() failed the cmpxchg since commit c0d63f309186 ("bpf: Add bpf_selem_free()"). Signed-off-by: Amery Hung Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20251114201329.3275875-5-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_local_storage.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 7fef0cec8340..66432248cd81 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -53,9 +53,7 @@ struct bpf_local_storage_map { u32 bucket_log; u16 elem_size; u16 cache_idx; - struct bpf_mem_alloc selem_ma; - struct bpf_mem_alloc storage_ma; - bool bpf_ma; + bool use_kmalloc_nolock; }; struct bpf_local_storage_data { @@ -97,7 +95,7 @@ struct bpf_local_storage { */ struct rcu_head rcu; raw_spinlock_t lock; /* Protect adding/removing from the "list" */ - bool bpf_ma; + bool use_kmalloc_nolock; }; /* U16_MAX is much more than enough for sk local storage @@ -131,7 +129,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr); struct bpf_map * bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache, - bool bpf_ma); + bool use_kmalloc_nolock); void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, -- cgit v1.2.3 From e0940c672ab4228caa33bcd7cc0ad8017482c2f1 Mon Sep 17 00:00:00 2001 From: Nirbhay Sharma Date: Fri, 21 Nov 2025 02:16:21 +0530 Subject: bpf: Document cfi_stubs and owner fields in struct bpf_struct_ops Add missing kernel-doc documentation for the cfi_stubs and owner fields in struct bpf_struct_ops to fix the following warnings: Warning: include/linux/bpf.h:1931 struct member 'cfi_stubs' not described in 'bpf_struct_ops' Warning: include/linux/bpf.h:1931 struct member 'owner' not described in 'bpf_struct_ops' The cfi_stubs field was added in commit 2cd3e3772e41 ("x86/cfi,bpf: Fix bpf_struct_ops CFI") to provide CFI stub functions for trampolines, and the owner field is used for module reference counting. Signed-off-by: Nirbhay Sharma Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20251120204620.59571-2-nirbhay.lkd@gmail.com --- include/linux/bpf.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 09d5dc541d1c..30fb40421405 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1922,12 +1922,14 @@ struct btf_member; * reason, if this callback is not defined, the check is skipped as * the struct_ops map will have final verification performed in * @reg. - * @type: BTF type. - * @value_type: Value type. + * @cfi_stubs: Pointer to a structure of stub functions for CFI. These stubs + * provide the correct Control Flow Integrity hashes for the + * trampolines generated by BPF struct_ops. + * @owner: The module that owns this struct_ops. Used for module reference + * counting to ensure the module providing the struct_ops cannot be + * unloaded while in use. * @name: The name of the struct bpf_struct_ops object. * @func_models: Func models - * @type_id: BTF type id. - * @value_id: BTF value id. */ struct bpf_struct_ops { const struct bpf_verifier_ops *verifier_ops; -- cgit v1.2.3 From e40f5a6bf88a781d5f81bc6b8aab9ac31d8c98dd Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Wed, 19 Nov 2025 17:03:54 +0100 Subject: bpf: correct stack liveness for tail calls This updates bpf_insn_successors() reflecting that control flow might jump over the instructions between tail call and function exit, verifier might assume that some writes to parent stack always happen, which is not the case. Signed-off-by: Eduard Zingerman Signed-off-by: Martin Teichmann Link: https://lore.kernel.org/r/20251119160355.1160932-4-martin.teichmann@xfel.eu Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5441341f1ab9..8d0b60fa5f2b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -527,7 +527,6 @@ struct bpf_insn_aux_data { struct { u32 map_index; /* index into used_maps[] */ u32 map_off; /* offset from value base address */ - struct bpf_iarray *jt; /* jump table for gotox instruction */ }; struct { enum bpf_reg_type reg_type; /* type of pseudo_btf_id */ @@ -550,6 +549,7 @@ struct bpf_insn_aux_data { /* remember the offset of node field within type to rewrite */ u64 insert_off; }; + struct bpf_iarray *jt; /* jump table for gotox or bpf_tailcall call instruction */ struct btf_struct_meta *kptr_struct_meta; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ @@ -652,6 +652,7 @@ struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u32 postorder_start; /* The idx to the env->cfg.insn_postorder */ + u32 exit_idx; /* Index of one of the BPF_EXIT instructions in this subprogram */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; /* offsets in range [stack_depth .. fastcall_stack_off) @@ -669,9 +670,9 @@ struct bpf_subprog_info { bool keep_fastcall_stack: 1; bool changes_pkt_data: 1; bool might_sleep: 1; + u8 arg_cnt:3; enum priv_stack_mode priv_stack_mode; - u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; }; -- cgit v1.2.3 From 4167096cb964325ed88cd558f5b0c61fcaab44c1 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 17 Nov 2025 20:04:09 +0000 Subject: bpf: support nested rcu critical sections Currently, nested rcu critical sections are rejected by the verifier and rcu_lock state is managed by a boolean variable. Add support for nested rcu critical sections by make active_rcu_locks a counter similar to active_preempt_locks. bpf_rcu_read_lock() increments this counter and bpf_rcu_read_unlock() decrements it, MEM_RCU -> PTR_UNTRUSTED transition happens when active_rcu_locks drops to 0. Signed-off-by: Puranjay Mohan Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251117200411.25563-2-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8d0b60fa5f2b..130bcbd66f60 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -416,7 +416,7 @@ struct bpf_verifier_state { u32 active_irq_id; u32 active_lock_id; void *active_lock_ptr; - bool active_rcu_lock; + u32 active_rcu_locks; bool speculative; bool in_sleepable; -- cgit v1.2.3 From 25e4e3565d45f567f78089f38822fa64abee5230 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 18 Nov 2025 20:36:29 +0800 Subject: ftrace: Introduce FTRACE_OPS_FL_JMP For now, the "nop" will be replaced with a "call" instruction when a function is hooked by the ftrace. However, sometimes the "call" can break the RSB and introduce extra overhead. Therefore, introduce the flag FTRACE_OPS_FL_JMP, which indicate that the ftrace_ops should be called with a "jmp" instead of "call". For now, it is only used by the direct call case. When a direct ftrace_ops is marked with FTRACE_OPS_FL_JMP, the last bit of the ops->direct_call will be set to 1. Therefore, we can tell if we should use "jmp" for the callback in ftrace_call_replace(). Signed-off-by: Menglong Dong Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20251118123639.688444-2-dongml2@chinatelecom.cn Signed-off-by: Alexei Starovoitov --- include/linux/ftrace.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 07f8c309e432..015dd1049bea 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -359,6 +359,7 @@ enum { FTRACE_OPS_FL_DIRECT = BIT(17), FTRACE_OPS_FL_SUBOP = BIT(18), FTRACE_OPS_FL_GRAPH = BIT(19), + FTRACE_OPS_FL_JMP = BIT(20), }; #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -577,6 +578,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP +static inline bool ftrace_is_jmp(unsigned long addr) +{ + return addr & 1; +} + +static inline unsigned long ftrace_jmp_set(unsigned long addr) +{ + return addr | 1UL; +} + +static inline unsigned long ftrace_jmp_get(unsigned long addr) +{ + return addr & ~1UL; +} +#else +static inline bool ftrace_is_jmp(unsigned long addr) +{ + return false; +} + +static inline unsigned long ftrace_jmp_set(unsigned long addr) +{ + return addr; +} + +static inline unsigned long ftrace_jmp_get(unsigned long addr) +{ + return addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */ + #ifdef CONFIG_STACK_TRACER int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer, -- cgit v1.2.3 From 373f2f44c300815c5f170e89560ac361c0053dfe Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 18 Nov 2025 20:36:32 +0800 Subject: bpf,x86: adjust the "jmp" mode for bpf trampoline In the origin call case, if BPF_TRAMP_F_SKIP_FRAME is not set, it means that the trampoline is not called, but "jmp". Introduce the function bpf_trampoline_use_jmp() to check if the trampoline is in "jmp" mode. Do some adjustment on the "jmp" mode for the x86_64. The main adjustment that we make is for the stack parameter passing case, as the stack alignment logic changes in the "jmp" mode without the "rip". What's more, the location of the parameters on the stack also changes. Signed-off-by: Menglong Dong Link: https://lore.kernel.org/r/20251118123639.688444-5-dongml2@chinatelecom.cn Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30fb40421405..2f79afe81482 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1264,6 +1264,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start, bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog); bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP +static inline bool bpf_trampoline_use_jmp(u64 flags) +{ + return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME); +} +#else +static inline bool bpf_trampoline_use_jmp(u64 flags) +{ + return false; +} +#endif + struct bpf_ksym { unsigned long start; unsigned long end; -- cgit v1.2.3 From ae4a3160d19cd16b874737ebc1798c7bc2fe3c9e Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 18 Nov 2025 20:36:33 +0800 Subject: bpf: specify the old and new poke_type for bpf_arch_text_poke In the origin logic, the bpf_arch_text_poke() assume that the old and new instructions have the same opcode. However, they can have different opcode if we want to replace a "call" insn with a "jmp" insn. Therefore, add the new function parameter "old_t" along with the "new_t", which are used to indicate the old and new poke type. Meanwhile, adjust the implement of bpf_arch_text_poke() for all the archs. "BPF_MOD_NOP" is added to make the code more readable. In bpf_arch_text_poke(), we still check if the new and old address is NULL to determine if nop insn should be used, which I think is more safe. Signed-off-by: Menglong Dong Link: https://lore.kernel.org/r/20251118123639.688444-6-dongml2@chinatelecom.cn Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2f79afe81482..a9b788c7b4aa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3710,12 +3710,14 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, #endif /* CONFIG_INET */ enum bpf_text_poke_type { + BPF_MOD_NOP, BPF_MOD_CALL, BPF_MOD_JUMP, }; -int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, - void *addr1, void *addr2); +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t, + enum bpf_text_poke_type new_t, void *old_addr, + void *new_addr); void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, struct bpf_prog *new, struct bpf_prog *old); -- cgit v1.2.3 From 8f6ddc0587606c4be7ffcbdb20a4a99647e0c362 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 25 Nov 2025 22:58:50 +0800 Subject: bpf: Introduce internal bpf_map_check_op_flags helper function It is to unify map flags checking for lookup_elem, update_elem, lookup_batch and update_batch APIs. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20251125145857.98134-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9b788c7b4aa..6498be4c44f8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3829,4 +3829,15 @@ bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image) } #endif +static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags) +{ + if (flags & ~allowed_flags) + return -EINVAL; + + if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)) + return -EINVAL; + + return 0; +} + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3