From 3ebc98c1ae7efda949a015990280a097f4a5453a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Mar 2026 09:16:22 +0100 Subject: ftrace: Add missing ftrace_lock to update_ftrace_direct_add/del Ihor and Kumar reported splat from ftrace_get_addr_curr [1], which happened because of the missing ftrace_lock in update_ftrace_direct_add/del functions allowing concurrent access to ftrace internals. The ftrace_update_ops function must be guarded by ftrace_lock, adding that. Fixes: 05dc5e9c1fe1 ("ftrace: Add update_ftrace_direct_add function") Fixes: 8d2c1233f371 ("ftrace: Add update_ftrace_direct_del function") Reported-by: Ihor Solodrai Reported-by: Kumar Kartikeya Dwivedi Closes: https://lore.kernel.org/bpf/1b58ffb2-92ae-433a-ba46-95294d6edea2@linux.dev/ Tested-by: Ihor Solodrai Signed-off-by: Jiri Olsa Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20260302081622.165713-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/trace/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 827fb9a0bf0d..8baf61c9be6d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6404,6 +6404,7 @@ int update_ftrace_direct_add(struct ftrace_ops *ops, struct ftrace_hash *hash) new_filter_hash = old_filter_hash; } } else { + guard(mutex)(&ftrace_lock); err = ftrace_update_ops(ops, new_filter_hash, EMPTY_HASH); /* * new_filter_hash is dup-ed, so we need to release it anyway, @@ -6530,6 +6531,7 @@ int update_ftrace_direct_del(struct ftrace_ops *ops, struct ftrace_hash *hash) ops->func_hash->filter_hash = NULL; } } else { + guard(mutex)(&ftrace_lock); err = ftrace_update_ops(ops, new_filter_hash, EMPTY_HASH); /* * new_filter_hash is dup-ed, so we need to release it anyway, -- cgit v1.2.3 From 56145d237385ca0e7ca9ff7b226aaf2eb8ef368b Mon Sep 17 00:00:00 2001 From: Lang Xu Date: Tue, 3 Mar 2026 17:52:17 +0800 Subject: bpf: Fix a UAF issue in bpf_trampoline_link_cgroup_shim The root cause of this bug is that when 'bpf_link_put' reduces the refcount of 'shim_link->link.link' to zero, the resource is considered released but may still be referenced via 'tr->progs_hlist' in 'cgroup_shim_find'. The actual cleanup of 'tr->progs_hlist' in 'bpf_shim_tramp_link_release' is deferred. During this window, another process can cause a use-after-free via 'bpf_trampoline_link_cgroup_shim'. Based on Martin KaFai Lau's suggestions, I have created a simple patch. To fix this: Add an atomic non-zero check in 'bpf_trampoline_link_cgroup_shim'. Only increment the refcount if it is not already zero. Testing: I verified the fix by adding a delay in 'bpf_shim_tramp_link_release' to make the bug easier to trigger: static void bpf_shim_tramp_link_release(struct bpf_link *link) { /* ... */ if (!shim_link->trampoline) return; + msleep(100); WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL)); bpf_trampoline_put(shim_link->trampoline); } Before the patch, running a PoC easily reproduced the crash(almost 100%) with a call trace similar to KaiyanM's report. After the patch, the bug no longer occurs even after millions of iterations. Fixes: 69fd337a975c ("bpf: per-cgroup lsm flavor") Reported-by: Kaiyan Mei Closes: https://lore.kernel.org/bpf/3c4ebb0b.46ff8.19abab8abe2.Coremail.kaiyanm@hust.edu.cn/ Signed-off-by: Lang Xu Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/279EEE1BA1DDB49D+20260303095217.34436-1-xulang@uniontech.com --- kernel/bpf/trampoline.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 84db9e658e52..f02254a21585 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -1002,10 +1002,8 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, mutex_lock(&tr->mutex); shim_link = cgroup_shim_find(tr, bpf_func); - if (shim_link) { + if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) { /* Reusing existing shim attached by the other program. */ - bpf_link_inc(&shim_link->link.link); - mutex_unlock(&tr->mutex); bpf_trampoline_put(tr); /* bpf_trampoline_get above */ return 0; -- cgit v1.2.3 From fbc7aef517d8765e4c425d2792409bb9bf2e1f13 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 6 Mar 2026 16:54:24 -0800 Subject: bpf: Fix u32/s32 bounds when ranges cross min/max boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same as in __reg64_deduce_bounds(), refine s32/u32 ranges in __reg32_deduce_bounds() in the following situations: - s32 range crosses U32_MAX/0 boundary, positive part of the s32 range overlaps with u32 range: 0 U32_MAX | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | |----------------------------|----------------------------| |xxxxx s32 range xxxxxxxxx] [xxxxxxx| 0 S32_MAX S32_MIN -1 - s32 range crosses U32_MAX/0 boundary, negative part of the s32 range overlaps with u32 range: 0 U32_MAX | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | |----------------------------|----------------------------| |xxxxxxxxx] [xxxxxxxxxxxx s32 range | 0 S32_MAX S32_MIN -1 - No refinement if ranges overlap in two intervals. This helps for e.g. consider the following program: call %[bpf_get_prandom_u32]; w0 &= 0xffffffff; if w0 < 0x3 goto 1f; // on fall-through u32 range [3..U32_MAX] if w0 s> 0x1 goto 1f; // on fall-through s32 range [S32_MIN..1] if w0 s< 0x0 goto 1f; // range can be narrowed to [S32_MIN..-1] r10 = 0; 1: ...; The reg_bounds.c selftest is updated to incorporate identical logic, refinement based on non-overflowing range halves: ((x ∩ [0, smax]) ∩ (y ∩ [0, smax])) ∪ ((x ∩ [smin,-1]) ∩ (y ∩ [smin,-1])) Reported-by: Andrea Righi Reported-by: Emil Tsalapatis Closes: https://lore.kernel.org/bpf/aakqucg4vcujVwif@gpd4/T/ Reviewed-by: Emil Tsalapatis Acked-by: Shung-Hsi Yu Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260306-bpf-32-bit-range-overflow-v3-1-f7f67e060a6b@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 401d6c4960ec..f960b382fdb3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2511,6 +2511,30 @@ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value); reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value); + } else { + if (reg->u32_max_value < (u32)reg->s32_min_value) { + /* See __reg64_deduce_bounds() for detailed explanation. + * Refine ranges in the following situation: + * + * 0 U32_MAX + * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | + * |----------------------------|----------------------------| + * |xxxxx s32 range xxxxxxxxx] [xxxxxxx| + * 0 S32_MAX S32_MIN -1 + */ + reg->s32_min_value = (s32)reg->u32_min_value; + reg->u32_max_value = min_t(u32, reg->u32_max_value, reg->s32_max_value); + } else if ((u32)reg->s32_max_value < reg->u32_min_value) { + /* + * 0 U32_MAX + * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | + * |----------------------------|----------------------------| + * |xxxxxxxxx] [xxxxxxxxxxxx s32 range | + * 0 S32_MAX S32_MIN -1 + */ + reg->s32_max_value = (s32)reg->u32_max_value; + reg->u32_min_value = max_t(u32, reg->u32_min_value, reg->s32_min_value); + } } } -- cgit v1.2.3 From 2658a1720a1944fbaeda937000ad2b3c3dfaf1bb Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 6 Mar 2026 16:02:47 -0800 Subject: bpf: collect only live registers in linked regs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an inconsistency between func_states_equal() and collect_linked_regs(): - regsafe() uses check_ids() to verify that cached and current states have identical register id mapping. - func_states_equal() calls regsafe() only for registers computed as live by compute_live_registers(). - clean_live_states() is supposed to remove dead registers from cached states, but it can skip states belonging to an iterator-based loop. - collect_linked_regs() collects all registers sharing the same id, ignoring the marks computed by compute_live_registers(). Linked registers are stored in the state's jump history. - backtrack_insn() marks all linked registers for an instruction as precise whenever one of the linked registers is precise. The above might lead to a scenario: - There is an instruction I with register rY known to be dead at I. - Instruction I is reached via two paths: first A, then B. - On path A: - There is an id link between registers rX and rY. - Checkpoint C is created at I. - Linked register set {rX, rY} is saved to the jump history. - rX is marked as precise at I, causing both rX and rY to be marked precise at C. - On path B: - There is no id link between registers rX and rY, otherwise register states are sub-states of those in C. - Because rY is dead at I, check_ids() returns true. - Current state is considered equal to checkpoint C, propagate_precision() propagates spurious precision mark for register rY along the path B. - Depending on a program, this might hit verifier_bug() in the backtrack_insn(), e.g. if rY ∈ [r1..r5] and backtrack_insn() spots a function call. The reproducer program is in the next patch. This was hit by sched_ext scx_lavd scheduler code. Changes in tests: - verifier_scalar_ids.c selftests need modification to preserve some registers as live for __msg() checks. - exceptions_assert.c adjusted to match changes in the verifier log, R0 is dead after conditional instruction and thus does not get range. - precise.c adjusted to match changes in the verifier log, register r9 is dead after comparison and it's range is not important for test. Reported-by: Emil Tsalapatis Fixes: 0fb3cf6110a5 ("bpf: use register liveness information for func_states_equal") Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20260306-linked-regs-and-propagate-precision-v1-1-18e859be570d@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f960b382fdb3..836ceb128d19 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17359,17 +17359,24 @@ static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_st * in verifier state, save R in linked_regs if R->id == id. * If there are too many Rs sharing same id, reset id for leftover Rs. */ -static void collect_linked_regs(struct bpf_verifier_state *vstate, u32 id, +static void collect_linked_regs(struct bpf_verifier_env *env, + struct bpf_verifier_state *vstate, + u32 id, struct linked_regs *linked_regs) { + struct bpf_insn_aux_data *aux = env->insn_aux_data; struct bpf_func_state *func; struct bpf_reg_state *reg; + u16 live_regs; int i, j; id = id & ~BPF_ADD_CONST; for (i = vstate->curframe; i >= 0; i--) { + live_regs = aux[frame_insn_idx(vstate, i)].live_regs_before; func = vstate->frame[i]; for (j = 0; j < BPF_REG_FP; j++) { + if (!(live_regs & BIT(j))) + continue; reg = &func->regs[j]; __collect_linked_regs(linked_regs, reg, id, i, j, true); } @@ -17584,9 +17591,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * if parent state is created. */ if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id) - collect_linked_regs(this_branch, src_reg->id, &linked_regs); + collect_linked_regs(env, this_branch, src_reg->id, &linked_regs); if (dst_reg->type == SCALAR_VALUE && dst_reg->id) - collect_linked_regs(this_branch, dst_reg->id, &linked_regs); + collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs); if (linked_regs.cnt > 1) { err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs)); if (err) -- cgit v1.2.3