From 8bc11700e0d23d4fdb7d8d5a73b2e95de427cabc Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Mon, 26 Jan 2026 22:18:32 +0100
Subject: x86/fgraph: Fix return_to_handler regs.rsp value

The previous change (the Fixes commit) messed up the rsp register
value: the stored rsp is already adjusted by FRAME_SIZE, but we need
the original rsp value.

This change does not affect the current fprobe kernel unwind, which
takes the !perf_hw_regs path in perf_callchain_kernel:

	if (perf_hw_regs(regs)) {
		if (perf_callchain_store(entry, regs->ip))
			return;
		unwind_start(&state, current, regs, NULL);
	} else {
		unwind_start(&state, current, NULL, (void *)regs->sp);
	}

which uses pt_regs.sp as the first_frame boundary (the FRAME_SIZE
shift makes no difference, the unwind still stops at the right frame).

This change fixes the other path, where we unwind directly from the
pt_regs sp/fp/ip state; that path is used in the following change.

Fixes: 20a0bc10272f ("x86/fgraph,bpf: Fix stack ORC unwind from kprobe_multi return probe")
Signed-off-by: Jiri Olsa
Signed-off-by: Andrii Nakryiko
Reviewed-by: Steven Rostedt (Google)
Link: https://lore.kernel.org/bpf/20260126211837.472802-2-jolsa@kernel.org
---
 arch/x86/kernel/ftrace_64.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index a132608265f6..62c1c93aa1c6 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -364,6 +364,9 @@ SYM_CODE_START(return_to_handler)
 	UNWIND_HINT_UNDEFINED
 	ANNOTATE_NOENDBR
 
+	/* Store original rsp for pt_regs.sp value. */
+	movq %rsp, %rdi
+
 	/* Restore return_to_handler value that got eaten by previous ret instruction. */
 	subq $8, %rsp
 	UNWIND_HINT_FUNC
@@ -374,7 +377,7 @@ SYM_CODE_START(return_to_handler)
 	movq %rax, RAX(%rsp)
 	movq %rdx, RDX(%rsp)
 	movq %rbp, RBP(%rsp)
-	movq %rsp, RSP(%rsp)
+	movq %rdi, RSP(%rsp)
 
 	movq %rsp, %rdi
 	call ftrace_return_to_handler
--
cgit v1.2.3

From aea251799998aa1b78eacdfb308f18ea114ea5b3 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Mon, 26 Jan 2026 22:18:33 +0100
Subject: x86/fgraph,bpf: Switch kprobe_multi program stack unwind to hw_regs path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mahe reported a missing function in the stack trace on top of a kprobe
multi program. The missing function is the very first one in the stack
trace, the one that the bpf program is attached to.

  # bpftrace -e 'kprobe:__x64_sys_newuname* { print(kstack)}'
  Attaching 1 probe...

        do_syscall_64+134
        entry_SYSCALL_64_after_hwframe+118

  ('*' is used for kprobe_multi attachment)

The reason is that the previous change (the Fixes commit) fixed the
stack unwind for tracepoints, but removed the attached function address
from the stack trace on top of kprobe multi programs, which I also
overlooked in the related test (see the following patch).

The tracepoint and kprobe_multi have different stack setups, but use
the same unwind path. I think it's better to keep the previous change,
which fixed the tracepoint unwind, and instead change the kprobe multi
unwind as explained below.

The bpf program stack unwind calls perf_callchain_kernel for the kernel
portion, and it follows two unwind paths based on the X86_EFLAGS_FIXED
bit in pt_regs.flags. When the bit is set, we unwind from the stack
represented by the pt_regs argument; otherwise we unwind the currently
executed stack up to the 'first_frame' boundary.
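For reference, the helper that selects between these two paths checks
that bit directly; simplified from arch/x86/events/core.c (quoted here
for illustration, not part of this patch):

	static inline int perf_hw_regs(struct pt_regs *regs)
	{
		return regs->flags & X86_EFLAGS_FIXED;
	}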
The 'first_frame' value is taken from the regs.rsp value, but the
ftrace_caller and ftrace_regs_caller (ftrace trampoline) functions set
regs.rsp to the previous stack frame, so we skip the attached function
entry.

If we switch the kprobe_multi unwind to use the X86_EFLAGS_FIXED bit,
we set the start of the unwind to the attached function address. As
another benefit, we also cut the extra unwind cycles needed to reach
the 'first_frame' boundary. The speedup can be measured with the
trigger bench for kprobe_multi programs with stacktrace support.

  - trigger bench with stacktrace on current code:

      kprobe-multi   :    0.810 ± 0.001M/s
      kretprobe-multi:    0.808 ± 0.001M/s

  - and with the fix:

      kprobe-multi   :    1.264 ± 0.001M/s
      kretprobe-multi:    1.401 ± 0.002M/s

With the fix, the entry probe stacktrace:

  # bpftrace -e 'kprobe:__x64_sys_newuname* { print(kstack)}'
  Attaching 1 probe...

        __x64_sys_newuname+9
        do_syscall_64+134
        entry_SYSCALL_64_after_hwframe+118

The return probe skips the attached function, because it's no longer
on the stack at the point of the unwind; this matches how a standard
kretprobe works.

  # bpftrace -e 'kretprobe:__x64_sys_newuname* { print(kstack)}'
  Attaching 1 probe...

        do_syscall_64+134
        entry_SYSCALL_64_after_hwframe+118

Fixes: 6d08340d1e35 ("Revert "perf/x86: Always store regs->ip in perf_callchain_kernel()"")
Reported-by: Mahe Tardy
Signed-off-by: Jiri Olsa
Signed-off-by: Andrii Nakryiko
Acked-by: Steven Rostedt (Google)
Link: https://lore.kernel.org/bpf/20260126211837.472802-3-jolsa@kernel.org
---
 arch/x86/include/asm/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b08c95872eed..c56e1e63b893 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -57,7 +57,7 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
 }
 
 #define arch_ftrace_partial_regs(regs) do {	\
-	regs->flags &= ~X86_EFLAGS_FIXED;	\
+	regs->flags |= X86_EFLAGS_FIXED;	\
 	regs->cs = __KERNEL_CS;			\
 } while (0)
--
cgit v1.2.3

From 0207f94971e72a13380e28022c86da147e8e090f Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Mon, 26 Jan 2026 22:18:34 +0100
Subject: selftests/bpf: Fix kprobe multi stacktrace_ips test

We now include the attached function in the stack trace, so fix the
test accordingly.
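For context, the expected frames correspond to a call chain in
bpf_testmod along these lines (an illustrative sketch of the assumed
chain, not the actual module source):

	/* Assumed bpf_testmod call chain; noinline keeps one ORC frame
	 * per call, and bpf_testmod_test_read() calls ..._test_1():
	 */
	noinline void bpf_testmod_stacktrace_test(void) { }
	noinline void bpf_testmod_stacktrace_test_3(void) { bpf_testmod_stacktrace_test(); }
	noinline void bpf_testmod_stacktrace_test_2(void) { bpf_testmod_stacktrace_test_3(); }
	noinline void bpf_testmod_stacktrace_test_1(void) { bpf_testmod_stacktrace_test_2(); }

The entry flavor thus expects 5 ips (including the attached function),
while the return flavor expects 4, because the attached function has
already returned when the unwind runs.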
Fixes: c9e208fa93cd ("selftests/bpf: Add stacktrace ips test for kprobe_multi/kretprobe_multi")
Signed-off-by: Jiri Olsa
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20260126211837.472802-4-jolsa@kernel.org
---
 .../testing/selftests/bpf/prog_tests/stacktrace_ips.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
index c9efdd2a5b18..c93718dafd9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -74,11 +74,20 @@ static void test_stacktrace_ips_kprobe_multi(bool retprobe)
 
 	load_kallsyms();
 
-	check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
-			     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
-			     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
-			     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
-			     ksym_get_addr("bpf_testmod_test_read"));
+	if (retprobe) {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	} else {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+				     ksym_get_addr("bpf_testmod_stacktrace_test"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	}
 
 cleanup:
 	stacktrace_ips__destroy(skel);
--
cgit v1.2.3

From 7373f97e868ad01fc73de8e7b71834eeba25d4f1 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Mon, 26 Jan 2026 22:18:35 +0100
Subject: selftests/bpf: Add stacktrace ips test for kprobe/kretprobe

Adding test that attaches kprobe/kretprobe and verifies the ORC
stacktrace matches the expected functions.

The test is only for the ORC unwinder to keep it simple.
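The new subtests can then be run through the usual selftests runner,
for example (assuming the standard test_progs subtest selection):

	# ./test_progs -t stacktrace_ips/kprobe
	# ./test_progs -t stacktrace_ips/kretprobe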
Signed-off-by: Jiri Olsa
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20260126211837.472802-5-jolsa@kernel.org
---
 .../selftests/bpf/prog_tests/stacktrace_ips.c      | 50 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/stacktrace_ips.c |  7 +++
 2 files changed, 57 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
index c93718dafd9b..852830536109 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -137,6 +137,52 @@ cleanup:
 	stacktrace_ips__destroy(skel);
 }
 
+static void test_stacktrace_ips_kprobe(bool retprobe)
+{
+	LIBBPF_OPTS(bpf_kprobe_opts, opts,
+		.retprobe = retprobe
+	);
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	struct stacktrace_ips *skel;
+
+	skel = stacktrace_ips__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+		return;
+
+	if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+		test__skip();
+		goto cleanup;
+	}
+
+	skel->links.kprobe_test = bpf_program__attach_kprobe_opts(
+					skel->progs.kprobe_test,
+					"bpf_testmod_stacktrace_test", &opts);
+	if (!ASSERT_OK_PTR(skel->links.kprobe_test, "bpf_program__attach_kprobe_opts"))
+		goto cleanup;
+
+	trigger_module_test_read(1);
+
+	load_kallsyms();
+
+	if (retprobe) {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	} else {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+				     ksym_get_addr("bpf_testmod_stacktrace_test"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	}
+
+cleanup:
+	stacktrace_ips__destroy(skel);
+}
+
 static void __test_stacktrace_ips(void)
 {
 	if (test__start_subtest("kprobe_multi"))
@@ -145,6 +191,10 @@ static void __test_stacktrace_ips(void)
 		test_stacktrace_ips_kprobe_multi(true);
 	if (test__start_subtest("raw_tp"))
 		test_stacktrace_ips_raw_tp();
+	if (test__start_subtest("kprobe"))
+		test_stacktrace_ips_kprobe(false);
+	if (test__start_subtest("kretprobe"))
+		test_stacktrace_ips_kprobe(true);
 }
 #else
 static void __test_stacktrace_ips(void)
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
index a96c8150d7f5..cae077a4061b 100644
--- a/tools/testing/selftests/bpf/progs/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -31,6 +31,13 @@ int unused(void)
 
 __u32 stack_key;
 
+SEC("kprobe")
+int kprobe_test(struct pt_regs *ctx)
+{
+	stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+	return 0;
+}
+
 SEC("kprobe.multi")
 int kprobe_multi_test(struct pt_regs *ctx)
 {
--
cgit v1.2.3

From e5d532be4a3b0d1f0a9210c0da2c04a6b4605904 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Mon, 26 Jan 2026 22:18:36 +0100
Subject: selftests/bpf: Add stacktrace ips test for fentry/fexit

Adding test that attaches fentry/fexit and verifies the ORC stacktrace
matches the expected functions.

The test is only for the ORC unwinder to keep it simple.
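A note on the skip count used by the fentry/fexit programs below: the
low byte of the bpf_get_stackid flags argument is the number of topmost
frames to skip, per the uapi definition:

	/* include/uapi/linux/bpf.h: bits 0-7 of 'flags' select how
	 * many topmost stack frames to skip.
	 */
	#define BPF_F_SKIP_FIELD_MASK	0xffULL

Passing 2 drops the bpf program and trampoline frames, so the reported
stack starts at the traced kernel frames.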
Signed-off-by: Jiri Olsa
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20260126211837.472802-6-jolsa@kernel.org
---
 .../selftests/bpf/prog_tests/stacktrace_ips.c      | 51 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/stacktrace_ips.c | 20 +++++++++
 2 files changed, 71 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
index 852830536109..da42b00e3d1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -183,6 +183,53 @@ cleanup:
 	stacktrace_ips__destroy(skel);
 }
 
+static void test_stacktrace_ips_trampoline(bool retprobe)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	struct stacktrace_ips *skel;
+
+	skel = stacktrace_ips__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+		return;
+
+	if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+		test__skip();
+		goto cleanup;
+	}
+
+	if (retprobe) {
+		skel->links.fexit_test = bpf_program__attach_trace(skel->progs.fexit_test);
+		if (!ASSERT_OK_PTR(skel->links.fexit_test, "bpf_program__attach_trace"))
+			goto cleanup;
+	} else {
+		skel->links.fentry_test = bpf_program__attach_trace(skel->progs.fentry_test);
+		if (!ASSERT_OK_PTR(skel->links.fentry_test, "bpf_program__attach_trace"))
+			goto cleanup;
+	}
+
+	trigger_module_test_read(1);
+
+	load_kallsyms();
+
+	if (retprobe) {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	} else {
+		check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 5,
+				     ksym_get_addr("bpf_testmod_stacktrace_test"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+				     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+				     ksym_get_addr("bpf_testmod_test_read"));
+	}
+
+cleanup:
+	stacktrace_ips__destroy(skel);
+}
+
 static void __test_stacktrace_ips(void)
 {
 	if (test__start_subtest("kprobe_multi"))
@@ -195,6 +242,10 @@ static void __test_stacktrace_ips(void)
 		test_stacktrace_ips_kprobe(false);
 	if (test__start_subtest("kretprobe"))
 		test_stacktrace_ips_kprobe(true);
+	if (test__start_subtest("fentry"))
+		test_stacktrace_ips_trampoline(false);
+	if (test__start_subtest("fexit"))
+		test_stacktrace_ips_trampoline(true);
 }
 #else
 static void __test_stacktrace_ips(void)
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
index cae077a4061b..6830f2978613 100644
--- a/tools/testing/selftests/bpf/progs/stacktrace_ips.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -53,4 +53,24 @@ int rawtp_test(void *ctx)
 	return 0;
 }
 
+SEC("fentry/bpf_testmod_stacktrace_test")
+int fentry_test(struct pt_regs *ctx)
+{
+	/*
+	 * Skip 2 bpf_program/trampoline stack entries:
+	 *  - bpf_prog_bd1f7a949f55fb03_fentry_test
+	 *  - bpf_trampoline_182536277701
+	 */
+	stack_key = bpf_get_stackid(ctx, &stackmap, 2);
+	return 0;
+}
+
+SEC("fexit/bpf_testmod_stacktrace_test")
+int fexit_test(struct pt_regs *ctx)
+{
+	/* Skip 2 bpf_program/trampoline stack entries, check fentry_test. */
+	stack_key = bpf_get_stackid(ctx, &stackmap, 2);
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
--
cgit v1.2.3

From 4173b494d93a8057d3ed23e65853cd76b647f870 Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Mon, 26 Jan 2026 22:18:37 +0100
Subject: selftests/bpf: Allow to benchmark trigger with stacktrace

Adding support to call the bpf_get_stackid helper from the trigger
programs (kprobe, kprobe multi, fentry/fexit, fmod_ret and the
tracepoint variants). Adding the --stacktrace/-s option to enable it.

Signed-off-by: Jiri Olsa
Signed-off-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20260126211837.472802-7-jolsa@kernel.org
---
 tools/testing/selftests/bpf/bench.c                |  4 ++
 tools/testing/selftests/bpf/bench.h                |  1 +
 tools/testing/selftests/bpf/benchs/bench_trigger.c |  1 +
 tools/testing/selftests/bpf/progs/trigger_bench.c  | 46 +++++++++++++++++-----
 4 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index bd29bb2e6cb5..8368bd3a0665 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -265,6 +265,7 @@ static const struct argp_option opts[] = {
 	{ "verbose", 'v', NULL, 0, "Verbose debug output"},
 	{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
 	{ "quiet", 'q', NULL, 0, "Be more quiet"},
+	{ "stacktrace", 's', NULL, 0, "Get stack trace"},
 	{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
 	  "Set of CPUs for producer threads; implies --affinity"},
 	{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
@@ -350,6 +351,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case 'q':
 		env.quiet = true;
 		break;
+	case 's':
+		env.stacktrace = true;
+		break;
 	case ARG_PROD_AFFINITY_SET:
 		env.affinity = true;
 		if (parse_num_list(arg, &env.prod_cpus.cpus,
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index bea323820ffb..7cf21936e7ed 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -26,6 +26,7 @@ struct env {
 	bool list;
 	bool affinity;
 	bool quiet;
+	bool stacktrace;
 	int consumer_cnt;
 	int producer_cnt;
 	int nr_cpus;
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 34018fc3927f..aeec9edd3851 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -146,6 +146,7 @@ static void setup_ctx(void)
 	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
 
 	ctx.skel->rodata->batch_iters = args.batch_iters;
+	ctx.skel->rodata->stacktrace = env.stacktrace;
 }
 
 static void load_ctx(void)
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2898b3749d07..4ea0422d1042 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -25,6 +25,34 @@ static __always_inline void inc_counter(void)
 	__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
 }
 
+volatile const int stacktrace;
+
+typedef __u64 stack_trace_t[128];
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, stack_trace_t);
+} stack_heap SEC(".maps");
+
+static __always_inline void do_stacktrace(void *ctx)
+{
+	if (!stacktrace)
+		return;
+
+	__u64 *ptr = bpf_map_lookup_elem(&stack_heap, &(__u32){0});
+
+	if (ptr)
+		bpf_get_stack(ctx, ptr, sizeof(stack_trace_t), 0);
+}
+
+static __always_inline void handle(void *ctx)
+{
+	inc_counter();
+	do_stacktrace(ctx);
+}
+
 SEC("?uprobe")
 int bench_trigger_uprobe(void *ctx)
 {
@@ -81,21 +109,21 @@ int trigger_driver_kfunc(void *ctx)
 SEC("?kprobe/bpf_get_numa_node_id")
 int bench_trigger_kprobe(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return 0;
 }
 
 SEC("?kretprobe/bpf_get_numa_node_id")
 int bench_trigger_kretprobe(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return 0;
 }
 
 SEC("?kprobe.multi/bpf_get_numa_node_id")
 int bench_trigger_kprobe_multi(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return 0;
 }
 
@@ -108,7 +136,7 @@ int bench_kprobe_multi_empty(void *ctx)
 SEC("?kretprobe.multi/bpf_get_numa_node_id")
 int bench_trigger_kretprobe_multi(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return 0;
 }
 
@@ -121,34 +149,34 @@ int bench_kretprobe_multi_empty(void *ctx)
 SEC("?fentry/bpf_get_numa_node_id")
 int bench_trigger_fentry(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return 0;
 }
 
 SEC("?fexit/bpf_get_numa_node_id")
 int bench_trigger_fexit(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return 0;
 }
 
 SEC("?fmod_ret/bpf_modify_return_test_tp")
 int bench_trigger_fmodret(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return -22;
 }
 
 SEC("?tp/bpf_test_run/bpf_trigger_tp")
 int bench_trigger_tp(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return 0;
 }
 
 SEC("?raw_tp/bpf_trigger_tp")
 int bench_trigger_rawtp(void *ctx)
 {
-	inc_counter();
+	handle(ctx);
 	return 0;
 }
--
cgit v1.2.3
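With the series applied, the stacktrace overhead quoted earlier can be
reproduced with the trigger benchmarks, e.g. (assuming the usual trig-*
benchmark names):

	# ./bench trig-kprobe-multi -s
	# ./bench trig-kretprobe-multi -s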