From 23dc9867329c72b48e5039ac93fbf50d9099cdb3 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 5 Sep 2024 19:22:44 -0600 Subject: bpf, cpumap: Move xdp:xdp_cpumap_kthread tracepoint before rcv cpumap takes RX processing out of softirq and onto a separate kthread. Since the kthread needs to be scheduled in order to run (versus softirq which does not), we can theoretically experience extra latency if the system is under load and the scheduler is being unfair to us. Moving the tracepoint to before passing the skb list up the stack allows users to more accurately measure enqueue/dequeue latency introduced by cpumap via xdp:xdp_cpumap_enqueue and xdp:xdp_cpumap_kthread tracepoints. f9419f7bd7a5 ("bpf: cpumap add tracepoints") which added the tracepoints states that the intent behind them was for general observability and for a feedback loop to see if the queues are being overwhelmed. This change does not mess with either of those use cases but rather adds a third one. Signed-off-by: Daniel Xu Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/47615d5b5e302e4bd30220473779e98b492d47cd.1725585718.git.dxu@dxuuu.xyz --- kernel/bpf/cpumap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index fbdf5a1aabfe..a2f46785ac3b 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -354,12 +354,14 @@ static int cpu_map_kthread_run(void *data) list_add_tail(&skb->list, &list); } - netif_receive_skb_list(&list); - /* Feedback loop via tracepoint */ + /* Feedback loop via tracepoint. + * NB: keep before recv to allow measuring enqueue/dequeue latency. 
+ */ trace_xdp_cpumap_kthread(rcpu->map_id, n, kmem_alloc_drops, sched, &stats); + netif_receive_skb_list(&list); local_bh_enable(); /* resched point, may call do_softirq() */ } __set_current_state(TASK_RUNNING); -- cgit v1.2.3 From 8aeaed21befc90f27f4fca6dd190850d97d2e9e3 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Wed, 11 Sep 2024 11:37:15 +0800 Subject: bpf: Support __nullable argument suffix for tp_btf Pointers passed to tp_btf were trusted to be valid, but some tracepoints do take a NULL pointer as input, such as trace_tcp_send_reset(). In that case the invalid memory access cannot be detected by the verifier. This patch fixes it by adding a suffix "__nullable" to the unreliable argument. The suffix is shown in btf, and PTR_MAYBE_NULL will be added to nullable arguments. Users must then check the pointer before using it. A problem here is that we use "btf_trace_##call" to search func_proto. As it is a typedef, argument names as well as the suffix are not recorded. To solve this, I use bpf_raw_event_map to find "__bpf_trace##template" from "btf_trace_##call", and then we can see the suffix. 
Suggested-by: Alexei Starovoitov Signed-off-by: Philo Lu Link: https://lore.kernel.org/r/20240911033719.91468-2-lulie@linux.alibaba.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/btf.c | 3 +++ kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 520f49f422fe..7e03a98ccb7c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6523,6 +6523,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (prog_args_trusted(prog)) info->reg_type |= PTR_TRUSTED; + if (btf_param_match_suffix(btf, &args[arg], "__nullable")) + info->reg_type |= PTR_MAYBE_NULL; + if (tgt_prog) { enum bpf_prog_type tgt_type; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d8520095ca03..39d5710c68ad 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include "disasm.h" @@ -21154,11 +21156,13 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, { bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING; + char trace_symbol[KSYM_SYMBOL_LEN]; const char prefix[] = "btf_trace_"; + struct bpf_raw_event_map *btp; int ret = 0, subprog = -1, i; const struct btf_type *t; bool conservative = true; - const char *tname; + const char *tname, *fname; struct btf *btf; long addr = 0; struct module *mod = NULL; @@ -21289,10 +21293,34 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, return -EINVAL; } tname += sizeof(prefix) - 1; - t = btf_type_by_id(btf, t->type); - if (!btf_type_is_ptr(t)) - /* should never happen in valid vmlinux build */ + + /* The func_proto of "btf_trace_##tname" is generated from typedef without argument + * names. Thus using bpf_raw_event_map to get argument names. 
+ */ + btp = bpf_get_raw_tracepoint(tname); + if (!btp) return -EINVAL; + fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL, + trace_symbol); + bpf_put_raw_tracepoint(btp); + + if (fname) + ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC); + + if (!fname || ret < 0) { + bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n", + prefix, tname); + t = btf_type_by_id(btf, t->type); + if (!btf_type_is_ptr(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + } else { + t = btf_type_by_id(btf, ret); + if (!btf_type_is_func(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + } + t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) /* should never happen in valid vmlinux build */ -- cgit v1.2.3