From 23dc9867329c72b48e5039ac93fbf50d9099cdb3 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Thu, 5 Sep 2024 19:22:44 -0600
Subject: bpf, cpumap: Move xdp:xdp_cpumap_kthread tracepoint before rcv

cpumap takes RX processing out of softirq and onto a separate kthread.
Since the kthread needs to be scheduled in order to run (versus softirq
which does not), we can theoretically experience extra latency if the
system is under load and the scheduler is being unfair to us.

Moving the tracepoint to before passing the skb list up the stack allows
users to more accurately measure enqueue/dequeue latency introduced by
cpumap via xdp:xdp_cpumap_enqueue and xdp:xdp_cpumap_kthread tracepoints.

f9419f7bd7a5 ("bpf: cpumap add tracepoints") which added the tracepoints
states that the intent behind them was for general observability and for
a feedback loop to see if the queues are being overwhelmed. This change
does not mess with either of those use cases but rather adds a third
one.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Link: https://lore.kernel.org/bpf/47615d5b5e302e4bd30220473779e98b492d47cd.1725585718.git.dxu@dxuuu.xyz
---
 kernel/bpf/cpumap.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index fbdf5a1aabfe..a2f46785ac3b 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -354,12 +354,14 @@ static int cpu_map_kthread_run(void *data)
 
 			list_add_tail(&skb->list, &list);
 		}
-		netif_receive_skb_list(&list);
 
-		/* Feedback loop via tracepoint */
+		/* Feedback loop via tracepoint.
+		 * NB: keep before recv to allow measuring enqueue/dequeue latency.
+		 */
 		trace_xdp_cpumap_kthread(rcpu->map_id, n, kmem_alloc_drops,
 					 sched, &stats);
 
+		netif_receive_skb_list(&list);
 		local_bh_enable(); /* resched point, may call do_softirq() */
 	}
 	__set_current_state(TASK_RUNNING);
-- 
cgit v1.2.3


From 8aeaed21befc90f27f4fca6dd190850d97d2e9e3 Mon Sep 17 00:00:00 2001
From: Philo Lu <lulie@linux.alibaba.com>
Date: Wed, 11 Sep 2024 11:37:15 +0800
Subject: bpf: Support __nullable argument suffix for tp_btf

Pointers passed to tp_btf were trusted to be valid, but some tracepoints
do take NULL pointer as input, such as trace_tcp_send_reset(). Then the
invalid memory access cannot be detected by verifier.

This patch fix it by add a suffix "__nullable" to the unreliable
argument. The suffix is shown in btf, and PTR_MAYBE_NULL will be added
to nullable arguments. Then users must check the pointer before use it.

A problem here is that we use "btf_trace_##call" to search func_proto.
As it is a typedef, argument names as well as the suffix are not
recorded. To solve this, I use bpf_raw_event_map to find
"__bpf_trace##template" from "btf_trace_##call", and then we can see the
suffix.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Philo Lu <lulie@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240911033719.91468-2-lulie@linux.alibaba.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/btf.c      |  3 +++
 kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++----
 2 files changed, 35 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 520f49f422fe..7e03a98ccb7c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6523,6 +6523,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	if (prog_args_trusted(prog))
 		info->reg_type |= PTR_TRUSTED;
 
+	if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
+		info->reg_type |= PTR_MAYBE_NULL;
+
 	if (tgt_prog) {
 		enum bpf_prog_type tgt_type;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d8520095ca03..39d5710c68ad 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -28,6 +28,8 @@
 #include <linux/cpumask.h>
 #include <linux/bpf_mem_alloc.h>
 #include <net/xdp.h>
+#include <linux/trace_events.h>
+#include <linux/kallsyms.h>
 
 #include "disasm.h"
 
@@ -21154,11 +21156,13 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 {
 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
 	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
+	char trace_symbol[KSYM_SYMBOL_LEN];
 	const char prefix[] = "btf_trace_";
+	struct bpf_raw_event_map *btp;
 	int ret = 0, subprog = -1, i;
 	const struct btf_type *t;
 	bool conservative = true;
-	const char *tname;
+	const char *tname, *fname;
 	struct btf *btf;
 	long addr = 0;
 	struct module *mod = NULL;
@@ -21289,10 +21293,34 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 			return -EINVAL;
 		}
 		tname += sizeof(prefix) - 1;
-		t = btf_type_by_id(btf, t->type);
-		if (!btf_type_is_ptr(t))
-			/* should never happen in valid vmlinux build */
+
+		/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
+		 * names. Thus using bpf_raw_event_map to get argument names.
+		 */
+		btp = bpf_get_raw_tracepoint(tname);
+		if (!btp)
 			return -EINVAL;
+		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
+					trace_symbol);
+		bpf_put_raw_tracepoint(btp);
+
+		if (fname)
+			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
+
+		if (!fname || ret < 0) {
+			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
+				prefix, tname);
+			t = btf_type_by_id(btf, t->type);
+			if (!btf_type_is_ptr(t))
+				/* should never happen in valid vmlinux build */
+				return -EINVAL;
+		} else {
+			t = btf_type_by_id(btf, ret);
+			if (!btf_type_is_func(t))
+				/* should never happen in valid vmlinux build */
+				return -EINVAL;
+		}
+
 		t = btf_type_by_id(btf, t->type);
 		if (!btf_type_is_func_proto(t))
 			/* should never happen in valid vmlinux build */
-- 
cgit v1.2.3