From f5836749c9c04a10decd2742845ad4870965fdef Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 6 Jul 2020 16:01:25 -0700 Subject: bpf: Add BPF_CGROUP_INET_SOCK_RELEASE hook Sometimes it's handy to know when the socket gets freed. In particular, we'd like to try to use a smarter allocation of ports for bpf_bind and explore the possibility of limiting the number of SOCK_DGRAM sockets the process can have. Implement BPF_CGROUP_INET_SOCK_RELEASE hook that triggers on inet socket release. It triggers only for userspace sockets (not in-kernel ones) and therefore has the same semantics as the existing BPF_CGROUP_INET_SOCK_CREATE. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200706230128.4073544-2-sdf@google.com --- kernel/bpf/syscall.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8da159936bab..156f51ffada2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1981,6 +1981,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, case BPF_PROG_TYPE_CGROUP_SOCK: switch (expected_attach_type) { case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: case BPF_CGROUP_INET4_POST_BIND: case BPF_CGROUP_INET6_POST_BIND: return 0; @@ -2779,6 +2780,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) return BPF_PROG_TYPE_CGROUP_SKB; break; case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: case BPF_CGROUP_INET4_POST_BIND: case BPF_CGROUP_INET6_POST_BIND: return BPF_PROG_TYPE_CGROUP_SOCK; @@ -2929,6 +2931,7 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_POST_BIND: -- cgit v1.2.3 From c9a0f3b85e09dd16665b639cb884490410619434 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 11 Jul 2020 23:53:24 +0200 Subject: bpf: Resolve BTF IDs in vmlinux image Using BTF_ID_LIST macro to define lists for several helpers using BTF arguments. And running resolve_btfids on vmlinux elf object during linking, so the .BTF_ids section gets the IDs resolved. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Tested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200711215329.41165-5-jolsa@kernel.org --- kernel/bpf/stackmap.c | 5 ++++- kernel/trace/bpf_trace.c | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index a6c361ed7937..48d8e739975f 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "percpu_freelist.h" #define STACK_CREATE_FLAG_MASK \ @@ -576,7 +577,9 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, return __bpf_get_stack(regs, task, buf, size, flags); } -static int bpf_get_task_stack_btf_ids[5]; +BTF_ID_LIST(bpf_get_task_stack_btf_ids) +BTF_ID(struct, task_struct) + const struct bpf_func_proto bpf_get_task_stack_proto = { .func = bpf_get_task_stack, .gpl_only = false, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e0b7775039ab..e178e8e32b33 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -710,7 +711,9 @@ out: return err; } -static int bpf_seq_printf_btf_ids[5]; +BTF_ID_LIST(bpf_seq_printf_btf_ids) +BTF_ID(struct, seq_file) + static const struct bpf_func_proto bpf_seq_printf_proto = { .func = bpf_seq_printf, .gpl_only = true, @@ -728,7 +731,9 @@ BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len) return seq_write(m, data, len) ? -EOVERFLOW : 0; } -static int bpf_seq_write_btf_ids[5]; +BTF_ID_LIST(bpf_seq_write_btf_ids) +BTF_ID(struct, seq_file) + static const struct bpf_func_proto bpf_seq_write_proto = { .func = bpf_seq_write, .gpl_only = true, -- cgit v1.2.3 From 138b9a0511c789f2451ff1d80e7fd3f9eef3a9e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 11 Jul 2020 23:53:25 +0200 Subject: bpf: Remove btf_id helpers resolving Now when we moved the helpers btf_id arrays into .BTF_ids section, we can remove the code that resolve those IDs in runtime. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Tested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200711215329.41165-6-jolsa@kernel.org --- kernel/bpf/btf.c | 89 ++++---------------------------------------------------- 1 file changed, 5 insertions(+), 84 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4c3007f428b1..71140b73ae3c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4079,96 +4079,17 @@ error: return -EINVAL; } -static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, - int arg) -{ - char fnname[KSYM_SYMBOL_LEN + 4] = "btf_"; - const struct btf_param *args; - const struct btf_type *t; - const char *tname, *sym; - u32 btf_id, i; - - if (IS_ERR(btf_vmlinux)) { - bpf_log(log, "btf_vmlinux is malformed\n"); - return -EINVAL; - } - - sym = kallsyms_lookup((long)fn, NULL, NULL, NULL, fnname + 4); - if (!sym) { - bpf_log(log, "kernel doesn't have kallsyms\n"); - return -EFAULT; - } - - for (i = 1; i <= btf_vmlinux->nr_types; i++) { - t = btf_type_by_id(btf_vmlinux, i); - if (BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) - continue; - tname = __btf_name_by_offset(btf_vmlinux, t->name_off); - if (!strcmp(tname, fnname)) - break; - } - if (i > btf_vmlinux->nr_types) { - bpf_log(log, "helper %s type is not found\n", fnname); - return -ENOENT; - } - - t = btf_type_by_id(btf_vmlinux, t->type); - if (!btf_type_is_ptr(t)) - return -EFAULT; - t = btf_type_by_id(btf_vmlinux, t->type); - if (!btf_type_is_func_proto(t)) - return -EFAULT; - - args = (const struct btf_param *)(t + 1); - if (arg >= btf_type_vlen(t)) { - bpf_log(log, "bpf helper %s doesn't have %d-th argument\n", - fnname, arg); - return -EINVAL; - } - - t = btf_type_by_id(btf_vmlinux, args[arg].type); - if (!btf_type_is_ptr(t) || !t->type) { - /* anything but the pointer to struct is a helper config bug */ - bpf_log(log, "ARG_PTR_TO_BTF is misconfigured\n"); - return -EFAULT; - } - btf_id = t->type; - t = btf_type_by_id(btf_vmlinux, t->type); - /* skip modifiers */ - while (btf_type_is_modifier(t)) { - btf_id = t->type; - t = btf_type_by_id(btf_vmlinux, t->type); - } - if (!btf_type_is_struct(t)) { - bpf_log(log, "ARG_PTR_TO_BTF is not a struct\n"); - return -EFAULT; - } - bpf_log(log, "helper %s arg%d has btf_id %d struct %s\n", fnname + 4, - arg, btf_id, __btf_name_by_offset(btf_vmlinux, t->name_off)); - return btf_id; -} - int btf_resolve_helper_id(struct bpf_verifier_log *log, const struct bpf_func_proto *fn, int arg) { - int *btf_id = &fn->btf_id[arg]; - int ret; + int id; if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID) return -EINVAL; - - ret = READ_ONCE(*btf_id); - if (ret) - return ret; - /* ok to race the search. The result is the same */ - ret = __btf_resolve_helper_id(log, fn->func, arg); - if (!ret) { - /* Function argument cannot be type 'void' */ - bpf_log(log, "BTF resolution bug\n"); - return -EFAULT; - } - WRITE_ONCE(*btf_id, ret); - return ret; + id = fn->btf_id[arg]; + if (!id || id > btf_vmlinux->nr_types) + return -EINVAL; + return id; } static int __get_type_size(struct btf *btf, u32 btf_id, -- cgit v1.2.3 From 49f4e6720748c778795df62d222d0200b61345be Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 11 Jul 2020 23:53:26 +0200 Subject: bpf: Use BTF_ID to resolve bpf_ctx_convert struct This way the ID is resolved during compile time, and we can remove the runtime name search. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Tested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200711215329.41165-7-jolsa@kernel.org --- kernel/bpf/btf.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 71140b73ae3c..a710e3ee1f18 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -3621,12 +3622,15 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, return kern_ctx_type->type; } +BTF_ID_LIST(bpf_ctx_convert_btf_id) +BTF_ID(struct, bpf_ctx_convert) + struct btf *btf_parse_vmlinux(void) { struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; struct btf *btf = NULL; - int err, btf_id; + int err; env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) @@ -3659,14 +3663,8 @@ struct btf *btf_parse_vmlinux(void) if (err) goto errout; - /* find struct bpf_ctx_convert for type checking later */ - btf_id = btf_find_by_name_kind(btf, "bpf_ctx_convert", BTF_KIND_STRUCT); - if (btf_id < 0) { - err = btf_id; - goto errout; - } /* btf_parse_vmlinux() runs under bpf_verifier_lock */ - bpf_ctx_convert.t = btf_type_by_id(btf, btf_id); + bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); /* find bpf map structs for map_ptr access checking */ err = btf_vmlinux_map_ids_init(btf, log); -- cgit v1.2.3 From ac5a72ea5c8989871e61f6bb0852e0f91de51ebe Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 13 Jul 2020 12:52:33 +0100 Subject: bpf: Use dedicated bpf_trace_printk event instead of trace_printk() The bpf helper bpf_trace_printk() uses trace_printk() under the hood. This leads to an alarming warning message originating from trace buffer allocation which occurs the first time a program using bpf_trace_printk() is loaded. We can instead create a trace event for bpf_trace_printk() and enable it in-kernel when/if we encounter a program using the bpf_trace_printk() helper. With this approach, trace_printk() is not used directly and no warning message appears. This work was started by Steven (see Link) and finished by Alan; added Steven's Signed-off-by with his permission. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20200628194334.6238b933@oasis.local.home Link: https://lore.kernel.org/bpf/1594641154-18897-2-git-send-email-alan.maguire@oracle.com --- kernel/trace/Makefile | 2 ++ kernel/trace/bpf_trace.c | 42 +++++++++++++++++++++++++++++++++++++----- kernel/trace/bpf_trace.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 kernel/trace/bpf_trace.h (limited to 'kernel') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6575bb0a0434..aeba5ee7325a 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -31,6 +31,8 @@ ifdef CONFIG_GCOV_PROFILE_FTRACE GCOV_PROFILE := y endif +CFLAGS_bpf_trace.o := -I$(src) + CFLAGS_trace_benchmark.o := -I$(src) CFLAGS_trace_events_filter.o := -I$(src) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e178e8e32b33..3cc0dcb60ca2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,9 @@ #include "trace_probe.h" #include "trace.h" +#define CREATE_TRACE_POINTS +#include "bpf_trace.h" + #define bpf_event_rcu_dereference(p) \ rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) @@ -375,6 +379,30 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, } } +static DEFINE_RAW_SPINLOCK(trace_printk_lock); + +#define BPF_TRACE_PRINTK_SIZE 1024 + +static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) +{ + static char buf[BPF_TRACE_PRINTK_SIZE]; + unsigned long flags; + va_list ap; + int ret; + + raw_spin_lock_irqsave(&trace_printk_lock, flags); + va_start(ap, fmt); + ret = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + /* vsnprintf() will not append null for zero-length strings */ + if (ret == 0) + buf[0] = '\0'; + trace_bpf_trace_printk(buf); + raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + + return ret; +} + /* * Only limited trace_printk() conversion specifiers allowed: * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s @@ -484,8 +512,7 @@ fmt_next: */ #define __BPF_TP_EMIT() __BPF_ARG3_TP() #define __BPF_TP(...) \ - __trace_printk(0 /* Fake ip */, \ - fmt, ##__VA_ARGS__) + bpf_do_trace_printk(fmt, ##__VA_ARGS__) #define __BPF_ARG1_TP(...) \ ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ @@ -522,10 +549,15 @@ static const struct bpf_func_proto bpf_trace_printk_proto = { const struct bpf_func_proto *bpf_get_trace_printk_proto(void) { /* - * this program might be calling bpf_trace_printk, - * so allocate per-cpu printk buffers + * This program might be calling bpf_trace_printk, + * so enable the associated bpf_trace/bpf_trace_printk event. + * Repeat this each time as it is possible a user has + * disabled bpf_trace_printk events. By loading a program + * calling bpf_trace_printk() however the user has expressed + * the intent to see such events. */ - trace_printk_init_buffers(); + if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) + pr_warn_ratelimited("could not enable bpf_trace_printk events"); return &bpf_trace_printk_proto; } diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h new file mode 100644 index 000000000000..9acbc11ac7bb --- /dev/null +++ b/kernel/trace/bpf_trace.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_trace + +#if !defined(_TRACE_BPF_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) + +#define _TRACE_BPF_TRACE_H + +#include + +TRACE_EVENT(bpf_trace_printk, + + TP_PROTO(const char *bpf_string), + + TP_ARGS(bpf_string), + + TP_STRUCT__entry( + __string(bpf_string, bpf_string) + ), + + TP_fast_assign( + __assign_str(bpf_string, bpf_string); + ), + + TP_printk("%s", __get_str(bpf_string)) +); + +#endif /* _TRACE_BPF_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE bpf_trace + +#include -- cgit v1.2.3