From 53c8036cb715f3577a7fe1db6e6ad06e8697b36f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 May 2018 23:33:19 +0200 Subject: bpf: btf: avoid -Wreturn-type warning gcc warns about a noreturn function possibly returning in some configurations: kernel/bpf/btf.c: In function 'env_type_is_resolve_sink': kernel/bpf/btf.c:729:1: error: control reaches end of non-void function [-Werror=return-type] Using BUG() instead of BUG_ON() avoids that warning and otherwise does the exact same thing. Fixes: eb3f595dab40 ("bpf: btf: Validate type reference") Signed-off-by: Arnd Bergmann Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7e90fd13b5b5..3d20aa1f4b54 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -749,7 +749,7 @@ static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, !btf_type_is_array(next_type) && !btf_type_is_struct(next_type); default: - BUG_ON(1); + BUG(); } } -- cgit v1.2.3 From dc3b8ae9d271897e09b27fa4e4e0000de98590d1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 May 2018 23:33:20 +0200 Subject: bpf: avoid -Wmaybe-uninitialized warning The stack_map_get_build_id_offset() function is too long for gcc to track whether 'work' may or may not be initialized at the end of it, leading to a false-positive warning: kernel/bpf/stackmap.c: In function 'stack_map_get_build_id_offset': kernel/bpf/stackmap.c:334:13: error: 'work' may be used uninitialized in this function [-Werror=maybe-uninitialized] This removes the 'in_nmi_ctx' flag and uses the state of that variable itself to see if it got initialized. Fixes: bae77c5eb5b2 ("bpf: enable stackmap with build_id in nmi context") Signed-off-by: Arnd Bergmann Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- kernel/bpf/stackmap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index b59ace0f0f09..b675a3f3d141 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -285,11 +285,10 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, { int i; struct vm_area_struct *vma; - bool in_nmi_ctx = in_nmi(); bool irq_work_busy = false; - struct stack_map_irq_work *work; + struct stack_map_irq_work *work = NULL; - if (in_nmi_ctx) { + if (in_nmi()) { work = this_cpu_ptr(&up_read_work); if (work->irq_work.flags & IRQ_WORK_BUSY) /* cannot queue more up_read, fallback */ @@ -328,7 +327,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, id_offs[i].status = BPF_STACK_BUILD_ID_VALID; } - if (!in_nmi_ctx) { + if (!work) { up_read(¤t->mm->mmap_sem); } else { work->sem = ¤t->mm->mmap_sem; -- cgit v1.2.3 From 1cedee13d25ab118d325f95588c1a084e9317229 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 25 May 2018 08:55:23 -0700 Subject: bpf: Hooks for sys_sendmsg In addition to already existing BPF hooks for sys_bind and sys_connect, the patch provides new hooks for sys_sendmsg. It leverages existing BPF program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that provides access to socket itlself (properties like family, type, protocol) and user-passed `struct sockaddr *` so that BPF program can override destination IP and port for system calls such as sendto(2) or sendmsg(2) and/or assign source IP to the socket. The hooks are implemented as two new attach types: `BPF_CGROUP_UDP4_SENDMSG` and `BPF_CGROUP_UDP6_SENDMSG` for UDPv4 and UDPv6 correspondingly. UDPv4 and UDPv6 separate attach types for same reason as sys_bind and sys_connect hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields when user passes sockaddr_in since it'd be out-of-bound. The difference with already existing hooks is sys_sendmsg are implemented only for unconnected UDP. For TCP it doesn't make sense to change user-provided `struct sockaddr *` at sendto(2)/sendmsg(2) time since socket either was already connected and has source/destination set or wasn't connected and call to sendto(2)/sendmsg(2) would lead to ENOTCONN anyway. Connected UDP is already handled by sys_connect hooks that can override source/destination at connect time and use fast-path later, i.e. these hooks don't affect UDP fast-path. Rewriting source IP is implemented differently than that in sys_connect hooks. When sys_sendmsg is used with unconnected UDP it doesn't work to just bind socket to desired local IP address since source IP can be set on per-packet basis by using ancillary data (cmsg(3)). So no matter if socket is bound or not, source IP has to be rewritten on every call to sys_sendmsg. To do so two new fields are added to UAPI `struct bpf_sock_addr`; * `msg_src_ip4` to set source IPv4 for UDPv4; * `msg_src_ip6` to set source IPv6 for UDPv6. Signed-off-by: Andrey Ignatov Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- kernel/bpf/cgroup.c | 11 ++++++++++- kernel/bpf/syscall.c | 8 ++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 43171a0bb02b..f7c00bd6f8e4 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -500,6 +500,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user * @type: The type of program to be exectuted + * @t_ctx: Pointer to attach type specific context * * socket is expected to be of type INET or INET6. * @@ -508,12 +509,15 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, - enum bpf_attach_type type) + enum bpf_attach_type type, + void *t_ctx) { struct bpf_sock_addr_kern ctx = { .sk = sk, .uaddr = uaddr, + .t_ctx = t_ctx, }; + struct sockaddr_storage unspec; struct cgroup *cgrp; int ret; @@ -523,6 +527,11 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) return 0; + if (!ctx.uaddr) { + memset(&unspec, 0, sizeof(unspec)); + ctx.uaddr = (struct sockaddr *)&unspec; + } + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 388d4feda348..e254526d6744 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1249,6 +1249,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: return 0; default: return -EINVAL; @@ -1565,6 +1567,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1635,6 +1639,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1692,6 +1698,8 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_POST_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: break; -- cgit v1.2.3 From 170a7e3ea0709eae12c8f944b9f33c54fe80c6c1 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 27 May 2018 12:24:08 +0100 Subject: bpf: bpf_prog_array_copy() should return -ENOENT if exclude_prog not found This makes is it possible for bpf prog detach to return -ENOENT. Acked-by: Yonghong Song Signed-off-by: Sean Young Signed-off-by: Daniel Borkmann --- kernel/bpf/core.c | 11 +++++++++-- kernel/trace/bpf_trace.c | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b574dddc05b8..527587de8a67 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1616,6 +1616,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, int new_prog_cnt, carry_prog_cnt = 0; struct bpf_prog **existing_prog; struct bpf_prog_array *array; + bool found_exclude = false; int new_prog_idx = 0; /* Figure out how many existing progs we need to carry over to @@ -1624,14 +1625,20 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, if (old_array) { existing_prog = old_array->progs; for (; *existing_prog; existing_prog++) { - if (*existing_prog != exclude_prog && - *existing_prog != &dummy_bpf_prog.prog) + if (*existing_prog == exclude_prog) { + found_exclude = true; + continue; + } + if (*existing_prog != &dummy_bpf_prog.prog) carry_prog_cnt++; if (*existing_prog == include_prog) return -EEXIST; } } + if (exclude_prog && !found_exclude) + return -ENOENT; + /* How many progs (not NULL) will be in the new array? */ new_prog_cnt = carry_prog_cnt; if (include_prog) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 81fdf2fc94ac..af1486d9a0ed 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1006,6 +1006,8 @@ void perf_event_detach_bpf_prog(struct perf_event *event) old_array = event->tp_event->prog_array; ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); + if (ret == -ENOENT) + goto unlock; if (ret < 0) { bpf_prog_array_delete_safe(old_array, event->prog); } else { -- cgit v1.2.3 From f4364dcfc86df7c1ca47b256eaf6b6d0cdd0d936 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 27 May 2018 12:24:09 +0100 Subject: media: rc: introduce BPF_PROG_LIRC_MODE2 Add support for BPF_PROG_LIRC_MODE2. This type of BPF program can call rc_keydown() to reported decoded IR scancodes, or rc_repeat() to report that the last key should be repeated. The bpf program can be attached to using the bpf(BPF_PROG_ATTACH) syscall; the target_fd must be the /dev/lircN device. Acked-by: Yonghong Song Signed-off-by: Sean Young Signed-off-by: Daniel Borkmann --- kernel/bpf/syscall.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e254526d6744..7365d79ae00d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include @@ -1582,6 +1583,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true); + case BPF_LIRC_MODE2: + return lirc_prog_attach(attr); default: return -EINVAL; } @@ -1654,6 +1657,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false); + case BPF_LIRC_MODE2: + return lirc_prog_detach(attr); default: return -EINVAL; } @@ -1703,6 +1708,8 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: break; + case BPF_LIRC_MODE2: + return lirc_prog_query(attr, uattr); default: return -EINVAL; } -- cgit v1.2.3 From 71b2c87df3ac37f5f83e166db136b0c1d065a781 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 May 2018 16:09:16 +0100 Subject: bpf: devmap: remove redundant assignment of dev = dev The assignment dev = dev is redundant and should be removed. Detected by CoverityScan, CID#1469486 ("Evaluation order violation") Signed-off-by: Colin Ian King Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index ae16d0c373ef..1fe3fe60508a 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -352,7 +352,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, static void *dev_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); - struct net_device *dev = dev = obj ? obj->dev : NULL; + struct net_device *dev = obj ? obj->dev : NULL; return dev ? &dev->ifindex : NULL; } -- cgit v1.2.3 From b9308ae696b2c35e862636eec631d95ff958c33d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 2 Jun 2018 09:06:50 -0700 Subject: bpf: btf: Check array t->size This patch ensures array's t->size is 0. The array size is decided by its individual elem's size and the number of elements. Hence, t->size is not used and it must be 0. A test case is added to test_btf.c Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3d20aa1f4b54..84ad532f2854 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1342,6 +1342,11 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env, return -EINVAL; } + if (t->size) { + btf_verifier_log_type(env, t, "size != 0"); + return -EINVAL; + } + /* Array elem type and index type cannot be in type void, * so !array->type and !array->index_type are not allowed. */ -- cgit v1.2.3 From 8175383f2320dbc1b4e803d857ed499ed3e76199 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Sat, 2 Jun 2018 09:06:51 -0700 Subject: bpf: btf: Ensure t->type == 0 for BTF_KIND_FWD The t->type in BTF_KIND_FWD is not used. It must be 0. This patch ensures that and also adds a test case in test_btf.c Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 84ad532f2854..8653ab004c73 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1286,8 +1286,27 @@ static struct btf_kind_operations ptr_ops = { .seq_show = btf_ptr_seq_show, }; +static s32 btf_fwd_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; + } + + if (t->type) { + btf_verifier_log_type(env, t, "type != 0"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + return 0; +} + static struct btf_kind_operations fwd_ops = { - .check_meta = btf_ref_type_check_meta, + .check_meta = btf_fwd_check_meta, .resolve = btf_df_resolve, .check_member = btf_df_check_member, .log_details = btf_ref_type_log, -- cgit v1.2.3 From 3fe2867cdf088ffb2dc5aed6cdcf757b4c62476c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 2 Jun 2018 23:06:33 +0200 Subject: bpf: fixup error message from gpl helpers on license mismatch Stating 'proprietary program' in the error is just silly since it can also be a different open source license than that which is just not compatible. Reference: https://twitter.com/majek04/status/998531268039102465 Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1fd9667b29f1..4f4786ea2296 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2462,7 +2462,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn /* eBPF programs must be GPL compatible to use GPL-ed functions */ if (!env->prog->gpl_compatible && fn->gpl_only) { - verbose(env, "cannot call GPL only function from proprietary program\n"); + verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n"); return -EINVAL; } -- cgit v1.2.3 From 4316b40914ecde3738968225af56e650e8b61938 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 2 Jun 2018 23:06:34 +0200 Subject: bpf: show prog and map id in fdinfo Its trivial and straight forward to expose it for scripts that can then use it along with bpftool in order to inspect an individual application's used maps and progs. Right now we dump some basic information in the fdinfo file but with the help of the map/prog id full introspection becomes possible now. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7365d79ae00d..0fa20624707f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -327,13 +327,15 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" - "memlock:\t%llu\n", + "memlock:\t%llu\n" + "map_id:\t%u\n", map->map_type, map->key_size, map->value_size, map->max_entries, map->map_flags, - map->pages * 1ULL << PAGE_SHIFT); + map->pages * 1ULL << PAGE_SHIFT, + map->id); if (owner_prog_type) { seq_printf(m, "owner_prog_type:\t%u\n", @@ -1070,11 +1072,13 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) "prog_type:\t%u\n" "prog_jited:\t%u\n" "prog_tag:\t%s\n" - "memlock:\t%llu\n", + "memlock:\t%llu\n" + "prog_id:\t%u\n", prog->type, prog->jited, prog_tag, - prog->pages * 1ULL << PAGE_SHIFT); + prog->pages * 1ULL << PAGE_SHIFT, + prog->aux->id); } #endif -- cgit v1.2.3 From 09772d92cd5ad998b0d5f6f46cd1658f8cb698cf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 2 Jun 2018 23:06:35 +0200 Subject: bpf: avoid retpoline for lookup/update/delete calls on maps While some of the BPF map lookup helpers provide a ->map_gen_lookup() callback for inlining the map lookup altogether it is not available for every map, so the remaining ones have to call bpf_map_lookup_elem() helper which does a dispatch to map->ops->map_lookup_elem(). In times of retpolines, this will control and trap speculative execution rather than letting it do its work for the indirect call and will therefore cause a slowdown. Likewise, bpf_map_update_elem() and bpf_map_delete_elem() do not have an inlined version and need to call into their map->ops->map_update_elem() resp. map->ops->map_delete_elem() handlers. Before: # bpftool prog dump xlated id 1 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = map[id:1] 5: (85) call __htab_map_lookup_elem#232656 6: (15) if r0 == 0x0 goto pc+4 7: (71) r1 = *(u8 *)(r0 +35) 8: (55) if r1 != 0x0 goto pc+1 9: (72) *(u8 *)(r0 +35) = 1 10: (07) r0 += 56 11: (15) if r0 == 0x0 goto pc+4 12: (bf) r2 = r0 13: (18) r1 = map[id:1] 15: (85) call bpf_map_delete_elem#215008 <-- indirect call via 16: (95) exit helper After: # bpftool prog dump xlated id 1 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = map[id:1] 5: (85) call __htab_map_lookup_elem#233328 6: (15) if r0 == 0x0 goto pc+4 7: (71) r1 = *(u8 *)(r0 +35) 8: (55) if r1 != 0x0 goto pc+1 9: (72) *(u8 *)(r0 +35) = 1 10: (07) r0 += 56 11: (15) if r0 == 0x0 goto pc+4 12: (bf) r2 = r0 13: (18) r1 = map[id:1] 15: (85) call htab_lru_map_delete_elem#238240 <-- direct call 16: (95) exit In all three lookup/update/delete cases however we can use the actual address of the map callback directly if we find that there's only a single path with a map pointer leading to the helper call, meaning when the map pointer has not been poisoned from verifier side. Example code can be seen above for the delete case. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/hashtab.c | 12 ++++++--- kernel/bpf/verifier.c | 68 +++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 58 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b76828f23b49..3ca2198a6d22 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -503,7 +503,9 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; - *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem); + BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, + (void *(*)(struct bpf_map *map, void *key))NULL)); + *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, offsetof(struct htab_elem, key) + @@ -530,7 +532,9 @@ static u32 htab_lru_map_gen_lookup(struct bpf_map *map, const int ret = BPF_REG_0; const int ref_reg = BPF_REG_1; - *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem); + BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, + (void *(*)(struct bpf_map *map, void *key))NULL)); + *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, offsetof(struct htab_elem, lru_node) + @@ -1369,7 +1373,9 @@ static u32 htab_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; - *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem); + BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, + (void *(*)(struct bpf_map *map, void *key))NULL)); + *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, offsetof(struct htab_elem, key) + diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4f4786ea2296..1dd6d5a7aa5b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2421,8 +2421,11 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; if (func_id != BPF_FUNC_tail_call && - func_id != BPF_FUNC_map_lookup_elem) + func_id != BPF_FUNC_map_lookup_elem && + func_id != BPF_FUNC_map_update_elem && + func_id != BPF_FUNC_map_delete_elem) return 0; + if (meta->map_ptr == NULL) { verbose(env, "kernel subsystem misconfigured verifier\n"); return -EINVAL; @@ -5586,6 +5589,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) struct bpf_insn *insn = prog->insnsi; const struct bpf_func_proto *fn; const int insn_cnt = prog->len; + const struct bpf_map_ops *ops; struct bpf_insn_aux_data *aux; struct bpf_insn insn_buf[16]; struct bpf_prog *new_prog; @@ -5715,35 +5719,61 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) } /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup - * handlers are currently limited to 64 bit only. + * and other inlining handlers are currently limited to 64 bit + * only. */ if (prog->jit_requested && BITS_PER_LONG == 64 && - insn->imm == BPF_FUNC_map_lookup_elem) { + (insn->imm == BPF_FUNC_map_lookup_elem || + insn->imm == BPF_FUNC_map_update_elem || + insn->imm == BPF_FUNC_map_delete_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; map_ptr = BPF_MAP_PTR(aux->map_state); - if (!map_ptr->ops->map_gen_lookup) - goto patch_call_imm; + ops = map_ptr->ops; + if (insn->imm == BPF_FUNC_map_lookup_elem && + ops->map_gen_lookup) { + cnt = ops->map_gen_lookup(map_ptr, insn_buf); + if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } - cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { - verbose(env, "bpf verifier is misconfigured\n"); - return -EINVAL; - } + new_prog = bpf_patch_insn_data(env, i + delta, + insn_buf, cnt); + if (!new_prog) + return -ENOMEM; - new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, - cnt); - if (!new_prog) - return -ENOMEM; + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } - delta += cnt - 1; + BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, + (void *(*)(struct bpf_map *map, void *key))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_delete_elem, + (int (*)(struct bpf_map *map, void *key))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_update_elem, + (int (*)(struct bpf_map *map, void *key, void *value, + u64 flags))NULL)); + switch (insn->imm) { + case BPF_FUNC_map_lookup_elem: + insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - + __bpf_call_base; + continue; + case BPF_FUNC_map_update_elem: + insn->imm = BPF_CAST_CALL(ops->map_update_elem) - + __bpf_call_base; + continue; + case BPF_FUNC_map_delete_elem: + insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - + __bpf_call_base; + continue; + } - /* keep walking new program and skip insns we just inserted */ - env->prog = prog = new_prog; - insn = new_prog->insnsi + i + delta; - continue; + goto patch_call_imm; } if (insn->imm == BPF_FUNC_redirect_map) { -- cgit v1.2.3 From bc23105ca0abdeed98366af01c700c2c3aff5cd5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 2 Jun 2018 23:06:39 +0200 Subject: bpf: fix context access in tracing progs on 32 bit archs Wang reported that all the testcases for BPF_PROG_TYPE_PERF_EVENT program type in test_verifier report the following errors on x86_32: 172/p unpriv: spill/fill of different pointers ldx FAIL Unexpected error message! 0: (bf) r6 = r10 1: (07) r6 += -8 2: (15) if r1 == 0x0 goto pc+3 R1=ctx(id=0,off=0,imm=0) R6=fp-8,call_-1 R10=fp0,call_-1 3: (bf) r2 = r10 4: (07) r2 += -76 5: (7b) *(u64 *)(r6 +0) = r2 6: (55) if r1 != 0x0 goto pc+1 R1=ctx(id=0,off=0,imm=0) R2=fp-76,call_-1 R6=fp-8,call_-1 R10=fp0,call_-1 fp-8=fp 7: (7b) *(u64 *)(r6 +0) = r1 8: (79) r1 = *(u64 *)(r6 +0) 9: (79) r1 = *(u64 *)(r1 +68) invalid bpf_context access off=68 size=8 378/p check bpf_perf_event_data->sample_period byte load permitted FAIL Failed to load prog 'Permission denied'! 0: (b7) r0 = 0 1: (71) r0 = *(u8 *)(r1 +68) invalid bpf_context access off=68 size=1 379/p check bpf_perf_event_data->sample_period half load permitted FAIL Failed to load prog 'Permission denied'! 0: (b7) r0 = 0 1: (69) r0 = *(u16 *)(r1 +68) invalid bpf_context access off=68 size=2 380/p check bpf_perf_event_data->sample_period word load permitted FAIL Failed to load prog 'Permission denied'! 0: (b7) r0 = 0 1: (61) r0 = *(u32 *)(r1 +68) invalid bpf_context access off=68 size=4 381/p check bpf_perf_event_data->sample_period dword load permitted FAIL Failed to load prog 'Permission denied'! 0: (b7) r0 = 0 1: (79) r0 = *(u64 *)(r1 +68) invalid bpf_context access off=68 size=8 Reason is that struct pt_regs on x86_32 doesn't fully align to 8 byte boundary due to its size of 68 bytes. Therefore, bpf_ctx_narrow_access_ok() will then bail out saying that off & (size_default - 1) which is 68 & 7 doesn't cleanly align in the case of sample_period access from struct bpf_perf_event_data, hence verifier wrongly thinks we might be doing an unaligned access here though underlying arch can handle it just fine. Therefore adjust this down to machine size and check and rewrite the offset for narrow access on that basis. We also need to fix corresponding pe_prog_is_valid_access(), since we hit the check for off % size != 0 (e.g. 68 % 8 -> 4) in the first and last test. With that in place, progs for tracing work on x86_32. Reported-by: Wang YanQing Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Tested-by: Wang YanQing Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 ++- kernel/trace/bpf_trace.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1dd6d5a7aa5b..d6403b5166f4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5349,6 +5349,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) */ is_narrower_load = size < ctx_field_size; if (is_narrower_load) { + u32 size_default = bpf_ctx_off_adjust_machine(ctx_field_size); u32 off = insn->off; u8 size_code; @@ -5363,7 +5364,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) else if (ctx_field_size == 8) size_code = BPF_DW; - insn->off = off & ~(ctx_field_size - 1); + insn->off = off & ~(size_default - 1); insn->code = BPF_LDX | BPF_MEM | size_code; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index af1486d9a0ed..752992ce3513 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -880,8 +880,14 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type return false; if (type != BPF_READ) return false; - if (off % size != 0) - return false; + if (off % size != 0) { + if (sizeof(unsigned long) != 4) + return false; + if (size != 8) + return false; + if (off % size != 4) + return false; + } switch (off) { case bpf_ctx_range(struct bpf_perf_event_data, sample_period): -- cgit v1.2.3 From 42b33468987bac0dd95c30f14820c7abac04a153 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 31 May 2018 10:59:47 +0200 Subject: xdp: add flags argument to ndo_xdp_xmit API This patch only change the API and reject any use of flags. This is an intermediate step that allows us to implement the flush flag operation later, for each individual driver in a separate patch. The plan is to implement flush operation via XDP_XMIT_FLUSH flag and then remove XDP_XMIT_FLAGS_NONE when done. Signed-off-by: Jesper Dangaard Brouer Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 1fe3fe60508a..037e234056f7 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -232,7 +232,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj, prefetch(xdpf); } - sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q); + sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, 0); if (sent < 0) { err = sent; sent = 0; -- cgit v1.2.3 From c1ece6b245bd12a57124da78abafbf8a511394d6 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 31 May 2018 11:00:23 +0200 Subject: bpf/xdp: devmap can avoid calling ndo_xdp_flush The XDP_REDIRECT map devmap can avoid using ndo_xdp_flush, by instead instructing ndo_xdp_xmit to flush via XDP_XMIT_FLUSH flag in appropriate places. Notice after this patch it is possible to remove ndo_xdp_flush completely, as this is the last user of ndo_xdp_flush. This is left for later patches, to keep driver changes separate. Signed-off-by: Jesper Dangaard Brouer Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 037e234056f7..a7cc7b3494a9 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -217,7 +217,7 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit) } static int bq_xmit_all(struct bpf_dtab_netdev *obj, - struct xdp_bulk_queue *bq) + struct xdp_bulk_queue *bq, u32 flags) { struct net_device *dev = obj->dev; int sent = 0, drops = 0, err = 0; @@ -232,7 +232,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj, prefetch(xdpf); } - sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, 0); + sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags); if (sent < 0) { err = sent; sent = 0; @@ -276,7 +276,6 @@ void __dev_map_flush(struct bpf_map *map) for_each_set_bit(bit, bitmap, map->max_entries) { struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); struct xdp_bulk_queue *bq; - struct net_device *netdev; /* This is possible if the dev entry is removed by user space * between xdp redirect and flush op. @@ -287,10 +286,7 @@ void __dev_map_flush(struct bpf_map *map) __clear_bit(bit, bitmap); bq = this_cpu_ptr(dev->bulkq); - bq_xmit_all(dev, bq); - netdev = dev->dev; - if (likely(netdev->netdev_ops->ndo_xdp_flush)) - netdev->netdev_ops->ndo_xdp_flush(netdev); + bq_xmit_all(dev, bq, XDP_XMIT_FLUSH); } } @@ -320,7 +316,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf, struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) - bq_xmit_all(obj, bq); + bq_xmit_all(obj, bq, 0); /* Ingress dev_rx will be the same for all xdp_frame's in * bulk_queue, because bq stored per-CPU and must be flushed @@ -359,8 +355,7 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key) static void dev_map_flush_old(struct bpf_dtab_netdev *dev) { - if (dev->dev->netdev_ops->ndo_xdp_flush) { - struct net_device *fl = dev->dev; + if (dev->dev->netdev_ops->ndo_xdp_xmit) { struct xdp_bulk_queue *bq; unsigned long *bitmap; @@ -371,9 +366,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) __clear_bit(dev->bit, bitmap); bq = per_cpu_ptr(dev->bulkq, cpu); - bq_xmit_all(dev, bq); - - fl->netdev_ops->ndo_xdp_flush(dev->dev); + bq_xmit_all(dev, bq, XDP_XMIT_FLUSH); } } } -- cgit v1.2.3 From bf6fa2c893c5237b48569a13fa3c673041430b6c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 3 Jun 2018 15:59:41 -0700 Subject: bpf: implement bpf_get_current_cgroup_id() helper bpf has been used extensively for tracing. For example, bcc contains an almost full set of bpf-based tools to trace kernel and user functions/events. Most tracing tools are currently either filtered based on pid or system-wide. Containers have been used quite extensively in industry and cgroup is often used together to provide resource isolation and protection. Several processes may run inside the same container. It is often desirable to get container-level tracing results as well, e.g. syscall count, function count, I/O activity, etc. This patch implements a new helper, bpf_get_current_cgroup_id(), which will return cgroup id based on the cgroup within which the current task is running. The later patch will provide an example to show that userspace can get the same cgroup id so it could configure a filter or policy in the bpf program based on task cgroup id. The helper is currently implemented for tracing. It can be added to other program types as well when needed. Acked-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 15 +++++++++++++++ kernel/trace/bpf_trace.c | 2 ++ 3 files changed, 18 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 527587de8a67..9f1493705f40 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1765,6 +1765,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; const struct bpf_func_proto bpf_sock_map_update_proto __weak; const struct bpf_func_proto bpf_sock_hash_update_proto __weak; +const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3d24e238221e..73065e2d23c2 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -179,3 +179,18 @@ const struct bpf_func_proto bpf_get_current_comm_proto = { .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE, }; + +#ifdef CONFIG_CGROUPS +BPF_CALL_0(bpf_get_current_cgroup_id) +{ + struct cgroup *cgrp = task_dfl_cgroup(current); + + return cgrp->kn->id.id; +} + +const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { + .func = bpf_get_current_cgroup_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; +#endif diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 752992ce3513..e2ab5b7f29d2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -564,6 +564,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_prandom_u32_proto; case BPF_FUNC_probe_read_str: return &bpf_probe_read_str_proto; + case BPF_FUNC_get_current_cgroup_id: + return &bpf_get_current_cgroup_id_proto; default: return NULL; } -- cgit v1.2.3 From 34ea38ca27991466a8fff849514b4181b42ae2eb Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 4 Jun 2018 08:53:41 -0700 Subject: bpf: guard bpf_get_current_cgroup_id() with CONFIG_CGROUPS Commit bf6fa2c893c5 ("bpf: implement bpf_get_current_cgroup_id() helper") introduced a new helper bpf_get_current_cgroup_id(). The helper has a dependency on CONFIG_CGROUPS. When CONFIG_CGROUPS is not defined, using the helper will result the following verifier error: kernel subsystem misconfigured func bpf_get_current_cgroup_id#80 which is hard for users to interpret. Guarding the reference to bpf_get_current_cgroup_id_proto with CONFIG_CGROUPS will result in below better message: unknown func bpf_get_current_cgroup_id#80 Fixes: bf6fa2c893c5 ("bpf: implement bpf_get_current_cgroup_id() helper") Suggested-by: Daniel Borkmann Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e2ab5b7f29d2..0ae6829804bc 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -564,8 +564,10 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_prandom_u32_proto; case BPF_FUNC_probe_read_str: return &bpf_probe_read_str_proto; +#ifdef CONFIG_CGROUPS case BPF_FUNC_get_current_cgroup_id: return &bpf_get_current_cgroup_id_proto; +#endif default: return NULL; } -- cgit v1.2.3