From 6816a7ffce32e999601825ddfd887f36d3052932 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:25 +0200 Subject: bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_read Follow-up commit to 1e33759c788c ("bpf, trace: add BPF_F_CURRENT_CPU flag for bpf_perf_event_output") to add the same functionality into bpf_perf_event_read() helper. The split of index into flags and index component is also safe here, since such large maps are rejected during map allocation time. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 406459b935a2..58df2da3e9bf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -347,7 +347,7 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output flags. */ +/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK -- cgit v1.2.3 From 6578171a7ff0c31dc73258f93da7407510abf085 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:27 +0200 Subject: bpf: add bpf_skb_change_proto helper This patch adds a minimal helper for doing the groundwork of changing the skb->protocol in a controlled way. Currently supported is v4 to v6 and vice versa transitions, which allows f.e. for a minimal, static nat64 implementation where applications in containers that still require IPv4 can be transparently operated in an IPv6-only environment. For example, host facing veth of the container can transparently do the transitions in a programmatic way with the help of clsact qdisc and cls_bpf. Idea is to separate concerns for keeping complexity of the helper lower, which means that the programs utilize bpf_skb_change_proto(), bpf_skb_store_bytes() and bpf_lX_csum_replace() to get the job done, instead of doing everything in a single helper (and thus partially duplicating helper functionality). Also, bpf_skb_change_proto() shouldn't need to deal with raw packet data as this is done by other helpers. bpf_skb_proto_6_to_4() and bpf_skb_proto_4_to_6() unclone the skb to operate on a private one, push or pop additionally required header space and migrate the gso/gro meta data from the shared info. We do mark the gso type as dodgy so that headers are checked and segs recalculated by the gso/gro engine. The gso_size target is adapted as well. The flags argument added is currently reserved and can be used for future extensions. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 58df2da3e9bf..66cd738a937a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -313,6 +313,20 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt, + + /** + * bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is + * v4 -> v6, v6 -> v4 transitions. The helper will also + * resize the skb. eBPF program is expected to fill the + * new headers via skb_store_bytes and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_proto, + __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From d2485c4242a826fdf493fd3a27b8b792965b9b9e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 28 Jun 2016 12:18:28 +0200 Subject: bpf: add bpf_skb_change_type helper This work adds a helper for changing skb->pkt_type in a controlled way. We only allow a subset of possible values and can extend that in future should other use cases come up. Doing this as a helper has the advantage that errors can be handeled gracefully and thus helper kept extensible. It's a write counterpart to pkt_type member we can already read from struct __sk_buff context. Major use case is to change incoming skbs to PACKET_HOST in a programmatic way instead of having to recirculate via redirect(..., BPF_F_INGRESS), for example. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 66cd738a937a..be6ac1291680 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -327,6 +327,15 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_proto, + /** + * bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_type, + __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 4ed8ec521ed57c4e207ad464ca0388776de74d4b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:43 -0700 Subject: cgroup: bpf: Add BPF_MAP_TYPE_CGROUP_ARRAY Add a BPF_MAP_TYPE_CGROUP_ARRAY and its bpf_map_ops's implementations. To update an element, the caller is expected to obtain a cgroup2 backed fd by open(cgroup2_dir) and then update the array with that fd. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index be6ac1291680..26c04be32003 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,6 +84,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, }; enum bpf_prog_type { -- cgit v1.2.3 From 4a482f34afcc162d8456f449b137ec2a95be60d8 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 30 Jun 2016 10:28:44 -0700 Subject: cgroup: bpf: Add bpf_skb_in_cgroup_proto Adds a bpf helper, bpf_skb_in_cgroup, to decide if a skb->sk belongs to a descendant of a cgroup2. It is similar to the feature added in netfilter: commit c38c4597e4bf ("netfilter: implement xt_cgroup cgroup2 path match") The user is expected to populate a BPF_MAP_TYPE_CGROUP_ARRAY which will be used by the bpf_skb_in_cgroup. Modifications to the bpf verifier is to ensure BPF_MAP_TYPE_CGROUP_ARRAY and bpf_skb_in_cgroup() are always used together. Signed-off-by: Martin KaFai Lau Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 26c04be32003..f44504d875e2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -337,6 +337,17 @@ enum bpf_func_id { */ BPF_FUNC_skb_change_type, + /** + * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_skb_in_cgroup, __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 13c5c240f789bbd2bcacb14a23771491485ae61f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 3 Jul 2016 01:28:47 +0200 Subject: bpf: add bpf_get_hash_recalc helper If skb_clear_hash() was invoked due to mangling of relevant headers and BPF program needs skb->hash later on, we can add a helper to trigger hash recalculation via bpf_get_hash_recalc(). The helper will return the newly retrieved hash directly, but later access can also be done via skb context again through skb->hash directly (inline) without needing to call the helper once more. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f44504d875e2..c14ca1cd6297 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -348,6 +348,15 @@ enum bpf_func_id { * < 0 error */ BPF_FUNC_skb_in_cgroup, + + /** + * bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + */ + BPF_FUNC_get_hash_recalc, + __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 606274c5abd8e245add01bc7145a8cbb92b69ba8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 6 Jul 2016 22:38:36 -0700 Subject: bpf: introduce bpf_get_current_task() helper over time there were multiple requests to access different data structures and fields of task_struct current, so finally add the helper to access 'current' as-is. Tracing bpf programs will do the rest of walking the pointers via bpf_probe_read(). Note that current can be null and bpf program has to deal it with, but even dumb passing null into bpf_probe_read() is still safe. Suggested-by: Brendan Gregg Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c14ca1cd6297..262a7e883b19 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -357,6 +357,13 @@ enum bpf_func_id { */ BPF_FUNC_get_hash_recalc, + /** + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + */ + BPF_FUNC_get_current_task, + __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3 From 555c8a8623a3a87b3c990ba30b7fd2e5914e41d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Jul 2016 18:08:05 +0200 Subject: bpf: avoid stack copy and use skb ctx for event output This work addresses a couple of issues bpf_skb_event_output() helper currently has: i) We need two copies instead of just a single one for the skb data when it should be part of a sample. The data can be non-linear and thus needs to be extracted via bpf_skb_load_bytes() helper first, and then copied once again into the ring buffer slot. ii) Since bpf_skb_load_bytes() currently needs to be used first, the helper needs to see a constant size on the passed stack buffer to make sure BPF verifier can do sanity checks on it during verification time. Thus, just passing skb->len (or any other non-constant value) wouldn't work, but changing bpf_skb_load_bytes() is also not the proper solution, since the two copies are generally still needed. iii) bpf_skb_load_bytes() is just for rather small buffers like headers, since they need to sit on the limited BPF stack anyway. Instead of working around in bpf_skb_load_bytes(), this work improves the bpf_skb_event_output() helper to address all 3 at once. We can make use of the passed in skb context that we have in the helper anyway, and use some of the reserved flag bits as a length argument. The helper will use the new __output_custom() facility from perf side with bpf_skb_copy() as callback helper to walk and extract the data. It will pass the data for setup to bpf_event_output(), which generates and pushes the raw record with an additional frag part. The linear data used in the first frag of the record serves as programmatically defined meta data passed along with the appended sample. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 262a7e883b19..c4d922439d20 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -401,6 +401,8 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +/* BPF_FUNC_perf_event_output for sk_buff input context. */ +#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure -- cgit v1.2.3 From 6a773a15a1e8874e5eccd2f29190c31085912c95 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:47 -0700 Subject: bpf: add XDP prog type for early driver filter Add a new bpf prog type that is intended to run in early stages of the packet rx path. Only minimal packet metadata will be available, hence a new context type, struct xdp_md, is exposed to userspace. So far only expose the packet start and end pointers, and only in read mode. An XDP program must return one of the well known enum values, all other return codes are reserved for future use. Unfortunately, this restriction is hard to enforce at verification time, so take the approach of warning at runtime when such programs are encountered. Out of bounds return codes should alias to XDP_ABORTED. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c4d922439d20..a51786566c2f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, }; #define BPF_PSEUDO_MAP_FD 1 @@ -439,4 +440,23 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* User return codes for XDP prog type. + * A valid XDP program must return one of these defined values. All other + * return codes are reserved for future use. Unknown return codes will result + * in packet drop. + */ +enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, +}; + +/* user accessible metadata for XDP packet hook + * new fields must be added to the end of this structure + */ +struct xdp_md { + __u32 data; + __u32 data_end; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 6ce96ca348a9e949f8c43f4d3e98db367d93cffd Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 19 Jul 2016 12:16:53 -0700 Subject: bpf: add XDP_TX xdp_action for direct forwarding XDP enabled drivers must transmit received packets back out on the same port they were received on when a program returns this action. Signed-off-by: Brenden Blanco Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a51786566c2f..2b7076f5b5ad 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -449,6 +449,7 @@ enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, + XDP_TX, }; /* user accessible metadata for XDP packet hook -- cgit v1.2.3 From 96ae52279594470622ff0585621a13e96b700600 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Mon, 25 Jul 2016 05:54:46 -0700 Subject: bpf: Add bpf_probe_write_user BPF helper to be called in tracers This allows user memory to be written to during the course of a kprobe. It shouldn't be used to implement any kind of security mechanism because of TOC-TOU attacks, but rather to debug, divert, and manipulate execution of semi-cooperative processes. Although it uses probe_kernel_write, we limit the address space the probe can write into by checking the space with access_ok. We do this as opposed to calling copy_to_user directly, in order to avoid sleeping. In addition we ensure the threads's current fs / segment is USER_DS and the thread isn't exiting nor a kernel thread. Given this feature is meant for experiments, and it has a risk of crashing the system, and running programs, we print a warning on when a proglet that attempts to use this helper is installed, along with the pid and process name. Signed-off-by: Sargun Dhillon Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2b7076f5b5ad..da218fec6056 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -365,6 +365,16 @@ enum bpf_func_id { */ BPF_FUNC_get_current_task, + /** + * bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + */ + BPF_FUNC_probe_write_user, + __BPF_FUNC_MAX_ID, }; -- cgit v1.2.3