From de8f3a83b0a0fddb2cf56e7a718127e9619ea3da Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:51 +0200 Subject: bpf: add meta pointer for direct access This work enables generic transfer of metadata from XDP into skb. The basic idea is that we can make use of the fact that the resulting skb must be linear and already comes with a larger headroom for supporting bpf_xdp_adjust_head(), which mangles xdp->data. Here, we base our work on a similar principle and introduce a small helper bpf_xdp_adjust_meta() for adjusting a new pointer called xdp->data_meta. Thus, the packet has a flexible and programmable room for meta data, followed by the actual packet data. struct xdp_buff is therefore laid out that we first point to data_hard_start, then data_meta directly prepended to data followed by data_end marking the end of packet. bpf_xdp_adjust_head() takes into account whether we have meta data already prepended and if so, memmove()s this along with the given offset provided there's enough room. xdp->data_meta is optional and programs are not required to use it. The rationale is that when we process the packet in XDP (e.g. as DoS filter), we can push further meta data along with it for the XDP_PASS case, and give the guarantee that a clsact ingress BPF program on the same device can pick this up for further post-processing. Since we work with skb there, we can also set skb->mark, skb->priority or other skb meta data out of BPF, thus having this scratch space generic and programmable allows for more flexibility than defining a direct 1:1 transfer of potentially new XDP members into skb (it's also more efficient as we don't need to initialize/handle each of such new members). The facility also works together with GRO aggregation. The scratch space at the head of the packet can be multiple of 4 byte up to 32 byte large. Drivers not yet supporting xdp->data_meta can simply be set up with xdp->data_meta as xdp->data + 1 as bpf_xdp_adjust_meta() will detect this and bail out, such that the subsequent match against xdp->data for later access is guaranteed to fail. The verifier treats xdp->data_meta/xdp->data the same way as we treat xdp->data/xdp->data_end pointer comparisons. The requirement for doing the compare against xdp->data is that it hasn't been modified from it's original address we got from ctx access. It may have a range marking already from prior successful xdp->data/xdp->data_end pointer comparisons though. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ab5c402f98..e43491ac4823 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -582,6 +582,12 @@ union bpf_attr { * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem + * + * int bpf_xdp_adjust_meta(xdp_md, delta) + * Adjust the xdp_md.data_meta by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data_meta + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -638,6 +644,7 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ + FN(xdp_adjust_meta), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -715,7 +722,7 @@ struct __sk_buff { __u32 data_end; __u32 napi_id; - /* accessed by BPF_PROG_TYPE_sk_skb types */ + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ __u32 family; __u32 remote_ip4; /* Stored in network byte order */ __u32 local_ip4; /* Stored in network byte order */ @@ -723,6 +730,9 @@ struct __sk_buff { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; }; struct bpf_tunnel_key { @@ -783,6 +793,7 @@ enum xdp_action { struct xdp_md { __u32 data; __u32 data_end; + __u32 data_meta; }; enum sk_action { -- cgit v1.2.3 From cb4d2b3f03d8eed90be3a194e5b54b734ec4bbe9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:52 -0700 Subject: bpf: Add name, load_time, uid and map_ids to bpf_prog_info The patch adds name and load_time to struct bpf_prog_aux. They are also exported to bpf_prog_info. The bpf_prog's name is passed by userspace during BPF_PROG_LOAD. The kernel only stores the first (BPF_PROG_NAME_LEN - 1) bytes and the name stored in the kernel is always \0 terminated. The kernel will reject name that contains characters other than isalnum() and '_'. It will also reject name that is not null terminated. The existing 'user->uid' of the bpf_prog_aux is also exported to the bpf_prog_info as created_by_uid. The existing 'used_maps' of the bpf_prog_aux is exported to the newly added members 'nr_map_ids' and 'map_ids' of the bpf_prog_info. On the input, nr_map_ids tells how big the userspace's map_ids buffer is. On the output, nr_map_ids tells the exact user_map_cnt and it will only copy up to the userspace's map_ids buffer is allowed. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e43491ac4823..bd6348269bf5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -175,6 +175,8 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -210,6 +212,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; + __u8 prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -812,6 +815,11 @@ struct bpf_prog_info { __u32 xlated_prog_len; __aligned_u64 jited_prog_insns; __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From ad5b177bd73f5107d97c36f56395c4281fb6f089 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:53 -0700 Subject: bpf: Add map_name to bpf_map_info This patch allows userspace to specify a name for a map during BPF_MAP_CREATE. The map's name can later be exported to user space via BPF_OBJ_GET_INFO_BY_FD. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bd6348269bf5..6d2137b4cf38 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -190,6 +190,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). */ + __u8 map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -829,6 +830,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops -- cgit v1.2.3 From 90caccdd8cc0215705f18b92771b449b01e2474a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 3 Oct 2017 15:37:20 -0700 Subject: bpf: fix bpf_tail_call() x64 JIT - bpf prog_array just like all other types of bpf array accepts 32-bit index. Clarify that in the comment. - fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes - tighten corresponding check in the interpreter to stay consistent The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and though JIT code is wrong it will check bounds correctly. Hence two fixes tags. All other JITs don't have this problem. Signed-off-by: Alexei Starovoitov Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ab5c402f98..f90860d1f897 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -312,7 +312,7 @@ union bpf_attr { * jump into another BPF program * @ctx: context pointer passed to next program * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run + * @index: 32-bit index inside array that selects specific program to run * Return: 0 on success or negative error * * int bpf_clone_redirect(skb, ifindex, flags) -- cgit v1.2.3 From 324bda9e6c5add86ba2e1066476481c48132aca0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:21 -0700 Subject: bpf: multi program support for cgroup+bpf introduce BPF_F_ALLOW_MULTI flag that can be used to attach multiple bpf programs to a cgroup. The difference between three possible flags for BPF_PROG_ATTACH command: - NONE(default): No further bpf programs allowed in the subtree. - BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, the program in this cgroup yields to sub-cgroup program. - BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup. NONE and BPF_F_ALLOW_OVERRIDE existed before. This patch doesn't change their behavior. It only clarifies the semantics in relation to new flag. Only one program is allowed to be attached to a cgroup with NONE or BPF_F_ALLOW_OVERRIDE flag. Multiple programs are allowed to be attached to a cgroup with BPF_F_ALLOW_MULTI flag. They are executed in FIFO order (those that were attached first, run first) The programs of sub-cgroup are executed first, then programs of this cgroup and then programs of parent cgroup. All eligible programs are executed regardless of return code from earlier programs. To allow efficient execution of multiple programs attached to a cgroup and to avoid penalizing cgroups without any programs attached introduce 'struct bpf_prog_array' which is RCU protected array of pointers to bpf programs. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6d2137b4cf38..762f74bc6c47 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -143,11 +143,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel -- cgit v1.2.3 From 468e2f64d220fe2dc11caa2bcb9b3a1e50fc7321 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:22 -0700 Subject: bpf: introduce BPF_PROG_QUERY command introduce BPF_PROG_QUERY command to retrieve a set of either attached programs to given cgroup or a set of effective programs that will execute for events within a cgroup Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 762f74bc6c47..cb2b9f95160a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -92,6 +92,7 @@ enum bpf_cmd { BPF_PROG_GET_FD_BY_ID, BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, }; enum bpf_map_type { @@ -211,6 +212,9 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + #define BPF_OBJ_NAME_LEN 16U union bpf_attr { @@ -289,6 +293,15 @@ union bpf_attr { __u32 info_len; __aligned_u64 info; } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; } __attribute__((aligned(8))); /* BPF helper function descriptions: -- cgit v1.2.3 From 908432ca84fc229e906ba164219e9ad0fe56f755 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:20 -0700 Subject: bpf: add helper bpf_perf_event_read_value for perf event array map Hardware pmu counters are limited resources. When there are more pmu based perf events opened than available counters, kernel will multiplex these events so each event gets certain percentage (but not 100%) of the pmu time. In case that multiplexing happens, the number of samples or counter value will not reflect the case compared to no multiplexing. This makes comparison between different runs difficult. Typically, the number of samples or counter value should be normalized before comparing to other experiments. The typical normalization is done like: normalized_num_samples = num_samples * time_enabled / time_running normalized_counter_value = counter_value * time_enabled / time_running where time_enabled is the time enabled for event and time_running is the time running for event since last normalization. This patch adds helper bpf_perf_event_read_value for kprobed based perf event array map, to read perf counter and enabled/running time. The enabled/running time is accumulated since the perf event open. To achieve scaling factor between two bpf invocations, users can can use cpu_id as the key (which is typical for perf array usage model) to remember the previous value and do the calculation inside the bpf program. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6082faf5fd2a..7b57a212c7d7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -641,6 +641,14 @@ union bpf_attr { * @xdp_md: pointer to xdp_md * @delta: An positive/negative integer to be added to xdp_md.data_meta * Return: 0 on success or negative on error + * + * int bpf_perf_event_read_value(map, flags, buf, buf_size) + * read perf event counter value and perf event enabled/running time + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * @buf: buf to fill + * @buf_size: size of the buf + * Return: 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -697,7 +705,8 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ - FN(xdp_adjust_meta), + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -741,7 +750,9 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ +/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and + * BPF_FUNC_perf_event_read_value flags. + */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK /* BPF_FUNC_perf_event_output for sk_buff input context. */ @@ -934,4 +945,10 @@ enum { #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ #define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +struct bpf_perf_event_value { + __u64 counter; + __u64 enabled; + __u64 running; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 4bebdc7a85aa400c0222b5329861e4ad9252f1e5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:22 -0700 Subject: bpf: add helper bpf_perf_prog_read_value This patch adds helper bpf_perf_prog_read_cvalue for perf event based bpf programs, to read event counter and enabled/running time. The enabled/running time is accumulated since the perf event open. The typical use case for perf event based bpf program is to attach itself to a single event. In such cases, if it is desirable to get scaling factor between two bpf invocations, users can can save the time values in a map, and use the value from the map and the current value to calculate the scaling factor. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7b57a212c7d7..5bbbec17aa5a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -649,6 +649,13 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return: 0 on success or negative error code + * + * int bpf_perf_prog_read_value(ctx, buf, buf_size) + * read perf prog attached perf event counter and enabled/running time + * @ctx: pointer to ctx + * @buf: buf to fill + * @buf_size: size of the buf + * Return : 0 on success or negative error code */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -706,7 +713,8 @@ union bpf_attr { FN(sk_redirect_map), \ FN(sock_map_update), \ FN(xdp_adjust_meta), \ - FN(perf_event_read_value), + FN(perf_event_read_value), \ + FN(perf_prog_read_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 067cae47771c864604969fd902efe10916e0d79c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 5 Oct 2017 21:52:12 -0700 Subject: bpf: Use char in prog and map name Instead of u8, use char for prog and map name. It can avoid the userspace tool getting compiler's signess warning. The bpf_prog_aux, bpf_map, bpf_attr, bpf_prog_info and bpf_map_info are changed. Signed-off-by: Martin KaFai Lau Cc: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5bbbec17aa5a..6db9e1d679cd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -230,7 +230,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). */ - __u8 map_name[BPF_OBJ_NAME_LEN]; + char map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -253,7 +253,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; - __u8 prog_name[BPF_OBJ_NAME_LEN]; + char prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -888,7 +888,7 @@ struct bpf_prog_info { __u32 created_by_uid; __u32 nr_map_ids; __aligned_u64 map_ids; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -898,7 +898,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; - __u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops -- cgit v1.2.3 From 6710e1126934d8b4372b4d2f9ae1646cd3f151bf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:28 +0200 Subject: bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP The 'cpumap' is primarily used as a backend map for XDP BPF helper call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'. This patch implement the main part of the map. It is not connected to the XDP redirect system yet, and no SKB allocation are done yet. The main concern in this patch is to ensure the datapath can run without any locking. This adds complexity to the setup and tear-down procedure, which assumptions are extra carefully documented in the code comments. V2: - make sure array isn't larger than NR_CPUS - make sure CPUs added is a valid possible CPU V3: fix nitpicks from Jakub Kicinski V5: - Restrict map allocation to root / CAP_SYS_ADMIN - WARN_ON_ONCE if queue is not empty on tear-down - Return -EPERM on memlock limit instead of -ENOMEM - Error code in __cpu_map_entry_alloc() also handle ptr_ring_cleanup() - Moved cpu_map_enqueue() to next patch V6: all notice by Daniel Borkmann - Fix err return code in cpu_map_alloc() introduced in V5 - Move cpu_possible() check after max_entries boundary check - Forbid usage initially in check_map_func_compatibility() V7: - Fix alloc error path spotted by Daniel Borkmann - Did stress test adding+removing CPUs from the map concurrently - Fixed refcnt issue on cpu_map_entry, kthread started too soon - Make sure packets are flushed during tear-down, involved use of rcu_barrier() and kthread_run only exit after queue is empty - Fix alloc error path in __cpu_map_entry_alloc() for ptr_ring V8: - Nitpicking comments and gramma by Edward Cree - Fix missing semi-colon introduced in V7 due to rebasing - Move struct bpf_cpu_map_entry members cpu+map_id to tracepoint patch Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6db9e1d679cd..4303fb6c3817 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -112,6 +112,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_CPUMAP, }; enum bpf_prog_type { -- cgit v1.2.3 From 6e71b04a82248ccf13a94b85cbc674a9fefe53f5 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:22 -0700 Subject: bpf: Add file mode configuration into bpf maps Introduce the map read/write flags to the eBPF syscalls that returns the map fd. The flags is used to set up the file mode when construct a new file descriptor for bpf maps. To not break the backward capability, the f_flags is set to O_RDWR if the flag passed by syscall is 0. Otherwise it should be O_RDONLY or O_WRONLY. When the userspace want to modify or read the map content, it will check the file mode to see if it is allowed to make the change. Signed-off-by: Chenbo Feng Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4303fb6c3817..d83f95ea6a1b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -218,6 +218,10 @@ enum bpf_attach_type { #define BPF_OBJ_NAME_LEN 16U +/* Flags for accessing BPF object */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -260,6 +264,7 @@ union bpf_attr { struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; + __u32 file_flags; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ @@ -287,6 +292,7 @@ union bpf_attr { __u32 map_id; }; __u32 next_id; + __u32 open_flags; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ -- cgit v1.2.3 From e6546ef6d86d0fc38e0e84ccae80e641f3fc0087 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 20 Oct 2017 11:05:39 -0700 Subject: bpf: add support for BPF_SOCK_OPS_BASE_RTT A congestion control algorithm can make a call to the BPF socket_ops program to request the base RTT. The base RTT can be congestion control dependent and is meant to represent a congestion threshold such that RTTs above it indicate congestion. This is especially useful for flows within a DC where the base RTT is easy to obtain. Being provided a base RTT solves a basic problem in RTT based congestion avoidance algorithms (such as Vegas, NV and BBR). Although it is easy to get the base RTT when the network is not congested, it is very diffcult to do when it is very congested. Newer connections get an inflated value of the base RTT leading to unfariness (newer flows with a larger base RTT get more bandwidth). As a result, RTT based congestion avoidance algorithms tend to update their base RTTs to improve fairness. In very congested networks this can lead to base RTT inflation, reducing the ability of these RTT based congestion control algorithms to prevent congestion. Note that in my experiments with TCP-NV, the base RTT provided can be much larger than the actual hardware RTT. For example, experimenting with hosts within a rack where the hardware RTT is 16-20us, I've used base RTTs up to 150us. The effect of using a larger base RTT is that the congestion avoidance algorithm will allow more queueing. When there are only a few flows the main effect is larger measured RTTs and RPC latencies due to the increased queueing. When there are a lot of flows, a larger base RTT can lead to more congestion and more packet drops. For this case, where the hardware RTT is 20us, a base RTT of 80us produces good results. This patch only introduces BPF_SOCK_OPS_BASE_RTT, a later patch in this set adds support for using it in TCP-NV. Further study and testing is needed before support can be added to other delay based congestion avoidance algorithms. Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d83f95ea6a1b..1aca744c220f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -955,6 +955,13 @@ enum { BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control * needs ECN */ + BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is + * based on the path and may be + * dependent on the congestion control + * algorithm. In general it indicates + * a congestion threshold. RTTs above + * this indicate congestion + */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -- cgit v1.2.3 From cd86d1fd21025fdd6daf23d1288da405e7ad0ec6 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Fri, 20 Oct 2017 11:05:40 -0700 Subject: bpf: Adding helper function bpf_getsockops Adding support for helper function bpf_getsockops to socket_ops BPF programs. This patch only supports TCP_CONGESTION. Signed-off-by: Vlad Vysotsky Acked-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1aca744c220f..f650346aaa1a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -613,12 +613,22 @@ union bpf_attr { * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) * Calls setsockopt. Not all opts are available, only those with * integer optvals plus TCP_CONGESTION. - * Supported levels: SOL_SOCKET and IPROTO_TCP + * Supported levels: SOL_SOCKET and IPPROTO_TCP * @bpf_socket: pointer to bpf_socket - * @level: SOL_SOCKET or IPROTO_TCP + * @level: SOL_SOCKET or IPPROTO_TCP * @optname: option name * @optval: pointer to option value - * @optlen: length of optval in byes + * @optlen: length of optval in bytes + * Return: 0 or negative error + * + * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) + * Calls getsockopt. Not all opts are available. + * Supported levels: IPPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: IPPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in bytes * Return: 0 or negative error * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) @@ -721,7 +731,8 @@ union bpf_attr { FN(sock_map_update), \ FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ - FN(perf_prog_read_value), + FN(perf_prog_read_value), \ + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From bfa640757e9378c2f26867e723f1287e94f5a7ad Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 27 Oct 2017 09:45:53 -0700 Subject: bpf: rename sk_actions to align with bpf infrastructure Recent additions to support multiple programs in cgroups impose a strict requirement, "all yes is yes, any no is no". To enforce this the infrastructure requires the 'no' return code, SK_DROP in this case, to be 0. To apply these rules to SK_SKB program types the sk_actions return codes need to be adjusted. This fix adds SK_PASS and makes 'SK_DROP = 0'. Finally, remove SK_ABORTED to remove any chance that the API may allow aborted program flows to be passed up the stack. This would be incorrect behavior and allow programs to break existing policies. Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f90860d1f897..0d7948ce2128 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -575,7 +575,7 @@ union bpf_attr { * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use - * Return: SK_REDIRECT + * Return: SK_PASS * * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops @@ -786,8 +786,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; -- cgit v1.2.3 From 04686ef299db5ff299bfc5a4504b189e46842078 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 31 Oct 2017 19:17:31 -0700 Subject: bpf: remove SK_REDIRECT from UAPI Now that SK_REDIRECT is no longer a valid return code. Remove it from the UAPI completely. Then do a namespace remapping internal to sockmap so SK_REDIRECT is no longer externally visible. Patchs primary change is to do a namechange from SK_REDIRECT to __SK_REDIRECT Reported-by: Alexei Starovoitov Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0d7948ce2128..7bf4c750dd3a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -788,7 +788,6 @@ struct xdp_md { enum sk_action { SK_DROP = 0, SK_PASS, - SK_REDIRECT, }; #define BPF_TAG_SIZE 8 -- cgit v1.2.3 From e2be04c7f9958dde770eeb8b30e829ca969b37bb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Nov 2017 15:09:13 +0100 Subject: License cleanup: add SPDX license identifier to uapi header files with a license Many user space API headers have licensing information, which is either incomplete, badly formatted or just a shorthand for referring to the license under which the file is supposed to be. This makes it hard for compliance tools to determine the correct license. Update these files with an SPDX license identifier. The identifier was chosen based on the license information in the file. GPL/LGPL licensed headers get the matching GPL/LGPL SPDX license identifier with the added 'WITH Linux-syscall-note' exception, which is the officially assigned exception identifier for the kernel syscall exception: NOTE! This copyright does *not* cover user programs that use kernel services by normal system calls - this is merely considered normal use of the kernel, and does *not* fall under the heading of "derived work". This exception makes it possible to include GPL headers into non GPL code, without confusing license compliance tools. Headers which have either explicit dual licensing or are just licensed under a non GPL license are updated with the corresponding SPDX identifier and the GPLv2 with syscall exception identifier. The format is: ((GPL-2.0 WITH Linux-syscall-note) OR SPDX-ID-OF-OTHER-LICENSE) SPDX license identifiers are a legally binding shorthand, which can be used instead of the full boiler plate text. The update does not remove existing license information as this has to be done on a case by case basis and the copyright holders might have to be consulted. This will happen in a separate step. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. See the previous patch in this series for the methodology of how this patch was researched. Reviewed-by: Kate Stewart Reviewed-by: Philippe Ombredanne Reviewed-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f90860d1f897..8012b4ff959b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * * This program is free software; you can redistribute it and/or -- cgit v1.2.3 From ab3f0063c48c26c927851b6767824e35a716d878 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:17 -0700 Subject: bpf: offload: add infrastructure for loading programs for a specific netdev The fact that we don't know which device the program is going to be used on is quite limiting in current eBPF infrastructure. We have to reverse or limit the changes which kernel makes to the loaded bytecode if we want it to be offloaded to a networking device. We also have to invent new APIs for debugging and troubleshooting support. Make it possible to load programs for a specific netdev. This helps us to bring the debug information closer to the core eBPF infrastructure (e.g. we will be able to reuse the verifer log in device JIT). It allows device JITs to perform translation on the original bytecode. __bpf_prog_get() when called to get a reference for an attachment point will now refuse to give it if program has a device assigned. Following patches will add a version of that function which passes the expected netdev in. @type argument in __bpf_prog_get() is renamed to attach_type to make it clearer that it's only set on attachment. All calls to ndo_bpf are protected by rtnl, only verifier callbacks are not. We need a wait queue to make sure netdev doesn't get destroyed while verifier is still running and calling its driver. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9820677c2ff..80d191a93fb0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -260,6 +260,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From bd601b6ada11fdfb9e277f24ad2eb54bc599156b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:18 -0700 Subject: bpf: report offload info to user space Extend struct bpf_prog_info to contain information about program being bound to a device. Since the netdev may get destroyed while program still exists we need a flag to indicate the program is loaded for a device, even if the device is gone. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 80d191a93fb0..4455dd195201 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -895,6 +895,10 @@ enum sk_action { #define BPF_TAG_SIZE 8 +enum bpf_prog_status { + BPF_PROG_STATUS_DEV_BOUND = (1 << 0), +}; + struct bpf_prog_info { __u32 type; __u32 id; @@ -908,6 +912,8 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From ebc614f687369f9df99828572b1d85a7c2de3d92 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:32 -0500 Subject: bpf, cgroup: implement eBPF-based device controller for cgroup v2 Cgroup v2 lacks the device controller, provided by cgroup v1. This patch adds a new eBPF program type, which in combination of previously added ability to attach multiple eBPF programs to a cgroup, will provide a similar functionality, but with some additional flexibility. This patch introduces a BPF_PROG_TYPE_CGROUP_DEVICE program type. A program takes major and minor device numbers, device type (block/character) and access type (mknod/read/write) as parameters and returns an integer which defines if the operation should be allowed or terminated with -EPERM. Signed-off-by: Roman Gushchin Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Cc: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4455dd195201..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -132,6 +132,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, }; enum bpf_attach_type { @@ -141,6 +142,7 @@ enum bpf_attach_type { BPF_CGROUP_SOCK_OPS, BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, __MAX_BPF_ATTACH_TYPE }; @@ -991,4 +993,17 @@ struct bpf_perf_event_value { __u64 running; }; +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + __u32 access_type; /* (access << 16) | type */ + __u32 major; + __u32 minor; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From dd0bb688eaa241b5655d396d45366cba9225aed9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Nov 2017 15:28:42 -0500 Subject: bpf: add a bpf_override_function helper Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with it's kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about with very specific situations. Accomplish this with the bpf_override_funciton helper. This will modify the probe'd callers return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov Signed-off-by: Josef Bacik Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e880ae6434ee..adb66f78b674 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From f3edacbd697f94a743fff1a3d26910ab99948ba7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Nov 2017 18:24:55 +0900 Subject: bpf: Revert bpf_overrid_function() helper changes. NACK'd by x86 maintainer. Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index adb66f78b674..e880ae6434ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,10 +677,6 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code - * - * int bpf_override_return(pt_regs, rc) - * @pt_regs: pointer to struct pt_regs - * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -740,8 +736,7 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), + FN(getsockopt), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 1f6f4cb7ba219b00a3fa9afe8049fa16444d8b52 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:53 -0800 Subject: bpf: offload: rename the ifindex field bpf_target_prog seems long and clunky, rename it to prog_ifindex. We don't want to call this field just ifindex, because maps may need a similar field in the future and bpf_attr members for programs and maps are unnamed. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e880ae6434ee..3f626df42516 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -262,7 +262,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; - __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ + __u32 prog_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 1ee640095f049e7ac4ec36b985abada497b98cc2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:59 -0800 Subject: bpf: revert report offload info to user space This reverts commit bd601b6ada11 ("bpf: report offload info to user space"). The ifindex by itself is not sufficient, we should provide information on which network namespace this ifindex belongs to. After considering some options we concluded that it's best to just remove this API for now, and rework it in -next. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3f626df42516..4c223ab30293 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -897,10 +897,6 @@ enum sk_action { #define BPF_TAG_SIZE 8 -enum bpf_prog_status { - BPF_PROG_STATUS_DEV_BOUND = (1 << 0), -}; - struct bpf_prog_info { __u32 type; __u32 id; @@ -914,8 +910,6 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; - __u32 ifindex; - __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3