From 78958fca7ead2f81b60a6827881c4866d1ed0c52 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 4 May 2018 14:49:51 -0700 Subject: bpf: btf: Introduce BTF ID This patch gives an ID to each loaded BTF. The ID is allocated by the idr like the existing prog-id and map-id. The bpf_put(map->btf) is moved to __bpf_map_put() so that the userspace can stop seeing the BTF ID ASAP when the last BTF refcnt is gone. It also makes BTF accessible from userspace through the 1. new BPF_BTF_GET_FD_BY_ID command. It is limited to CAP_SYS_ADMIN which is inline with the BPF_BTF_LOAD cmd and the existing BPF_[MAP|PROG]_GET_FD_BY_ID cmd. 2. new btf_id (and btf_key_id + btf_value_id) in "struct bpf_map_info" Once the BTF ID handler is accessible from userspace, freeing a BTF object has to go through a rcu period. The BPF_BTF_GET_FD_BY_ID cmd can then be done under a rcu_read_lock() instead of taking spin_lock. [Note: A similar rcu usage can be done to the existing bpf_prog_get_fd_by_id() in a follow up patch] When processing the BPF_BTF_GET_FD_BY_ID cmd, refcount_inc_not_zero() is needed because the BTF object could be already in the rcu dead row . btf_get() is removed since its usage is currently limited to btf.c alone. refcount_inc() is used directly instead. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/btf.h | 2 ++ include/uapi/linux/bpf.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index a966dc6d61ee..e076c4697049 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -44,5 +44,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, u32 *ret_size); void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m); +int btf_get_fd_by_id(u32 id); +u32 btf_id(const struct btf *btf); #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 93d5a4eeec2a..6106f23a9a8a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -96,6 +96,7 @@ enum bpf_cmd { BPF_PROG_QUERY, BPF_RAW_TRACEPOINT_OPEN, BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, }; enum bpf_map_type { @@ -344,6 +345,7 @@ union bpf_attr { __u32 start_id; __u32 prog_id; __u32 map_id; + __u32 btf_id; }; __u32 next_id; __u32 open_flags; @@ -2130,6 +2132,9 @@ struct bpf_map_info { __u32 ifindex; __u64 netns_dev; __u64 netns_ino; + __u32 btf_id; + __u32 btf_key_id; + __u32 btf_value_id; } __attribute__((aligned(8))); /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed -- cgit v1.2.3 From 62dab84c81a487d946a5fc37c6df541dd95cca38 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 4 May 2018 14:49:52 -0700 Subject: bpf: btf: Add struct bpf_btf_info During BPF_OBJ_GET_INFO_BY_FD on a btf_fd, the current bpf_attr's info.info is directly filled with the BTF binary data. It is not extensible. In this case, we want to add BTF ID. This patch adds "struct bpf_btf_info" which has the BTF ID as one of its member. The BTF binary data itself is exposed through the "btf" and "btf_size" members. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6106f23a9a8a..d615c777b573 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2137,6 +2137,12 @@ struct bpf_map_info { __u32 btf_value_id; } __attribute__((aligned(8))); +struct bpf_btf_info { + __aligned_u64 btf; + __u32 btf_size; + __u32 id; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach attach type). -- cgit v1.2.3 From 0d8300325660f81787892a1c58dc1f9428a67143 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 May 2018 19:37:06 -0700 Subject: bpf: xdp: allow offloads to store into rx_queue_index It's fairly easy for offloaded XDP programs to select the RX queue packets go to. We need a way of expressing this in the software. Allow write to the rx_queue_index field of struct xdp_md for device-bound programs. Skip convert_ctx_access callback entirely for offloads. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 321969da67b7..a38e474bf7ee 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -627,7 +627,7 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) { return aux->offload_requested; } -- cgit v1.2.3 From 6454743bc13e7dfd4f2720758ca3fcdea76b82a4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:19 -0700 Subject: net/ipv6: Rename fib6_lookup to fib6_node_lookup Rename fib6_lookup to fib6_node_lookup to better reflect what it returns. The fib6_lookup name will be used in a later patch for an IPv6 equivalent to IPv4's fib_lookup. Signed-off-by: David Ahern Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/net/ip6_fib.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a3ec08d05756..43ab545e64ea 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -376,9 +376,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); -struct fib6_node *fib6_lookup(struct fib6_node *root, - const struct in6_addr *daddr, - const struct in6_addr *saddr); +struct fib6_node *fib6_node_lookup(struct fib6_node *root, + const struct in6_addr *daddr, + const struct in6_addr *saddr); struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, -- cgit v1.2.3 From 3b290a31bbc5969f9193f73d547a6dc8a25c6f9e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:20 -0700 Subject: net/ipv6: Rename rt6_multipath_select Rename rt6_multipath_select to fib6_multipath_select and export it. A later patch wants access to it similar to IPv4's fib_select_path. Signed-off-by: David Ahern Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/net/ip6_fib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 43ab545e64ea..2597d8fdd92f 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -376,6 +376,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); +struct fib6_info *fib6_multipath_select(const struct net *net, + struct fib6_info *match, + struct flowi6 *fl6, int oif, + const struct sk_buff *skb, int strict); + struct fib6_node *fib6_node_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr); -- cgit v1.2.3 From 1d053da910947afccec96d90892c0f5488c7a9cf Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:21 -0700 Subject: net/ipv6: Extract table lookup from ip6_pol_route ip6_pol_route is used for ingress and egress FIB lookups. Refactor it moving the table lookup into a separate fib6_table_lookup that can be invoked separately and export the new function. ip6_pol_route now calls fib6_table_lookup and uses the result to generate a dst based rt6_info. Signed-off-by: David Ahern Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/net/ip6_fib.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 2597d8fdd92f..c70705f2647a 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -376,6 +376,10 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); +/* called with rcu lock held; caller needs to select path */ +struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, int strict); + struct fib6_info *fib6_multipath_select(const struct net *net, struct fib6_info *match, struct flowi6 *fl6, int oif, -- cgit v1.2.3 From 138118ec96cbfc303c1d7cc05fbb2caf8382c95b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:23 -0700 Subject: net/ipv6: Add fib6_lookup Add IPv6 equivalent to fib_lookup. Does a fib lookup, including rules, but returns a FIB entry, fib6_info, rather than a dst based rt6_info. fib6_lookup is any where from 140% (MULTIPLE_TABLES config disabled) to 60% faster than any of the dst based lookup methods (without custom rules) and 25% faster with custom rules (e.g., l3mdev rule). Since the lookup function has a completely different signature, fib6_rule_action is split into 2 paths: the existing one is renamed __fib6_rule_action and a new one for the fib6_info path is added. fib6_rule_action decides which to call based on the lookup_ptr. If it is fib6_table_lookup then the new path is taken. Caller must hold rcu lock as no reference is taken on the returned fib entry. Signed-off-by: David Ahern Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/net/ip6_fib.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c70705f2647a..cc70f6da8462 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -376,6 +376,12 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); +/* called with rcu lock held; can return error pointer + * caller needs to select path + */ +struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, + int flags); + /* called with rcu lock held; caller needs to select path */ struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int strict); -- cgit v1.2.3 From d4bea421f7322400d804c2284739e42e61f78349 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:24 -0700 Subject: net/ipv6: Update fib6 tracepoint to take fib6_info Similar to IPv4, IPv6 should use the FIB lookup result in the tracepoint. Signed-off-by: David Ahern Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/trace/events/fib6.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 7e8d48a81b91..1b8d951e3c12 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -12,10 +12,10 @@ TRACE_EVENT(fib6_table_lookup, - TP_PROTO(const struct net *net, const struct rt6_info *rt, + TP_PROTO(const struct net *net, const struct fib6_info *f6i, struct fib6_table *table, const struct flowi6 *flp), - TP_ARGS(net, rt, table, flp), + TP_ARGS(net, f6i, table, flp), TP_STRUCT__entry( __field( u32, tb_id ) @@ -48,20 +48,20 @@ TRACE_EVENT(fib6_table_lookup, in6 = (struct in6_addr *)__entry->dst; *in6 = flp->daddr; - if (rt->rt6i_idev) { - __assign_str(name, rt->rt6i_idev->dev->name); + if (f6i->fib6_nh.nh_dev) { + __assign_str(name, f6i->fib6_nh.nh_dev); } else { __assign_str(name, ""); } - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { struct in6_addr in6_zero = {}; in6 = (struct in6_addr *)__entry->gw; *in6 = in6_zero; - } else if (rt) { + } else if (f6i) { in6 = (struct in6_addr *)__entry->gw; - *in6 = rt->rt6i_gateway; + *in6 = f6i->fib6_nh.nh_gw; } ), -- cgit v1.2.3 From 65a2022e89a4760f9702837e2d9d15a39a9c68a3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:25 -0700 Subject: net/ipv6: Add fib lookup stubs for use in bpf helper Add stubs to retrieve a handle to an IPv6 FIB table, fib6_get_table, a stub to do a lookup in a specific table, fib6_table_lookup, and a stub for a full route lookup. The stubs are needed for core bpf code to handle the case when the IPv6 module is not builtin. Signed-off-by: David Ahern Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/net/addrconf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 8312cc25a3af..ff766ab207e0 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -223,6 +223,20 @@ struct ipv6_stub { const struct in6_addr *addr); int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); + + struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); + struct fib6_info *(*fib6_lookup)(struct net *net, int oif, + struct flowi6 *fl6, int flags); + struct fib6_info *(*fib6_table_lookup)(struct net *net, + struct fib6_table *table, + int oif, struct flowi6 *fl6, + int flags); + struct fib6_info *(*fib6_multipath_select)(const struct net *net, + struct fib6_info *f6i, + struct flowi6 *fl6, int oif, + const struct sk_buff *skb, + int strict); + void (*udpv6_encap_enable)(void); void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, -- cgit v1.2.3 From 87f5fc7e48dd3175b30dd03b41564e1a8e136323 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:26 -0700 Subject: bpf: Provide helper to do forwarding lookups in kernel FIB table Provide a helper for doing a FIB and neighbor lookup in the kernel tables from an XDP program. The helper provides a fastpath for forwarding packets. If the packet is a local delivery or for any reason is not a simple lookup and forward, the packet continues up the stack. If it is to be forwarded, the forwarding can be done directly if the neighbor is already known. If the neighbor does not exist, the first few packets go up the stack for neighbor resolution. Once resolved, the xdp program provides the fast path. On successful lookup the nexthop dmac, current device smac and egress device index are returned. The API supports IPv4, IPv6 and MPLS protocols, but only IPv4 and IPv6 are implemented in this patch. The API includes layer 4 parameters if the XDP program chooses to do deep packet inspection to allow compare against ACLs implemented as FIB rules. Header rewrite is left to the XDP program. The lookup takes 2 flags: - BPF_FIB_LOOKUP_DIRECT to do a lookup that bypasses FIB rules and goes straight to the table associated with the device (expert setting for those looking to maximize throughput) - BPF_FIB_LOOKUP_OUTPUT to do a lookup from the egress perspective. Default is an ingress lookup. Initial performance numbers collected by Jesper, forwarded packets/sec: Full stack XDP FIB lookup XDP Direct lookup IPv4 1,947,969 7,074,156 7,415,333 IPv6 1,728,000 6,165,504 7,262,720 These number are single CPU core forwarding on a Broadwell E5-1650 v4 @ 3.60GHz. Signed-off-by: David Ahern Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d615c777b573..02e4112510f8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1828,6 +1828,33 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * + * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * Description + * Do FIB lookup in kernel tables using parameters in *params*. + * If lookup is successful and result shows packet is to be + * forwarded, the neighbor tables are searched for the nexthop. + * If successful (ie., FIB lookup shows forwarding and nexthop + * is resolved), the nexthop address is returned in ipv4_dst, + * ipv6_dst or mpls_out based on family, smac is set to mac + * address of egress device, dmac is set to nexthop mac address, + * rt_metric is set to metric from route. + * + * *plen* argument is the size of the passed in struct. + * *flags* argument can be one or more BPF_FIB_LOOKUP_ flags: + * + * **BPF_FIB_LOOKUP_DIRECT** means do a direct table lookup vs + * full lookup using FIB rules + * **BPF_FIB_LOOKUP_OUTPUT** means do lookup from an egress + * perspective (default is ingress) + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * + * Return + * Egress device index on success, 0 if packet needs to continue + * up the stack for further processing or a negative error in case + * of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -1898,7 +1925,8 @@ union bpf_attr { FN(xdp_adjust_tail), \ FN(skb_get_xfrm_state), \ FN(get_stack), \ - FN(skb_load_bytes_relative), + FN(skb_load_bytes_relative), \ + FN(fib_lookup), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2321,4 +2349,55 @@ struct bpf_raw_tracepoint_args { __u64 args[0]; }; +/* DIRECT: Skip the FIB rules and go to FIB table associated with device + * OUTPUT: Do lookup from egress perspective; default is ingress + */ +#define BPF_FIB_LOOKUP_DIRECT BIT(0) +#define BPF_FIB_LOOKUP_OUTPUT BIT(1) + +struct bpf_fib_lookup { + /* input */ + __u8 family; /* network family, AF_INET, AF_INET6, AF_MPLS */ + + /* set if lookup is to consider L4 data - e.g., FIB rules */ + __u8 l4_protocol; + __be16 sport; + __be16 dport; + + /* total length of packet from network header - used for MTU check */ + __u16 tot_len; + __u32 ifindex; /* L3 device index for lookup */ + + union { + /* inputs to lookup */ + __u8 tos; /* AF_INET */ + __be32 flowlabel; /* AF_INET6 */ + + /* output: metric of fib result */ + __u32 rt_metric; + }; + + union { + __be32 mpls_in; + __be32 ipv4_src; + __u32 ipv6_src[4]; /* in6_addr; network order */ + }; + + /* input to bpf_fib_lookup, *dst is destination address. + * output: bpf_fib_lookup sets to gateway address + */ + union { + /* return for MPLS lookups */ + __be32 mpls_out[4]; /* support up to 4 labels */ + __be32 ipv4_dst; + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From e5cd3abcb31a48d4ea91bd32f0618802ca5f3592 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 14 May 2018 10:00:16 -0700 Subject: bpf: sockmap, refactor sockmap routines to work with hashmap This patch only refactors the existing sockmap code. This will allow much of the psock initialization code path and bpf helper codes to work for both sockmap bpf map types that are backed by an array, the currently supported type, and the new hash backed bpf map type sockhash. Most the fallout comes from three changes, - Pushing bpf programs into an independent structure so we can use it from the htab struct in the next patch. - Generalizing helpers to use void *key instead of the hardcoded u32. - Instead of passing map/key through the metadata we now do the lookup inline. This avoids storing the key in the metadata which will be useful when keys can be longer than 4 bytes. We rename the sk pointers to sk_redir at this point as well to avoid any confusion between the current sk pointer and the redirect pointer sk_redir. Signed-off-by: John Fastabend Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 3 +-- include/net/tcp.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index da7e16523128..9dbcb9d55921 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -515,9 +515,8 @@ struct sk_msg_buff { int sg_end; struct scatterlist sg_data[MAX_SKB_FRAGS]; bool sg_copy[MAX_SKB_FRAGS]; - __u32 key; __u32 flags; - struct bpf_map *map; + struct sock *sk_redir; struct sk_buff *skb; struct list_head list; }; diff --git a/include/net/tcp.h b/include/net/tcp.h index cf803fe0fb86..059287374ba0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -814,9 +814,8 @@ struct tcp_skb_cb { #endif } header; /* For incoming skbs */ struct { - __u32 key; __u32 flags; - struct bpf_map *map; + struct sock *sk_redir; void *data_end; } bpf; }; -- cgit v1.2.3 From 81110384441a59cff47430f20f049e69b98c17f4 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 14 May 2018 10:00:17 -0700 Subject: bpf: sockmap, add hash map support Sockmap is currently backed by an array and enforces keys to be four bytes. This works well for many use cases and was originally modeled after devmap which also uses four bytes keys. However, this has become limiting in larger use cases where a hash would be more appropriate. For example users may want to use the 5-tuple of the socket as the lookup key. To support this add hash support. Signed-off-by: John Fastabend Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 8 +++++++ include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 54 +++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a38e474bf7ee..ed0122b45b63 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -668,6 +668,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); +struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); #else static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) @@ -675,6 +676,12 @@ static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) return NULL; } +static inline struct sock *__sock_hash_lookup_elem(struct bpf_map *map, + void *key) +{ + return NULL; +} + static inline int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type) @@ -724,6 +731,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto; +extern const struct bpf_func_proto bpf_sock_hash_update_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index d7df1b323082..b67f8793de0d 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -47,6 +47,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 02e4112510f8..d94d333a8225 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -118,6 +118,7 @@ enum bpf_map_type { BPF_MAP_TYPE_SOCKMAP, BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, + BPF_MAP_TYPE_SOCKHASH, }; enum bpf_prog_type { @@ -1828,7 +1829,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. @@ -1855,6 +1855,53 @@ union bpf_attr { * Egress device index on success, 0 if packet needs to continue * up the stack for further processing or a negative error in case * of failure. + * + * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) + * Description + * Add an entry to, or update a sockhash *map* referencing sockets. + * The *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. + * if the verdeict eBPF program returns **SK_PASS**), redirect it + * to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -1926,7 +1973,10 @@ union bpf_attr { FN(skb_get_xfrm_state), \ FN(get_stack), \ FN(skb_load_bytes_relative), \ - FN(fib_lookup), + FN(fib_lookup), \ + FN(sock_hash_update), \ + FN(msg_redirect_hash), \ + FN(sk_redirect_hash), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From be2d04d11fd33bd46622f94619aae1596d9f9303 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 16 May 2018 22:27:41 +0200 Subject: bpf: add __printf verification to bpf_verifier_vlog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __printf is useful to verify format and arguments. ‘bpf_verifier_vlog’ function is used twice in verifier.c in both cases the caller function already uses the __printf gcc attribute. Remove the following warning, triggered with W=1: kernel/bpf/verifier.c:176:2: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] Signed-off-by: Mathieu Malaterre Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8f70dc181e23..c286813deaeb 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -200,8 +200,8 @@ struct bpf_verifier_env { u32 subprog_cnt; }; -void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, - va_list args); +__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, + const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); -- cgit v1.2.3