From bcc7f554cfa7e0ac77c7adc4027c16f4a2f99c6f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 23 Jun 2020 16:39:35 +0100 Subject: bpf: Fix formatting in documentation for BPF helpers When producing the bpf-helpers.7 man page from the documentation from the BPF user space header file, rst2man complains: :2636: (ERROR/3) Unexpected indentation. :2640: (WARNING/2) Block quote ends without a blank line; unexpected unindent. Let's fix formatting for the relevant chunk (item list in bpf_ringbuf_query()'s description), and for a couple other functions. Signed-off-by: Quentin Monnet Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200623153935.6215-1-quentin@isovalent.com --- include/uapi/linux/bpf.h | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 974a71342aea..8bd33050b7bb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3171,13 +3171,12 @@ union bpf_attr { * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * Description * Copy *size* bytes from *data* into a ring buffer *ringbuf*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return - * 0, on success; - * < 0, on error. + * 0 on success, or a negative error in case of failure. * * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) * Description @@ -3189,20 +3188,20 @@ union bpf_attr { * void bpf_ringbuf_submit(void *data, u64 flags) * Description * Submit reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * * void bpf_ringbuf_discard(void *data, u64 flags) * Description * Discard reserved ring buffer sample, pointed to by *data*. - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of - * new data availability is sent. - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of - * new data availability is sent unconditionally. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. * Return * Nothing. Always succeeds. * @@ -3210,16 +3209,18 @@ union bpf_attr { * Description * Query various characteristics of provided ring buffer. What * exactly is queries is determined by *flags*: - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; - * - BPF_RB_RING_SIZE - the size of ring buffer; - * - BPF_RB_CONS_POS - consumer position (can wrap around); - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); - * Data returned is just a momentary snapshots of actual values + * + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. + * * **BPF_RB_RING_SIZE**: The size of ring buffer. + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). + * + * Data returned is just a momentary snapshot of actual values * and could be inaccurate, so this facility should be used to * power heuristics and for reporting, not to make 100% correct * calculation. * Return - * Requested value, or 0, if flags are not recognized. + * Requested value, or 0, if *flags* are not recognized. * * int bpf_csum_level(struct sk_buff *skb, u64 level) * Description -- cgit v1.2.3 From a9b59159d338d414acaa8e2f569d129d51c76452 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Jun 2020 15:20:39 -0700 Subject: bpf: Do not allow btf_ctx_access with __int128 types To ensure btf_ctx_access() is safe the verifier checks that the BTF arg type is an int, enum, or pointer. When the function does the BTF arg lookup it uses the calculation 'arg = off / 8' using the fact that registers are 8B. This requires that the first arg is in the first reg, the second in the second, and so on. However, for __int128 the arg will consume two registers by default LLVM implementation. So this will cause the arg layout assumed by the 'arg = off / 8' calculation to be incorrect. Because __int128 is uncommon this patch applies the easiest fix and will force int types to be sizeof(u64) or smaller so that they will fit in a single register. v2: remove unneeded parens per Andrii's feedback Fixes: 9e15db66136a1 ("bpf: Implement accurate raw_tp context access via BTF") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159303723962.11287.13309537171132420717.stgit@john-Precision-5820-Tower --- include/linux/btf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 5c1ea99b480f..8b81fbb4497c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,6 +82,11 @@ static inline bool btf_type_is_int(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_INT; } +static inline bool btf_type_is_small_int(const struct btf_type *t) +{ + return btf_type_is_int(t) && t->size <= sizeof(u64); +} + static inline bool btf_type_is_enum(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; -- cgit v1.2.3 From 3aa91625007807bfca4155df1867a5c924a08662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:56 +0200 Subject: dma-mapping: Add a new dma_need_sync API Add a new API to check if calls to dma_sync_single_for_{device,cpu} are required for a given DMA streaming mapping. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-2-hch@lst.de --- include/linux/dma-direct.h | 1 + include/linux/dma-mapping.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 136f984df0d9..8b006730687b 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -87,4 +87,5 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 78f677cf45ab..a33ed3954ed4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -461,6 +461,7 @@ int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, @@ -571,6 +572,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; -- cgit v1.2.3 From 91d5b70273267bbae6f5d1fb4cf3510bd31ef9ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:57 +0200 Subject: xsk: Replace the cheap_dma flag with a dma_need_sync flag Invert the polarity and better name the flag so that the use case is properly documented. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-3-hch@lst.de --- include/net/xsk_buff_pool.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index a4ff226505c9..6842990e2712 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -40,7 +40,7 @@ struct xsk_buff_pool { u32 headroom; u32 chunk_size; u32 frame_len; - bool cheap_dma; + bool dma_need_sync; bool unaligned; void *addrs; struct device *dev; @@ -80,7 +80,7 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb) void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb); static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb) { - if (xskb->pool->cheap_dma) + if (!xskb->pool->dma_need_sync) return; xp_dma_sync_for_cpu_slow(xskb); @@ -91,7 +91,7 @@ void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma, static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { - if (pool->cheap_dma) + if (!pool->dma_need_sync) return; xp_dma_sync_for_device_slow(pool, dma, size); -- cgit v1.2.3 From 3b7016996c4c44db5d499d98759b82fb714bb912 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:54 +0200 Subject: flow_dissector: Pull BPF program assignment up to bpf-netns Prepare for using bpf_prog_array to store attached programs by moving out code that updates the attached program out of flow dissector. Managing bpf_prog_array is more involved than updating a single bpf_prog pointer. This will let us do it all from one place, bpf/net_namespace.c, in the subsequent patch. No functional change intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200625141357.910330-2-jakub@cloudflare.com --- include/net/flow_dissector.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index a7eba43fe4e4..4b6e36288ddd 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -372,7 +372,8 @@ flow_dissector_init_keys(struct flow_dissector_key_control *key_control, } #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog); +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog); #endif /* CONFIG_BPF_SYSCALL */ #endif -- cgit v1.2.3 From 695c12147a40181fe9221d321c3f2de33c9574ed Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:55 +0200 Subject: bpf, netns: Keep attached programs in bpf_prog_array Prepare for having multi-prog attachments for new netns attach types by storing programs to run in a bpf_prog_array, which is well suited for iterating over programs and running them in sequence. After this change bpf(PROG_QUERY) may block to allocate memory in bpf_prog_array_copy_to_user() for collected program IDs. This forces a change in how we protect access to the attached program in the query callback. Because bpf_prog_array_copy_to_user() can sleep, we switch from an RCU read lock to holding a mutex that serializes updaters. Because we allow only one BPF flow_dissector program to be attached to netns at all times, the bpf_prog_array pointed by net->bpf.run_array is always either detached (null) or one element long. No functional changes intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200625141357.910330-3-jakub@cloudflare.com --- include/net/netns/bpf.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index a8dce2a380c8..a5015bda9979 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -9,9 +9,12 @@ #include struct bpf_prog; +struct bpf_prog_array; struct netns_bpf { - struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE]; + /* Array of programs to run compiled from progs or links */ + struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; + struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE]; struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE]; }; -- cgit v1.2.3 From ab53cad90eb10c9991f501ba08904680a074ef3d Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 25 Jun 2020 16:13:56 +0200 Subject: bpf, netns: Keep a list of attached bpf_link's To support multi-prog link-based attachments for new netns attach types, we need to keep track of more than one bpf_link per attach type. Hence, convert net->bpf.links into a list, that currently can be either empty or have just one item. Instead of reusing bpf_prog_list from bpf-cgroup, we link together bpf_netns_link's themselves. This makes list management simpler as we don't have to allocate, initialize, and later release list elements. We can do this because multi-prog attachment will be available only for bpf_link, and we don't need to build a list of programs attached directly and indirectly via links. No functional changes intended. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200625141357.910330-4-jakub@cloudflare.com --- include/net/netns/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h index a5015bda9979..0ca6a1b87185 100644 --- a/include/net/netns/bpf.h +++ b/include/net/netns/bpf.h @@ -15,7 +15,7 @@ struct netns_bpf { /* Array of programs to run compiled from progs or links */ struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE]; - struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE]; + struct list_head links[MAX_NETNS_BPF_ATTACH_TYPE]; }; #endif /* __NETNS_BPF_H__ */ -- cgit v1.2.3 From 4ac2add65974e4efafb8d4ccd8fc5660417ea312 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:26 +0100 Subject: bpf: flow_dissector: Check value of unused flags to BPF_PROG_DETACH Using BPF_PROG_DETACH on a flow dissector program supports neither attach_flags nor attach_bpf_fd. Yet no value is enforced for them. Enforce that attach_flags are zero, and require the current program to be passed via attach_bpf_fd. This allows us to remove the check for CAP_SYS_ADMIN, since userspace can now no longer remove arbitrary flow dissector programs. Fixes: b27f7bb590ba ("flow_dissector: Move out netns_bpf prog callbacks") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-3-lmb@cloudflare.com --- include/linux/bpf-netns.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index 4052d649f36d..47d5b0c708c9 100644 --- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -33,7 +33,7 @@ int netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); -int netns_bpf_prog_detach(const union bpf_attr *attr); +int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -49,7 +49,8 @@ static inline int netns_bpf_prog_attach(const union bpf_attr *attr, return -EOPNOTSUPP; } -static inline int netns_bpf_prog_detach(const union bpf_attr *attr) +static inline int netns_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) { return -EOPNOTSUPP; } -- cgit v1.2.3 From bb0de3131f4c60a9bf976681e0fe4d1e55c7a821 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:28 +0100 Subject: bpf: sockmap: Require attach_bpf_fd when detaching a program The sockmap code currently ignores the value of attach_bpf_fd when detaching a program. This is contrary to the usual behaviour of checking that attach_bpf_fd represents the currently attached program. Ensure that attach_bpf_fd is indeed the currently attached program. It turns out that all sockmap selftests already do this, which indicates that this is unlikely to cause breakage. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-5-lmb@cloudflare.com --- include/linux/bpf.h | 13 +++++++++++-- include/linux/skmsg.h | 13 +++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..9750a1902ee5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1543,13 +1543,16 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #if defined(CONFIG_BPF_STREAM_PARSER) -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); +int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, + struct bpf_prog *old, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, u32 which) + struct bpf_prog *prog, + struct bpf_prog *old, u32 which) { return -EOPNOTSUPP; } @@ -1559,6 +1562,12 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 08674cd14d5a..1e9ed840b9fc 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -430,6 +430,19 @@ static inline void psock_set_prog(struct bpf_prog **pprog, bpf_prog_put(prog); } +static inline int psock_replace_prog(struct bpf_prog **pprog, + struct bpf_prog *prog, + struct bpf_prog *old) +{ + if (cmpxchg(pprog, old, prog) != old) + return -ENOENT; + + if (old) + bpf_prog_put(old); + + return 0; +} + static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); -- cgit v1.2.3