summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-11-02 15:27:42 -0700
committerDavid S. Miller <davem@davemloft.net>2019-11-02 15:29:58 -0700
commitae8a76fb8b5d03fa2adc7249dc6131ba6a0c6119 (patch)
treeb197a7452b46abf51ffab8485236ccab69664d5c /include
parentd31e95585ca697fb31440c6fe30113adc85ecfbd (diff)
parent358fdb456288d48874d44a064a82bfb0d9963fa0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-11-02 The following pull-request contains BPF updates for your *net-next* tree. We've added 30 non-merge commits during the last 7 day(s) which contain a total of 41 files changed, 1864 insertions(+), 474 deletions(-). The main changes are: 1) Fix long standing user vs kernel access issue by introducing bpf_probe_read_user() and bpf_probe_read_kernel() helpers, from Daniel. 2) Accelerated xskmap lookup, from Björn and Maciej. 3) Support for automatic map pinning in libbpf, from Toke. 4) Cleanup of BTF-enabled raw tracepoints, from Alexei. 5) Various fixes to libbpf and selftests. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf.h30
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/uaccess.h16
-rw-r--r--include/net/xdp_sock.h51
-rw-r--r--include/uapi/linux/bpf.h124
5 files changed, 145 insertions, 77 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 171be30fe0ae..7c7f518811a6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -373,6 +373,11 @@ enum bpf_cgroup_storage_type {
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+/* The longest tracepoint has 12 args.
+ * See include/trace/bpf_probe.h
+ */
+#define MAX_BPF_FUNC_ARGS 12
+
struct bpf_prog_stats {
u64 cnt;
u64 nsecs;
@@ -1004,31 +1009,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
}
#endif
-#if defined(CONFIG_XDP_SOCKETS)
-struct xdp_sock;
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
-#else
-struct xdp_sock;
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
-
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void __xsk_map_flush(struct bpf_map *map)
-{
-}
-#endif
-
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 36a9c2325176..de14872b01ba 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -26,6 +26,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index d4ee6e942562..67f016010aad 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
* happens, handle that and return -EFAULT.
*/
extern long probe_kernel_read(void *dst, const void *src, size_t size);
+extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
extern long __probe_kernel_read(void *dst, const void *src, size_t size);
/*
@@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
+/*
+ * probe_user_write(): safely attempt to write to a location in user space
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
+extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
+
extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
+extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
+ long count);
+extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
long count);
extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index c9398ce7960f..e3780e4b74e1 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -69,7 +69,14 @@ struct xdp_umem {
/* Nodes are linked in the struct xdp_sock map_list field, and used to
* track which maps a certain socket reside in.
*/
-struct xsk_map;
+
+struct xsk_map {
+ struct bpf_map map;
+ struct list_head __percpu *flush_list;
+ spinlock_t lock; /* Synchronize map updates */
+ struct xdp_sock *xsk_map[];
+};
+
struct xsk_map_node {
struct list_head node;
struct xsk_map *map;
@@ -109,8 +116,6 @@ struct xdp_sock {
struct xdp_buff;
#ifdef CONFIG_XDP_SOCKETS
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-void xsk_flush(struct xdp_sock *xs);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
/* Used from netdev driver */
bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
@@ -134,6 +139,22 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
struct xdp_sock **map_entry);
int xsk_map_inc(struct xsk_map *map);
void xsk_map_put(struct xsk_map *map);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+ u32 key)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct xdp_sock *xs;
+
+ if (key >= map->max_entries)
+ return NULL;
+
+ xs = READ_ONCE(m->xsk_map[key]);
+ return xs;
+}
static inline u64 xsk_umem_extract_addr(u64 addr)
{
@@ -224,15 +245,6 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return -ENOTSUPP;
}
-static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
-{
- return -ENOTSUPP;
-}
-
-static inline void xsk_flush(struct xdp_sock *xs)
-{
-}
-
static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return false;
@@ -357,6 +369,21 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
return 0;
}
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+ u32 key)
+{
+ return NULL;
+}
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4af8b0819a32..df6809a76404 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_PROG_TYPE_TRACING,
};
enum bpf_attach_type {
@@ -199,6 +200,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP6_RECVMSG,
BPF_CGROUP_GETSOCKOPT,
BPF_CGROUP_SETSOCKOPT,
+ BPF_TRACE_RAW_TP,
__MAX_BPF_ATTACH_TYPE
};
@@ -561,10 +563,13 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description
* For tracing programs, safely attempt to read *size* bytes from
- * address *src* and store the data in *dst*.
+ * kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ * instead.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -1426,45 +1431,14 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
- * Copy a NUL terminated string from an unsafe address
- * *unsafe_ptr* to *dst*. The *size* should include the
- * terminating NUL byte. In case the string length is smaller than
- * *size*, the target is not padded with further NUL bytes. If the
- * string length is larger than *size*, just *size*-1 bytes are
- * copied and the last byte is set to NUL.
- *
- * On success, the length of the copied string is returned. This
- * makes this helper useful in tracing programs for reading
- * strings, and more importantly to get its length at runtime. See
- * the following snippet:
- *
- * ::
- *
- * SEC("kprobe/sys_open")
- * void bpf_sys_open(struct pt_regs *ctx)
- * {
- * char buf[PATHLEN]; // PATHLEN is defined to 256
- * int res = bpf_probe_read_str(buf, sizeof(buf),
- * ctx->di);
- *
- * // Consume buf, for example push it to
- * // userspace via bpf_perf_event_output(); we
- * // can use res (the string length) as event
- * // size, after checking its boundaries.
- * }
- *
- * In comparison, using **bpf_probe_read()** helper here instead
- * to read the string would require to estimate the length at
- * compile time, and would often result in copying more memory
- * than necessary.
+ * Copy a NUL terminated string from an unsafe kernel address
+ * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * more details.
*
- * Another useful use case is when parsing individual process
- * arguments or individual environment variables navigating
- * *current*\ **->mm->arg_start** and *current*\
- * **->mm->env_start**: using this helper and the return value,
- * one can quickly iterate at the right offset of the memory area.
+ * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ * instead.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
@@ -2775,6 +2749,72 @@ union bpf_attr {
* restricted to raw_tracepoint bpf programs.
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Safely attempt to read *size* bytes from user space address
+ * *unsafe_ptr* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Safely attempt to read *size* bytes from kernel space address
+ * *unsafe_ptr* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe user address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read_user()** helper here
+ * instead to read the string would require to estimate the length
+ * at compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * Return
+ * On success, the strictly positive length of the string, including
+ * the trailing NUL character. On error, a negative value.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2888,7 +2928,11 @@ union bpf_attr {
FN(sk_storage_delete), \
FN(send_signal), \
FN(tcp_gen_syncookie), \
- FN(skb_output),
+ FN(skb_output), \
+ FN(probe_read_user), \
+ FN(probe_read_kernel), \
+ FN(probe_read_user_str), \
+ FN(probe_read_kernel_str),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call