summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author: Alexei Starovoitov <ast@kernel.org> 2023-03-01 09:55:24 -0800
committer: Alexei Starovoitov <ast@kernel.org> 2023-03-01 10:06:10 -0800
commit: c4b5c5bad9f07e9074c7abde3289de71c4acac48 (patch)
tree: 8491e38d4f856e6b815036a84bd517f91de12997 /include
parent: ae256f95478e07d49dae5036bb83c09dfbd686d4 (diff)
parent: cfa7b011894d689cccfa88a25da324fa5c34e4ed (diff)
Merge branch 'Add skb + xdp dynptrs'
Joanne Koong says:

====================

This patchset is the 2nd in the dynptr series. The 1st can be found here [0].

This patchset adds skb and xdp type dynptrs, which have two main benefits for packet parsing:
 * allowing operations on sizes that are not statically known at compile-time (eg variable-sized accesses).
 * more ergonomic and less brittle iteration through data (eg does not need manual if checking for being within bounds of data_end)

When comparing the differences in runtime for packet parsing without dynptrs vs. with dynptrs, there is no noticeable difference. Patch 9 contains more details as well as examples of how to use skb and xdp dynptrs.

[0] https://lore.kernel.org/bpf/20220523210712.3641569-1-joannelkoong@gmail.com/

---
Changelog:

v12 = https://lore.kernel.org/bpf/20230226085120.3907863-1-joannelkoong@gmail.com/
v12 -> v13:
 * Fix missing { } for case statement

v11 = https://lore.kernel.org/bpf/20230222060747.2562549-1-joannelkoong@gmail.com/
v11 -> v12:
 * Change constant mem size checking to use "__szk" kfunc annotation for slices
 * Use autoloading for success selftests

v10 = https://lore.kernel.org/bpf/20230216225524.1192789-1-joannelkoong@gmail.com/
v10 -> v11:
 * Reject bpf_dynptr_slice_rdwr() for non-writable progs at load time instead of runtime
 * Add additional patch (__uninit kfunc annotation)
 * Expand on documentation
 * Add bpf_dynptr_write() calls for persisting writes in tests

v9 = https://lore.kernel.org/bpf/20230127191703.3864860-1-joannelkoong@gmail.com/
v9 -> v10:
 * Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr interface
 * Add some more tests
 * Split up patchset into more parts to make it easier to review

v8 = https://lore.kernel.org/bpf/20230126233439.3739120-1-joannelkoong@gmail.com/
v8 -> v9:
 * Fix dynptr_get_type() to check non-stack dynptrs

v7 = https://lore.kernel.org/bpf/20221021011510.1890852-1-joannelkoong@gmail.com/
v7 -> v8:
 * Change helpers to kfuncs
 * Add 2 new patches (1/5 and 2/5)

v6 = https://lore.kernel.org/bpf/20220907183129.745846-1-joannelkoong@gmail.com/
v6 -> v7
 * Change bpf_dynptr_data() to return read-only data slices if the skb prog is read-only (Martin)
 * Add test "skb_invalid_write" to test that writes to rd-only data slices are rejected

v5 = https://lore.kernel.org/bpf/20220831183224.3754305-1-joannelkoong@gmail.com/
v5 -> v6
 * Address kernel test robot errors by static inlining

v4 = https://lore.kernel.org/bpf/20220822235649.2218031-1-joannelkoong@gmail.com/
v4 -> v5
 * Address kernel test robot errors for configs w/out CONFIG_NET set
 * For data slices, return PTR_TO_MEM instead of PTR_TO_PACKET (Kumar)
 * Split selftests into subtests (Andrii)
 * Remove insn patching. Use rdonly and rdwr protos for dynptr skb construction (Andrii)
 * bpf_dynptr_data() returns NULL for rd-only dynptrs. There will be a separate bpf_dynptr_data_rdonly() added later (Andrii and Kumar)

v3 = https://lore.kernel.org/bpf/20220822193442.657638-1-joannelkoong@gmail.com/
v3 -> v4
 * Forgot to commit --amend the kernel test robot error fixups

v2 = https://lore.kernel.org/bpf/20220811230501.2632393-1-joannelkoong@gmail.com/
v2 -> v3
 * Fix kernel test robot build test errors

v1 = https://lore.kernel.org/bpf/20220726184706.954822-1-joannelkoong@gmail.com/
v1 -> v2
 * Return data slices to rd-only skb dynptrs (Martin)
 * bpf_dynptr_write allows writes to frags for skb dynptrs, but always invalidates associated data slices (Martin)
 * Use switch casing instead of ifs (Andrii)
 * Use 0xFD for experimental kind number in the selftest (Zvi)
 * Put selftest conversions w/ dynptrs into new files (Alexei)
 * Add new selftest "test_cls_redirect_dynptr.c"

====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/bpf.h 95
-rw-r--r-- include/linux/bpf_verifier.h 3
-rw-r--r-- include/linux/filter.h 46
-rw-r--r-- include/uapi/linux/bpf.h 18
4 files changed, 126 insertions, 36 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 520b238abd5a..23ec684e660d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -607,11 +607,18 @@ enum bpf_type_flag {
*/
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to sk_buff */
+ DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS),
+
+ /* DYNPTR points to xdp_buff */
+ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
-#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
+#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
+ | DYNPTR_TYPE_XDP)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -1124,6 +1131,37 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
return bpf_func(ctx, insnsi);
}
+/* the implementation of the opaque uapi struct bpf_dynptr */
+struct bpf_dynptr_kern {
+ void *data;
+ /* Size represents the number of usable bytes of dynptr data.
+ * If for example the offset is at 4 for a local dynptr whose data is
+ * of type u64, the number of usable bytes is 4.
+ *
+ * The upper 8 bits are reserved. It is as follows:
+ * Bits 0 - 23 = size
+ * Bits 24 - 30 = dynptr type
+ * Bit 31 = whether dynptr is read-only
+ */
+ u32 size;
+ u32 offset;
+} __aligned(8);
+
+enum bpf_dynptr_type {
+ BPF_DYNPTR_TYPE_INVALID,
+ /* Points to memory that is local to the bpf program */
+ BPF_DYNPTR_TYPE_LOCAL,
+ /* Underlying data is a ringbuf record */
+ BPF_DYNPTR_TYPE_RINGBUF,
+ /* Underlying data is a sk_buff */
+ BPF_DYNPTR_TYPE_SKB,
+ /* Underlying data is a xdp_buff */
+ BPF_DYNPTR_TYPE_XDP,
+};
+
+int bpf_dynptr_check_size(u32 size);
+u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
+
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
@@ -2266,6 +2304,11 @@ static inline bool has_current_bpf_ctx(void)
}
void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog);
+
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size);
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2495,6 +2538,19 @@ static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog)
static inline void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
}
+
+static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size)
+{
+}
+
+static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
+{
+}
+
+static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -2801,6 +2857,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
+int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
@@ -2822,6 +2880,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
{
return 0;
}
+static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr)
+{
+ return -EOPNOTSUPP;
+}
#endif
#ifdef CONFIG_INET
@@ -2913,36 +2976,6 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data);
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data);
-/* the implementation of the opaque uapi struct bpf_dynptr */
-struct bpf_dynptr_kern {
- void *data;
- /* Size represents the number of usable bytes of dynptr data.
- * If for example the offset is at 4 for a local dynptr whose data is
- * of type u64, the number of usable bytes is 4.
- *
- * The upper 8 bits are reserved. It is as follows:
- * Bits 0 - 23 = size
- * Bits 24 - 30 = dynptr type
- * Bit 31 = whether dynptr is read-only
- */
- u32 size;
- u32 offset;
-} __aligned(8);
-
-enum bpf_dynptr_type {
- BPF_DYNPTR_TYPE_INVALID,
- /* Points to memory that is local to the bpf program */
- BPF_DYNPTR_TYPE_LOCAL,
- /* Underlying data is a kernel-produced ringbuf record */
- BPF_DYNPTR_TYPE_RINGBUF,
-};
-
-void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
- enum bpf_dynptr_type type, u32 offset, u32 size);
-void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
-int bpf_dynptr_check_size(u32 size);
-u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
-
#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
void bpf_cgroup_atype_put(int cgroup_atype);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index cf1bb1cf4a7b..b26ff2a8f63b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -616,9 +616,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
-struct bpf_call_arg_meta;
-int process_dynptr_func(struct bpf_verifier_env *env, int regno,
- enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta);
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1727898f1641..efa5d4a1677e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1542,4 +1542,50 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index
return XDP_REDIRECT;
}
+#ifdef CONFIG_NET
+int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
+int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
+ u32 len, u64 flags);
+int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
+int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
+void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
+void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush);
+#else /* CONFIG_NET */
+static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
+ void *to, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset,
+ const void *from, u32 len, u64 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset,
+ void *buf, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset,
+ void *buf, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+ return NULL;
+}
+
+static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf,
+ unsigned long len, bool flush)
+{
+ /* !CONFIG_NET stub: no-op; returns void to match the CONFIG_NET prototype above. */
+}
+#endif /* CONFIG_NET */
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 62ce1f5d1b1d..c9699304aed2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5325,11 +5325,22 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
- * *flags* is currently unused.
+ *
+ * *flags* must be 0 except for skb-type dynptrs.
+ *
+ * For skb-type dynptrs:
+ * * All data slices of the dynptr are automatically
+ * invalidated after **bpf_dynptr_write**\ (). This is
+ * because writing may pull the skb and change the
+ * underlying packet buffer.
+ *
+ * * For *flags*, please see the flags accepted by
+ * **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr or if *flags* is not 0.
+ * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
@@ -5337,6 +5348,9 @@ union bpf_attr {
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
+ *
+ * skb and xdp type dynptrs may not use bpf_dynptr_data. They should
+ * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length