summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2026-01-15 08:24:02 -0700
committerJens Axboe <axboe@kernel.dk>2026-01-27 11:09:57 -0700
commitd42eb05e60fea31de49897d63a1d73f933303bd4 (patch)
tree0bf7d4c52fcc666161e071024ee4bbdf185502fe /include
parent0105b0562a5ed6374f06e5cd4246a3f1311a65a0 (diff)
io_uring: add support for BPF filtering for opcode restrictions
Add support for loading classic BPF programs with io_uring to provide fine-grained filtering of SQE operations. Unlike IORING_REGISTER_RESTRICTIONS which only allows bitmap-based allow/deny of opcodes, BPF filters can inspect request attributes and make dynamic decisions. The filter is registered via IORING_REGISTER_BPF_FILTER with a struct io_uring_bpf: struct io_uring_bpf_filter { __u32 opcode; /* io_uring opcode to filter */ __u32 flags; __u32 filter_len; /* number of BPF instructions */ __u32 resv; __u64 filter_ptr; /* pointer to BPF filter */ __u64 resv2[5]; }; enum { IO_URING_BPF_CMD_FILTER = 1, }; struct io_uring_bpf { __u16 cmd_type; /* IO_URING_BPF_* values */ __u16 cmd_flags; /* none so far */ __u32 resv; union { struct io_uring_bpf_filter filter; }; }; and the filters get supplied a struct io_uring_bpf_ctx: struct io_uring_bpf_ctx { __u64 user_data; __u8 opcode; __u8 sqe_flags; __u8 pdu_size; __u8 pad[5]; }; where it's possible to filter on opcode and sqe_flags, with pdu_size indicating how much extra data is being passed in beyond the pad field. This will used for specific finer grained filtering inside an opcode. An example of that for sockets is in one of the following patches. Anything the opcode supports can end up in this struct, populated by the opcode itself, and hence can be filtered for. Filters have the following semantics: - Return 1 to allow the request - Return 0 to deny the request with -EACCES - Multiple filters can be stacked per opcode. All filters must return 1 for the opcode to be allowed. - Filters are evaluated in registration order (most recent first) The implementation uses classic BPF (cBPF) rather than eBPF for as that's required for containers, and since they can be used by any user in the system. Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include')
-rw-r--r--include/linux/io_uring_types.h9
-rw-r--r--include/uapi/linux/io_uring.h3
-rw-r--r--include/uapi/linux/io_uring/bpf_filter.h50
3 files changed, 62 insertions, 0 deletions
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index dc6bd6940a0d..74bf98362876 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -219,9 +219,18 @@ struct io_rings {
struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp;
};
+struct io_bpf_filter;
+struct io_bpf_filters {
+ refcount_t refs; /* ref for ->bpf_filters */
+ spinlock_t lock; /* protects ->bpf_filters modifications */
+ struct io_bpf_filter __rcu **filters;
+ struct rcu_head rcu_head;
+};
+
struct io_restriction {
DECLARE_BITMAP(register_op, IORING_REGISTER_LAST);
DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
+ struct io_bpf_filters *bpf_filters;
u8 sqe_flags_allowed;
u8 sqe_flags_required;
/* IORING_OP_* restrictions exist */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index b5b23c0d5283..94669b77fee8 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -700,6 +700,9 @@ enum io_uring_register_op {
/* auxiliary zcrx configuration, see enum zcrx_ctrl_op */
IORING_REGISTER_ZCRX_CTRL = 36,
+ /* register bpf filtering programs */
+ IORING_REGISTER_BPF_FILTER = 37,
+
/* this goes last */
IORING_REGISTER_LAST,
diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h
new file mode 100644
index 000000000000..2d4d0e5743e4
--- /dev/null
+++ b/include/uapi/linux/io_uring/bpf_filter.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Header file for the io_uring BPF filters.
+ */
+#ifndef LINUX_IO_URING_BPF_FILTER_H
+#define LINUX_IO_URING_BPF_FILTER_H
+
+#include <linux/types.h>
+
+/*
+ * Struct passed to filters.
+ */
+struct io_uring_bpf_ctx {
+ __u64 user_data;
+ __u8 opcode;
+ __u8 sqe_flags;
+ __u8 pdu_size; /* size of aux data for filter */
+ __u8 pad[5];
+};
+
+enum {
+ /*
+ * If set, any currently unset opcode will have a deny filter attached
+ */
+ IO_URING_BPF_FILTER_DENY_REST = 1,
+};
+
+struct io_uring_bpf_filter {
+ __u32 opcode; /* io_uring opcode to filter */
+ __u32 flags;
+ __u32 filter_len; /* number of BPF instructions */
+ __u32 resv;
+ __u64 filter_ptr; /* pointer to BPF filter */
+ __u64 resv2[5];
+};
+
+enum {
+ IO_URING_BPF_CMD_FILTER = 1,
+};
+
+struct io_uring_bpf {
+ __u16 cmd_type; /* IO_URING_BPF_* values */
+ __u16 cmd_flags; /* none so far */
+ __u32 resv;
+ union {
+ struct io_uring_bpf_filter filter;
+ };
+};
+
+#endif