From 9d4a75efa200a31deabe9ba1c941aef697e6bb30 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 27 Aug 2020 16:58:29 +0200 Subject: io_uring: use an enumeration for io_uring_register(2) opcodes The enumeration allows us to keep track of the last io_uring_register(2) opcode available. Behaviour and opcode names don't change. Signed-off-by: Stefano Garzarella Reviewed-by: Kees Cook Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d65fde732518..5f12ae6a415c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -255,17 +255,22 @@ struct io_uring_params { /* * io_uring_register(2) opcodes and arguments */ -#define IORING_REGISTER_BUFFERS 0 -#define IORING_UNREGISTER_BUFFERS 1 -#define IORING_REGISTER_FILES 2 -#define IORING_UNREGISTER_FILES 3 -#define IORING_REGISTER_EVENTFD 4 -#define IORING_UNREGISTER_EVENTFD 5 -#define IORING_REGISTER_FILES_UPDATE 6 -#define IORING_REGISTER_EVENTFD_ASYNC 7 -#define IORING_REGISTER_PROBE 8 -#define IORING_REGISTER_PERSONALITY 9 -#define IORING_UNREGISTER_PERSONALITY 10 +enum { + IORING_REGISTER_BUFFERS = 0, + IORING_UNREGISTER_BUFFERS = 1, + IORING_REGISTER_FILES = 2, + IORING_UNREGISTER_FILES = 3, + IORING_REGISTER_EVENTFD = 4, + IORING_UNREGISTER_EVENTFD = 5, + IORING_REGISTER_FILES_UPDATE = 6, + IORING_REGISTER_EVENTFD_ASYNC = 7, + IORING_REGISTER_PROBE = 8, + IORING_REGISTER_PERSONALITY = 9, + IORING_UNREGISTER_PERSONALITY = 10, + + /* this goes last */ + IORING_REGISTER_LAST +}; struct io_uring_files_update { __u32 offset; -- cgit v1.2.3 From 21b55dbc0653018b8cd4513c37cbca303b0f0d50 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 27 Aug 2020 16:58:30 +0200 Subject: io_uring: add IORING_REGISTER_RESTRICTIONS opcode The new io_uring_register(2) IORING_REGISTER_RESTRICTIONS 
opcode permanently installs a feature allowlist on an io_ring_ctx. The io_ring_ctx can then be passed to untrusted code with the knowledge that only operations present in the allowlist can be executed. The allowlist approach ensures that new features added to io_uring do not accidentally become available when an existing application is launched on a newer kernel version. Currently it is possible to restrict sqe opcodes, sqe flags, and register opcodes. IORING_REGISTER_RESTRICTIONS can only be used once. Afterwards it is not possible to change restrictions anymore. This prevents untrusted code from removing restrictions. Suggested-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Reviewed-by: Kees Cook Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5f12ae6a415c..6e7f2e5e917b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -267,6 +267,7 @@ enum { IORING_REGISTER_PROBE = 8, IORING_REGISTER_PERSONALITY = 9, IORING_UNREGISTER_PERSONALITY = 10, + IORING_REGISTER_RESTRICTIONS = 11, /* this goes last */ IORING_REGISTER_LAST @@ -295,4 +296,34 @@ struct io_uring_probe { struct io_uring_probe_op ops[0]; }; +struct io_uring_restriction { + __u16 opcode; + union { + __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */ + __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */ + __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */ + }; + __u8 resv; + __u32 resv2[3]; +}; + +/* + * io_uring_restriction->opcode values + */ +enum { + /* Allow an io_uring_register(2) opcode */ + IORING_RESTRICTION_REGISTER_OP = 0, + + /* Allow an sqe opcode */ + IORING_RESTRICTION_SQE_OP = 1, + + /* Allow sqe flags */ + IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2, + + /* Require sqe flags (these flags must be set on each submission) */ + IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3, + + 
IORING_RESTRICTION_LAST +}; + #endif -- cgit v1.2.3 From 7e84e1c7566a1df470a9e1f49d3db2ce311261a4 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 27 Aug 2020 16:58:31 +0200 Subject: io_uring: allow disabling rings during the creation This patch adds a new IORING_SETUP_R_DISABLED flag to start the rings disabled, allowing the user to register restrictions, buffers, and files before starting to process SQEs. When IORING_SETUP_R_DISABLED is set, SQEs are not processed and the SQPOLL kthread is not started. Restriction registration is allowed only while the rings are disabled, to prevent concurrency issues while processing SQEs. The rings can be enabled using the IORING_REGISTER_ENABLE_RINGS opcode with io_uring_register(2). Suggested-by: Jens Axboe Signed-off-by: Stefano Garzarella Reviewed-by: Kees Cook Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6e7f2e5e917b..a0c85e0e9016 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -95,6 +95,7 @@ enum { #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ +#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ enum { IORING_OP_NOP, @@ -268,6 +269,7 @@ enum { IORING_REGISTER_PERSONALITY = 9, IORING_UNREGISTER_PERSONALITY = 10, IORING_REGISTER_RESTRICTIONS = 11, + IORING_REGISTER_ENABLE_RINGS = 12, /* this goes last */ IORING_REGISTER_LAST -- cgit v1.2.3 From 90554200724d5b280439dc361fe7ee92fe459ea7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Sep 2020 12:12:41 -0600 Subject: io_uring: provide IORING_ENTER_SQ_WAIT for SQPOLL SQ ring waits When using SQPOLL, applications can run into the issue of running out of SQ ring entries because the thread hasn't consumed 
them yet. The only option for dealing with that is checking later, or busy checking for the condition. Provide IORING_ENTER_SQ_WAIT if applications want to wait on this condition. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a0c85e0e9016..98d8e06dea22 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -225,6 +225,7 @@ struct io_cqring_offsets { */ #define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_SQ_WAKEUP (1U << 1) +#define IORING_ENTER_SQ_WAIT (1U << 2) /* * Passed in for io_uring_setup(2). Copied back with updated info on success -- cgit v1.2.3