From c3a31e605620c279163c14068a60869ea3fda203 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Oct 2019 13:59:56 -0600 Subject: io_uring: add support for IORING_REGISTER_FILES_UPDATE Allows the application to remove/replace/add files to/from a file set. Passes in a struct: struct io_uring_files_update { __u32 offset; __s32 *fds; }; that holds an array of fds, size of array passed in through the usual nr_args part of the io_uring_register() system call. The logic is as follows: 1) If ->fds[i] is -1, the existing file at i + ->offset is removed from the set. 2) If ->fds[i] is a valid fd, the existing file at i + ->offset is replaced with ->fds[i]. For case #2, is the existing file is currently empty (fd == -1), the new fd is simply added to the array. Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux/io_uring.h') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ea57526a5b89..4f532d9c0554 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -150,5 +150,11 @@ struct io_uring_params { #define IORING_UNREGISTER_FILES 3 #define IORING_REGISTER_EVENTFD 4 #define IORING_UNREGISTER_EVENTFD 5 +#define IORING_REGISTER_FILES_UPDATE 6 + +struct io_uring_files_update { + __u32 offset; + __s32 *fds; +}; #endif -- cgit v1.2.3 From 33a107f0a1b8df0ad925e39d8afc97bb78e0cec1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Oct 2019 12:10:03 -0600 Subject: io_uring: allow application controlled CQ ring size We currently size the CQ ring as twice the SQ ring, to allow some flexibility in not overflowing the CQ ring. This is done because the SQE life time is different than that of the IO request itself, the SQE is consumed as soon as the kernel has seen the entry. Certain application don't need a huge SQ ring size, since they just submit IO in batches. But they may have a lot of requests pending, and hence need a big CQ ring to hold them all. By allowing the application to control the CQ ring size multiplier, we can cater to those applications more efficiently. If an application wants to define its own CQ ring size, it must set IORING_SETUP_CQSIZE in the setup flags, and fill out io_uring_params->cq_entries. The value must be a power of two. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/io_uring.h') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4f532d9c0554..e0137ea6ad79 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -50,6 +50,7 @@ struct io_uring_sqe { #define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ #define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ #define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ +#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_OP_NOP 0 #define IORING_OP_READV 1 -- cgit v1.2.3 From a41525ab2e75987e809926352ebc6f1397da900e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 15 Oct 2019 16:48:15 -0600 Subject: io_uring: add support for absolute timeouts This is a pretty trivial addition on top of the relative timeouts we have now, but it's handy for ensuring tighter timing for those that are building scheduling primitives on top of io_uring. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux/io_uring.h') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e0137ea6ad79..b402dfee5e15 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -70,6 +70,11 @@ struct io_uring_sqe { */ #define IORING_FSYNC_DATASYNC (1U << 0) +/* + * sqe->timeout_flags + */ +#define IORING_TIMEOUT_ABS (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 11365043e5271fea4c92189a976833da477a3a44 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Oct 2019 09:08:32 -0600 Subject: io_uring: add support for canceling timeout requests We might have cases where the need for a specific timeout is gone, add support for canceling an existing timeout operation. This works like the POLL_REMOVE command, where the application passes in the user_data of the timeout it wishes to cancel in the sqe->addr field. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/io_uring.h') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b402dfee5e15..6dc5ced1c37a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -64,6 +64,7 @@ struct io_uring_sqe { #define IORING_OP_SENDMSG 9 #define IORING_OP_RECVMSG 10 #define IORING_OP_TIMEOUT 11 +#define IORING_OP_TIMEOUT_REMOVE 12 /* * sqe->fsync_flags -- cgit v1.2.3 From 17f2fe35d080d8f64e86a60cdcd3a97edcbc213b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Oct 2019 14:42:58 -0600 Subject: io_uring: add support for IORING_OP_ACCEPT This allows an application to call accept4() in an async fashion. Like other opcodes, we first try a non-blocking accept, then punt to async context if we have to. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/io_uring.h') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6dc5ced1c37a..f82d90e617a6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -19,7 +19,10 @@ struct io_uring_sqe { __u8 flags; /* IOSQE_ flags */ __u16 ioprio; /* ioprio for the request */ __s32 fd; /* file descriptor to do IO on */ - __u64 off; /* offset into file */ + union { + __u64 off; /* offset into file */ + __u64 addr2; + }; __u64 addr; /* pointer to buffer or iovecs */ __u32 len; /* buffer size or number of iovecs */ union { @@ -29,6 +32,7 @@ struct io_uring_sqe { __u32 sync_range_flags; __u32 msg_flags; __u32 timeout_flags; + __u32 accept_flags; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -65,6 +69,7 @@ struct io_uring_sqe { #define IORING_OP_RECVMSG 10 #define IORING_OP_TIMEOUT 11 #define IORING_OP_TIMEOUT_REMOVE 12 +#define IORING_OP_ACCEPT 13 /* * sqe->fsync_flags -- cgit v1.2.3 From 62755e35dfb2b113c52b81cd96d01c20971c8e02 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Oct 2019 21:49:21 -0600 Subject: io_uring: support for generic async request cancel This adds support for IORING_OP_ASYNC_CANCEL, which will attempt to cancel requests that have been punted to async context and are now in-flight. This works for regular read/write requests to files, as long as they haven't been started yet. For socket based IO (or things like accept4(2)), we can cancel work that is already running as well. To cancel a request, the sqe must have ->addr set to the user_data of the request it wishes to cancel. If the request is cancelled successfully, the original request is completed with -ECANCELED and the cancel request is completed with a result of 0. If the request was already running, the original may or may not complete in error. The cancel request will complete with -EALREADY for that case. And finally, if the request to cancel wasn't found, the cancel request is completed with -ENOENT. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux/io_uring.h') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f82d90e617a6..6877cf8894db 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -33,6 +33,7 @@ struct io_uring_sqe { __u32 msg_flags; __u32 timeout_flags; __u32 accept_flags; + __u32 cancel_flags; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -70,6 +71,7 @@ struct io_uring_sqe { #define IORING_OP_TIMEOUT 11 #define IORING_OP_TIMEOUT_REMOVE 12 #define IORING_OP_ACCEPT 13 +#define IORING_OP_ASYNC_CANCEL 14 /* * sqe->fsync_flags -- cgit v1.2.3 From 2665abfd757fb35a241c6f0b1ebf620e3ffb36fb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Nov 2019 12:40:47 -0700 Subject: io_uring: add support for linked SQE timeouts While we have support for generic timeouts, we don't have a way to tie a timeout to a specific SQE. The generic timeouts simply trigger wakeups on the CQ ring. This adds support for IORING_OP_LINK_TIMEOUT. This command is only valid as a link to a previous command. The timeout specific can be either relative or absolute, following the same rules as IORING_OP_TIMEOUT. If the timeout triggers before the dependent command completes, it will attempt to cancel that command. Likewise, if the dependent command completes before the timeout triggers, it will cancel the timeout. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/io_uring.h') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6877cf8894db..f1a118b01d18 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -72,6 +72,7 @@ struct io_uring_sqe { #define IORING_OP_TIMEOUT_REMOVE 12 #define IORING_OP_ACCEPT 13 #define IORING_OP_ASYNC_CANCEL 14 +#define IORING_OP_LINK_TIMEOUT 15 /* * sqe->fsync_flags -- cgit v1.2.3 From 1d7bb1d50fb4dc141c7431cc21fdd24ffcc83c76 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 6 Nov 2019 11:31:17 -0700 Subject: io_uring: add support for backlogged CQ ring Currently we drop completion events, if the CQ ring is full. That's fine for requests with bounded completion times, but it may make it harder or impossible to use io_uring with networked IO where request completion times are generally unbounded. Or with POLL, for example, which is also unbounded. After this patch, we never overflow the ring, we simply store requests in a backlog for later flushing. This flushing is done automatically by the kernel. To prevent the backlog from growing indefinitely, if the backlog is non-empty, we apply back pressure on IO submissions. Any attempt to submit new IO with a non-empty backlog will get an -EBUSY return from the kernel. This is a signal to the application that it has backlogged CQ events, and that it must reap those before being allowed to submit more IO. Note that if we do return -EBUSY, we will have filled whatever backlogged events into the CQ ring first, if there's room. This means the application can safely reap events WITHOUT entering the kernel and waiting for them, they are already available in the CQ ring. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/io_uring.h') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f1a118b01d18..2a1569211d87 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -155,6 +155,7 @@ struct io_uring_params { * io_uring_params->features flags */ #define IORING_FEAT_SINGLE_MMAP (1U << 0) +#define IORING_FEAT_NODROP (1U << 1) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3