From d63d1b5edb7b832210bfde587ba9e7549fa064eb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 10 Dec 2019 10:38:56 -0700 Subject: io_uring: add support for fallocate() This exposes fallocate(2) through io_uring. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 55cfcb71606d..ad1574f35eb3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -76,6 +76,7 @@ enum { IORING_OP_ASYNC_CANCEL, IORING_OP_LINK_TIMEOUT, IORING_OP_CONNECT, + IORING_OP_FALLOCATE, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 15b71abe7b52df214785dde0de9f581cc0216d17 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Dec 2019 11:20:36 -0700 Subject: io_uring: add support for IORING_OP_OPENAT This works just like openat(2), except it can be performed async. For the normal case of a non-blocking path lookup this will complete inline. If we have to do IO to perform the open, it'll be done from async context. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ad1574f35eb3..c1a7c1c65eaf 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -34,6 +34,7 @@ struct io_uring_sqe { __u32 timeout_flags; __u32 accept_flags; __u32 cancel_flags; + __u32 open_flags; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -77,6 +78,7 @@ enum { IORING_OP_LINK_TIMEOUT, IORING_OP_CONNECT, IORING_OP_FALLOCATE, + IORING_OP_OPENAT, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From b5dba59e0cf7e2cc4d3b3b1ac5fe81ddf21959eb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Dec 2019 14:02:38 -0700 Subject: io_uring: add support for IORING_OP_CLOSE This works just like close(2), unsurprisingly. We remove the file descriptor and post the completion inline, then offload the actual (potential) last file put to async context. Mark the async part of this work as uncancellable, as we really must guarantee that the latter part of the close is run. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index c1a7c1c65eaf..084dea85b838 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -79,6 +79,7 @@ enum { IORING_OP_CONNECT, IORING_OP_FALLOCATE, IORING_OP_OPENAT, + IORING_OP_CLOSE, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 05f3fb3c5397524feae2e73ee8e150a9090a7da2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Dec 2019 11:22:50 -0700 Subject: io_uring: avoid ring quiesce for fixed file set unregister and update We currently fully quiesce the ring before an unregister or update of the fixed fileset. This is very expensive, and we can be a bit smarter about this. 
Add a percpu refcount for the file tables as a whole. Grab a percpu ref when we use a registered file, and put it on completion. This is cheap to do. Upon removal of a file from a set, switch the ref count to atomic mode. When we hit zero ref on the completion side, then we know we can drop the previously registered files. When the old files have been dropped, switch the ref back to percpu mode for normal operation. Since there's a period between doing the update and the kernel being done with it, add a IORING_OP_FILES_UPDATE opcode that can perform the same action. The application knows the update has completed when it gets the CQE for it. Between doing the update and receiving this completion, the application must continue to use the unregistered fd if submitting IO on this particular file. This takes the runtime of test/file-register from liburing from 14s to about 0.7s. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 084dea85b838..ca436b9d4921 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -80,6 +80,7 @@ enum { IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, + IORING_OP_FILES_UPDATE, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From eddc7ef52a6b37b7ba3d1c8a8fbb63d5d9914f8a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Dec 2019 21:18:10 -0700 Subject: io_uring: add support for IORING_OP_STATX This provides support for async statx(2) through io_uring. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ca436b9d4921..3f45f7c543de 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -35,6 +35,7 @@ struct io_uring_sqe { __u32 accept_flags; __u32 cancel_flags; __u32 open_flags; + __u32 statx_flags; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -81,6 +82,7 @@ enum { IORING_OP_OPENAT, IORING_OP_CLOSE, IORING_OP_FILES_UPDATE, + IORING_OP_STATX, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From ce35a47a3a0208a77b4d31b7f2e8ed57d624093d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Dec 2019 08:04:44 -0700 Subject: io_uring: add IOSQE_ASYNC io_uring defaults to always doing inline submissions, if at all possible. But for larger copies, even if the data is fully cached, that can take a long time. Add an IOSQE_ASYNC flag that the application can set on the SQE - if set, it'll ensure that we always go async for those kinds of requests. Use the io-wq IO_WQ_WORK_CONCURRENT flag to ensure we get the concurrency we desire for this case. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 3f45f7c543de..d7ec50247a3a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -51,6 +51,7 @@ struct io_uring_sqe { #define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */ #define IOSQE_IO_LINK (1U << 2) /* links next sqe */ #define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */ +#define IOSQE_ASYNC (1U << 4) /* always go async */ /* * io_uring_setup() flags -- cgit v1.2.3 From 3a6820f2bb8a079975109c25a5d1f29f46bce5d2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 22 Dec 2019 15:19:35 -0700 Subject: io_uring: add non-vectored read/write commands For use cases that don't already naturally have an iovec, it's easier (or more convenient) to just use a buffer address + length. This is particularly true if the use case is from languages that want to create a memory safe abstraction on top of io_uring, and where introducing the need for the iovec may impose an ownership issue. For those cases, they currently need an indirection buffer, which means allocating data just for this purpose. Add basic read/write that don't require the iovec. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d7ec50247a3a..7fdf994f3313 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -84,6 +84,8 @@ enum { IORING_OP_CLOSE, IORING_OP_FILES_UPDATE, IORING_OP_STATX, + IORING_OP_READ, + IORING_OP_WRITE, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From ba04291eb66ed895f194ae5abd3748d72bf8aaea Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 Dec 2019 16:33:42 -0700 Subject: io_uring: allow use of offset == -1 to mean file position MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This behaves like preadv2/pwritev2 with offset == -1, it'll use (and update) the current file position. This obviously comes with the caveat that if the application has multiple read/writes in flight, then the end result will not be as expected. This is similar to threads sharing a file descriptor and doing IO using the current file position. Since this feature isn't easily detectable by doing a read or write, add a feature flag, IORING_FEAT_RW_CUR_POS, to allow applications to detect presence of this feature. 
Reported-by: 李通洲 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 7fdf994f3313..1f96136eb6ee 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -174,6 +174,7 @@ struct io_uring_params { #define IORING_FEAT_SINGLE_MMAP (1U << 0) #define IORING_FEAT_NODROP (1U << 1) #define IORING_FEAT_SUBMIT_STABLE (1U << 2) +#define IORING_FEAT_RW_CUR_POS (1U << 3) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 4840e418c2fc533d55ff6caa5b9313eed1d26cfd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 Dec 2019 22:03:45 -0700 Subject: io_uring: add IORING_OP_FADVISE This adds support for doing fadvise through io_uring. We assume that WILLNEED doesn't block, but that DONTNEED may block. Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1f96136eb6ee..f86d1c776078 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -36,6 +36,7 @@ struct io_uring_sqe { __u32 cancel_flags; __u32 open_flags; __u32 statx_flags; + __u32 fadvise_advice; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -86,6 +87,7 @@ enum { IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, + IORING_OP_FADVISE, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From c1ca757bd6f4632c510714631ddcc2d13030fe1e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 Dec 2019 22:18:28 -0700 Subject: io_uring: add IORING_OP_MADVISE This adds support for doing madvise(2) through io_uring. We assume that any operation can block, and hence punt everything async. This could be improved, but hard to make bullet proof. The async punt ensures it's safe. 
Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f86d1c776078..8ad3cece5440 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -88,6 +88,7 @@ enum { IORING_OP_READ, IORING_OP_WRITE, IORING_OP_FADVISE, + IORING_OP_MADVISE, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 8110c1a6212e430a84edd2b83fe9043def8b743e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 28 Dec 2019 15:39:54 -0700 Subject: io_uring: add support for IORING_SETUP_CLAMP Some applications like to start small in terms of ring size, and then ramp up as needed. This is a bit tricky to do currently, since we don't advertise the max ring size. This adds IORING_SETUP_CLAMP. If set, and the values for SQ or CQ ring size exceed what we support, then clamp them at the max values instead of returning -EINVAL. Since we return the chosen ring sizes after setup, no further changes are needed on the application side. io_uring already changes the ring sizes if the application doesn't ask for power-of-two sizes, for example. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8ad3cece5440..29fae13395a8 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -61,6 +61,7 @@ struct io_uring_sqe { #define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ #define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ +#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ enum { IORING_OP_NOP, -- cgit v1.2.3 From fddafacee287b3140212c92464077e971401f860 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 4 Jan 2020 20:19:44 -0700 Subject: io_uring: add support for send(2) and recv(2) This adds IORING_OP_SEND for send(2) support, and IORING_OP_RECV for recv(2) support. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 29fae13395a8..0fe270ab191c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -90,6 +90,8 @@ enum { IORING_OP_WRITE, IORING_OP_FADVISE, IORING_OP_MADVISE, + IORING_OP_SEND, + IORING_OP_RECV, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From f2842ab5b72d7ee5f7f8385c2d4f32c133f5837b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Jan 2020 11:04:00 -0700 Subject: io_uring: enable option to only trigger eventfd for async completions If an application is using eventfd notifications with poll to know when new SQEs can be issued, it's expecting the following read/writes to complete inline. And with that, it knows that there are events available, and doesn't want spurious wakeups on the eventfd for those requests. 
This adds IORING_REGISTER_EVENTFD_ASYNC, which works just like IORING_REGISTER_EVENTFD, except it only triggers notifications for events that happen from async completions (IRQ, or io-wq worker completions). Any completions inline from the submission itself will not trigger notifications. Suggested-by: Mark Papadakis Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0fe270ab191c..66772a90a7f2 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -192,6 +192,7 @@ struct io_uring_params { #define IORING_REGISTER_EVENTFD 4 #define IORING_UNREGISTER_EVENTFD 5 #define IORING_REGISTER_FILES_UPDATE 6 +#define IORING_REGISTER_EVENTFD_ASYNC 7 struct io_uring_files_update { __u32 offset; -- cgit v1.2.3 From cebdb98617ae3e842c81c73758a185248b37cfd6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Jan 2020 17:59:24 -0700 Subject: io_uring: add support for IORING_OP_OPENAT2 Add support for the new openat2(2) system call. It's trivial to do, as we can have openat(2) just be wrapped around it. 
Suggested-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 66772a90a7f2..fea7da182851 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -92,6 +92,7 @@ enum { IORING_OP_MADVISE, IORING_OP_SEND, IORING_OP_RECV, + IORING_OP_OPENAT2, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 66f4af93da5761d2fa05c0dc673a47003cdb9cfe Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 16 Jan 2020 15:36:52 -0700 Subject: io_uring: add support for probing opcodes The application currently has no way of knowing if a given opcode is supported or not without having to try and issue one and see if we get -EINVAL or not. And even this approach is fraught with peril, as maybe we're getting -EINVAL due to some fields being missing, or maybe it's just not that easy to issue that particular command without doing some other leg work in terms of setup first. This adds IORING_REGISTER_PROBE, which fills in a structure with info on what is supported or not. This will work even with sparse opcode fields, which may happen in the future or even today if someone backports specific features to older kernels. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index fea7da182851..955fd477e530 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -194,6 +194,7 @@ struct io_uring_params { #define IORING_UNREGISTER_EVENTFD 5 #define IORING_REGISTER_FILES_UPDATE 6 #define IORING_REGISTER_EVENTFD_ASYNC 7 +#define IORING_REGISTER_PROBE 8 struct io_uring_files_update { __u32 offset; @@ -201,4 +202,21 @@ struct io_uring_files_update { __aligned_u64 /* __s32 * */ fds; }; +#define IO_URING_OP_SUPPORTED (1U << 0) + +struct io_uring_probe_op { + __u8 op; + __u8 resv; + __u16 flags; /* IO_URING_OP_* flags */ + __u32 resv2; +}; + +struct io_uring_probe { + __u8 last_op; /* last opcode supported */ + __u8 ops_len; /* length of ops[] array below */ + __u16 resv; + __u32 resv2[3]; + struct io_uring_probe_op ops[0]; +}; + #endif -- cgit v1.2.3 From 6b47ee6ecab142f938a40bf3b297abac74218ee2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 18 Jan 2020 20:22:41 +0300 Subject: io_uring: optimise sqe-to-req flags translation For each IOSQE_* flag there is a corresponding REQ_F_* flag. And there is a repetitive pattern of their translation: e.g. if (sqe->flags & SQE_FLAG*) req->flags |= REQ_F_FLAG* Use same numeric values/bits for them and copy instead of manual handling. 
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 955fd477e530..57d05cc5e271 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -45,14 +45,27 @@ struct io_uring_sqe { }; }; +enum { + IOSQE_FIXED_FILE_BIT, + IOSQE_IO_DRAIN_BIT, + IOSQE_IO_LINK_BIT, + IOSQE_IO_HARDLINK_BIT, + IOSQE_ASYNC_BIT, +}; + /* * sqe->flags */ -#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */ -#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */ -#define IOSQE_IO_LINK (1U << 2) /* links next sqe */ -#define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */ -#define IOSQE_ASYNC (1U << 4) /* always go async */ +/* use fixed fileset */ +#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT) +/* issue after inflight IO */ +#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT) +/* links next sqe */ +#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT) +/* like LINK, but stronger */ +#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT) +/* always go async */ +#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) /* * io_uring_setup() flags -- cgit v1.2.3 From cccf0ee834559ae0b327b40290e14f6a2a017177 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Jan 2020 16:34:48 -0700 Subject: io_uring/io-wq: don't use static creds/mm assignments We currently setup the io_wq with a static set of mm and creds. Even for a single-use io-wq per io_uring, this is suboptimal as we may have multiple enters of the ring. For sharing the io-wq backend, it doesn't work at all. Switch to passing in the creds and mm when the work item is setup. This means that async work is no longer deferred to the io_uring mm and creds, it is done with the current mm and creds. 
Flag this behavior with IORING_FEAT_CUR_PERSONALITY, so applications know they can rely on the current personality (mm and creds) being the same for direct issue and async issue. Reviewed-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 57d05cc5e271..9988e82f858b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -195,6 +195,7 @@ struct io_uring_params { #define IORING_FEAT_NODROP (1U << 1) #define IORING_FEAT_SUBMIT_STABLE (1U << 2) #define IORING_FEAT_RW_CUR_POS (1U << 3) +#define IORING_FEAT_CUR_PERSONALITY (1U << 4) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 24369c2e3bb06d8c4e71fd6ceaf4f8a01ae79b7c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 28 Jan 2020 03:15:48 +0300 Subject: io_uring: add io-wq workqueue sharing If IORING_SETUP_ATTACH_WQ is set, it expects wq_fd in io_uring_params to be a valid io_uring fd, the io-wq of which will be shared with the newly created io_uring instance. If the flag is set but it can't share io-wq, it fails. This allows creation of "sibling" io_urings, where we prefer to keep the SQ/CQ private, but want to share the async backend to minimize the amount of overhead associated with having multiple rings that belong to the same backend. 
Reported-by: Jens Axboe Reported-by: Daurnimator Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9988e82f858b..e067b92af5ad 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -75,6 +75,7 @@ enum { #define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ +#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ enum { IORING_OP_NOP, @@ -183,7 +184,8 @@ struct io_uring_params { __u32 sq_thread_cpu; __u32 sq_thread_idle; __u32 features; - __u32 resv[4]; + __u32 wq_fd; + __u32 resv[3]; struct io_sqring_offsets sq_off; struct io_cqring_offsets cq_off; }; -- cgit v1.2.3 From 071698e13ac6ba786dfa22349a7b62deb5a9464d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 28 Jan 2020 10:04:42 -0700 Subject: io_uring: allow registering credentials If an application wants to use a ring with different kinds of credentials, it can register them upfront. We don't lookup credentials, the credentials of the task calling IORING_REGISTER_PERSONALITY is used. An 'id' is returned for the application to use in subsequent personality support. 
Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e067b92af5ad..b4ccf31db2d1 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -211,6 +211,8 @@ struct io_uring_params { #define IORING_REGISTER_FILES_UPDATE 6 #define IORING_REGISTER_EVENTFD_ASYNC 7 #define IORING_REGISTER_PROBE 8 +#define IORING_REGISTER_PERSONALITY 9 +#define IORING_UNREGISTER_PERSONALITY 10 struct io_uring_files_update { __u32 offset; -- cgit v1.2.3 From 75c6a03904e0dd414a4d99a3072075cb5117e5bc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 28 Jan 2020 10:15:23 -0700 Subject: io_uring: support using a registered personality for commands For personalities previously registered via IORING_REGISTER_PERSONALITY, allow any command to select them. This is done through setting sqe->personality to the id returned from registration, and then flagging sqe->flags with IOSQE_PERSONALITY. 
Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b4ccf31db2d1..98105ff8d3e6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -40,7 +40,12 @@ struct io_uring_sqe { }; __u64 user_data; /* data to be passed back at completion time */ union { - __u16 buf_index; /* index into fixed buffers, if used */ + struct { + /* index into fixed buffers, if used */ + __u16 buf_index; + /* personality to use, if used */ + __u16 personality; + }; __u64 __pad2[3]; }; }; -- cgit v1.2.3 From 3e4827b05d2ac2d377ed136a52829ec46787bf4b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Jan 2020 15:18:09 -0700 Subject: io_uring: add support for epoll_ctl(2) This adds IORING_OP_EPOLL_CTL, which can perform the same work as the epoll_ctl(2) system call. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 98105ff8d3e6..3f7961c1c243 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -112,6 +112,7 @@ enum { IORING_OP_SEND, IORING_OP_RECV, IORING_OP_OPENAT2, + IORING_OP_EPOLL_CTL, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3