From 59915143e89fb8dc7b5bd9dcaf628d8181fd54ac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 May 2022 08:57:27 -0600 Subject: io_uring: move timeout opcodes and handling into its own file Signed-off-by: Jens Axboe --- io_uring/timeout.c | 634 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 634 insertions(+) create mode 100644 io_uring/timeout.c (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c new file mode 100644 index 000000000000..5e42bfcd683e --- /dev/null +++ b/io_uring/timeout.c @@ -0,0 +1,634 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include + +#include + +#include "io_uring_types.h" +#include "io_uring.h" +#include "refs.h" +#include "timeout.h" + +struct io_timeout { + struct file *file; + u32 off; + u32 target_seq; + struct list_head list; + /* head of the link, used by linked timeouts only */ + struct io_kiocb *head; + /* for linked completions */ + struct io_kiocb *prev; +}; + +struct io_timeout_rem { + struct file *file; + u64 addr; + + /* timeout update */ + struct timespec64 ts; + u32 flags; + bool ltimeout; +}; + +static inline bool io_is_timeout_noseq(struct io_kiocb *req) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + + return !timeout->off; +} + +static inline void io_put_req(struct io_kiocb *req) +{ + if (req_ref_put_and_test(req)) { + io_queue_next(req); + io_free_req(req); + } +} + +static void io_kill_timeout(struct io_kiocb *req, int status) + __must_hold(&req->ctx->completion_lock) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_timeout_data *io = req->async_data; + + if (hrtimer_try_to_cancel(&io->timer) != -1) { + struct io_timeout *timeout = io_kiocb_to_cmd(req); + + if (status) + req_set_fail(req); + atomic_set(&req->ctx->cq_timeouts, + atomic_read(&req->ctx->cq_timeouts) + 1); + list_del_init(&timeout->list); + io_req_tw_post_queue(req, status, 0); + } +} + +__cold void io_flush_timeouts(struct io_ring_ctx *ctx) + __must_hold(&ctx->completion_lock) +{ + u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + struct io_timeout *timeout, *tmp; + + spin_lock_irq(&ctx->timeout_lock); + list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { + struct io_kiocb *req = cmd_to_io_kiocb(timeout); + u32 events_needed, events_got; + + if (io_is_timeout_noseq(req)) + break; + + /* + * Since seq can easily wrap around over time, subtract + * the last seq at which timeouts were flushed before comparing. + * Assuming not more than 2^31-1 events have happened since, + * these subtractions won't have wrapped, so we can check if + * target is in [last_seq, current_seq] by comparing the two. + */ + events_needed = timeout->target_seq - ctx->cq_last_tm_flush; + events_got = seq - ctx->cq_last_tm_flush; + if (events_got < events_needed) + break; + + io_kill_timeout(req, 0); + } + ctx->cq_last_tm_flush = seq; + spin_unlock_irq(&ctx->timeout_lock); +} + +static void io_fail_links(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + struct io_kiocb *nxt, *link = req->link; + bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES; + + req->link = NULL; + while (link) { + long res = -ECANCELED; + + if (link->flags & REQ_F_FAIL) + res = link->cqe.res; + + nxt = link->link; + link->link = NULL; + + trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data, + req->opcode, link); + + if (ignore_cqes) + link->flags |= REQ_F_CQE_SKIP; + else + link->flags &= ~REQ_F_CQE_SKIP; + io_req_set_res(link, res, 0); + __io_req_complete_post(link); + link = nxt; + } +} + +static inline void io_remove_next_linked(struct io_kiocb *req) +{ + struct io_kiocb *nxt = req->link; + + req->link = nxt->link; + nxt->link = NULL; +} + +bool io_disarm_next(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + struct io_kiocb *link = NULL; + bool posted = false; + + if (req->flags & REQ_F_ARM_LTIMEOUT) { + link = req->link; + req->flags &= ~REQ_F_ARM_LTIMEOUT; + if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { + io_remove_next_linked(req); + io_req_tw_post_queue(link, -ECANCELED, 0); + posted = true; + } + } else if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + link = io_disarm_linked_timeout(req); + spin_unlock_irq(&ctx->timeout_lock); + if (link) { + posted = true; + io_req_tw_post_queue(link, -ECANCELED, 0); + } + } + if (unlikely((req->flags & REQ_F_FAIL) && + !(req->flags & REQ_F_HARDLINK))) { + posted |= (req->link != NULL); + io_fail_links(req); + } + return posted; +} + +struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req, + struct io_kiocb *link) + __must_hold(&req->ctx->completion_lock) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_timeout_data *io = link->async_data; + struct io_timeout *timeout = io_kiocb_to_cmd(link); + + io_remove_next_linked(req); + timeout->head = NULL; + if (hrtimer_try_to_cancel(&io->timer) != -1) { + list_del(&timeout->list); + return link; + } + + return NULL; +} + +static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) +{ + struct io_timeout_data *data = container_of(timer, + struct io_timeout_data, timer); + struct io_kiocb *req = data->req; + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->timeout_lock, flags); + list_del_init(&timeout->list); + atomic_set(&req->ctx->cq_timeouts, + atomic_read(&req->ctx->cq_timeouts) + 1); + spin_unlock_irqrestore(&ctx->timeout_lock, flags); + + if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS)) + req_set_fail(req); + + io_req_set_res(req, -ETIME, 0); + req->io_task_work.func = io_req_task_complete; + io_req_task_work_add(req); + return HRTIMER_NORESTART; +} + +static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, + struct io_cancel_data *cd) + __must_hold(&ctx->timeout_lock) +{ + struct io_timeout *timeout; + struct io_timeout_data *io; + struct io_kiocb *req = NULL; + + list_for_each_entry(timeout, &ctx->timeout_list, list) { + struct io_kiocb *tmp = cmd_to_io_kiocb(timeout); + + if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && + cd->data != tmp->cqe.user_data) + continue; + if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { + if (cd->seq == tmp->work.cancel_seq) + continue; + tmp->work.cancel_seq = cd->seq; + } + req = tmp; + break; + } + if (!req) + return ERR_PTR(-ENOENT); + + io = req->async_data; + if (hrtimer_try_to_cancel(&io->timer) == -1) + return ERR_PTR(-EALREADY); + timeout = io_kiocb_to_cmd(req); + list_del_init(&timeout->list); + return req; +} + +int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) + __must_hold(&ctx->completion_lock) +{ + struct io_kiocb *req; + + spin_lock_irq(&ctx->timeout_lock); + req = io_timeout_extract(ctx, cd); + spin_unlock_irq(&ctx->timeout_lock); + + if (IS_ERR(req)) + return PTR_ERR(req); + io_req_task_queue_fail(req, -ECANCELED); + return 0; +} + +static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_kiocb *prev = timeout->prev; + int ret = -ENOENT; + + if (prev) { + if (!(req->task->flags & PF_EXITING)) { + struct io_cancel_data cd = { + .ctx = req->ctx, + .data = prev->cqe.user_data, + }; + + ret = io_try_cancel(req, &cd); + } + io_req_set_res(req, ret ?: -ETIME, 0); + io_req_complete_post(req); + io_put_req(prev); + } else { + io_req_set_res(req, -ETIME, 0); + io_req_complete_post(req); + } +} + +static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) +{ + struct io_timeout_data *data = container_of(timer, + struct io_timeout_data, timer); + struct io_kiocb *prev, *req = data->req; + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->timeout_lock, flags); + prev = timeout->head; + timeout->head = NULL; + + /* + * We don't expect the list to be empty, that will only happen if we + * race with the completion of the linked work. + */ + if (prev) { + io_remove_next_linked(prev); + if (!req_ref_inc_not_zero(prev)) + prev = NULL; + } + list_del(&timeout->list); + timeout->prev = prev; + spin_unlock_irqrestore(&ctx->timeout_lock, flags); + + req->io_task_work.func = io_req_task_link_timeout; + io_req_task_work_add(req); + return HRTIMER_NORESTART; +} + +static clockid_t io_timeout_get_clock(struct io_timeout_data *data) +{ + switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { + case IORING_TIMEOUT_BOOTTIME: + return CLOCK_BOOTTIME; + case IORING_TIMEOUT_REALTIME: + return CLOCK_REALTIME; + default: + /* can't happen, vetted at prep time */ + WARN_ON_ONCE(1); + fallthrough; + case 0: + return CLOCK_MONOTONIC; + } +} + +static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) + __must_hold(&ctx->timeout_lock) +{ + struct io_timeout_data *io; + struct io_timeout *timeout; + struct io_kiocb *req = NULL; + + list_for_each_entry(timeout, &ctx->ltimeout_list, list) { + struct io_kiocb *tmp = cmd_to_io_kiocb(timeout); + + if (user_data == tmp->cqe.user_data) { + req = tmp; + break; + } + } + if (!req) + return -ENOENT; + + io = req->async_data; + if (hrtimer_try_to_cancel(&io->timer) == -1) + return -EALREADY; + hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); + io->timer.function = io_link_timeout_fn; + hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); + return 0; +} + +static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) + __must_hold(&ctx->timeout_lock) +{ + struct io_cancel_data cd = { .data = user_data, }; + struct io_kiocb *req = io_timeout_extract(ctx, &cd); + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_timeout_data *data; + + if (IS_ERR(req)) + return PTR_ERR(req); + + timeout->off = 0; /* noseq */ + data = req->async_data; + list_add_tail(&timeout->list, &ctx->timeout_list); + hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); + return 0; +} + +int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_timeout_rem *tr = io_kiocb_to_cmd(req); + + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->buf_index || sqe->len || sqe->splice_fd_in) + return -EINVAL; + + tr->ltimeout = false; + tr->addr = READ_ONCE(sqe->addr); + tr->flags = READ_ONCE(sqe->timeout_flags); + if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { + if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) + return -EINVAL; + if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) + tr->ltimeout = true; + if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) + return -EINVAL; + if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) + return -EFAULT; + if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) + return -EINVAL; + } else if (tr->flags) { + /* timeout removal doesn't support flags */ + return -EINVAL; + } + + return 0; +} + +static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags) +{ + return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS + : HRTIMER_MODE_REL; +} + +/* + * Remove or update an existing timeout command + */ +int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_timeout_rem *tr = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + int ret; + + if (!(tr->flags & IORING_TIMEOUT_UPDATE)) { + struct io_cancel_data cd = { .data = tr->addr, }; + + spin_lock(&ctx->completion_lock); + ret = io_timeout_cancel(ctx, &cd); + spin_unlock(&ctx->completion_lock); + } else { + enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); + + spin_lock_irq(&ctx->timeout_lock); + if (tr->ltimeout) + ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); + else + ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); + spin_unlock_irq(&ctx->timeout_lock); + } + + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + return IOU_OK; +} + +static int __io_timeout_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe, + bool is_timeout_link) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_timeout_data *data; + unsigned flags; + u32 off = READ_ONCE(sqe->off); + + if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) + return -EINVAL; + if (off && is_timeout_link) + return -EINVAL; + flags = READ_ONCE(sqe->timeout_flags); + if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | + IORING_TIMEOUT_ETIME_SUCCESS)) + return -EINVAL; + /* more than one clock specified is invalid, obviously */ + if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) + return -EINVAL; + + INIT_LIST_HEAD(&timeout->list); + timeout->off = off; + if (unlikely(off && !req->ctx->off_timeout_used)) + req->ctx->off_timeout_used = true; + + if (WARN_ON_ONCE(req_has_async_data(req))) + return -EFAULT; + if (io_alloc_async_data(req)) + return -ENOMEM; + + data = req->async_data; + data->req = req; + data->flags = flags; + + if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) + return -EFAULT; + + if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) + return -EINVAL; + + INIT_LIST_HEAD(&timeout->list); + data->mode = io_translate_timeout_mode(flags); + hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); + + if (is_timeout_link) { + struct io_submit_link *link = &req->ctx->submit_state.link; + + if (!link->head) + return -EINVAL; + if (link->last->opcode == IORING_OP_LINK_TIMEOUT) + return -EINVAL; + timeout->head = link->last; + link->last->flags |= REQ_F_ARM_LTIMEOUT; + } + return 0; +} + +int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + return __io_timeout_prep(req, sqe, false); +} + +int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + return __io_timeout_prep(req, sqe, true); +} + +int io_timeout(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + struct io_timeout_data *data = req->async_data; + struct list_head *entry; + u32 tail, off = timeout->off; + + spin_lock_irq(&ctx->timeout_lock); + + /* + * sqe->off holds how many events that need to occur for this + * timeout event to be satisfied. If it isn't set, then this is + * a pure timeout request, sequence isn't used. + */ + if (io_is_timeout_noseq(req)) { + entry = ctx->timeout_list.prev; + goto add; + } + + tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + timeout->target_seq = tail + off; + + /* Update the last seq here in case io_flush_timeouts() hasn't. + * This is safe because ->completion_lock is held, and submissions + * and completions are never mixed in the same ->completion_lock section. + */ + ctx->cq_last_tm_flush = tail; + + /* + * Insertion sort, ensuring the first entry in the list is always + * the one we need first. + */ + list_for_each_prev(entry, &ctx->timeout_list) { + struct io_timeout *nextt = list_entry(entry, struct io_timeout, list); + struct io_kiocb *nxt = cmd_to_io_kiocb(nextt); + + if (io_is_timeout_noseq(nxt)) + continue; + /* nxt.seq is behind @tail, otherwise would've been completed */ + if (off >= nextt->target_seq - tail) + break; + } +add: + list_add(&timeout->list, entry); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); + spin_unlock_irq(&ctx->timeout_lock); + return IOU_ISSUE_SKIP_COMPLETE; +} + +void io_queue_linked_timeout(struct io_kiocb *req) +{ + struct io_timeout *timeout = io_kiocb_to_cmd(req); + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + /* + * If the back reference is NULL, then our linked request finished + * before we got a chance to setup the timer + */ + if (timeout->head) { + struct io_timeout_data *data = req->async_data; + + data->timer.function = io_link_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), + data->mode); + list_add_tail(&timeout->list, &ctx->ltimeout_list); + } + spin_unlock_irq(&ctx->timeout_lock); + /* drop submission reference */ + io_put_req(req); +} + +static bool io_match_task(struct io_kiocb *head, struct task_struct *task, + bool cancel_all) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_kiocb *req; + + if (task && head->task != task) + return false; + if (cancel_all) + return true; + + io_for_each_link(req, head) { + if (req->flags & REQ_F_INFLIGHT) + return true; + } + return false; +} + +/* Returns true if we found and killed one or more timeouts */ +__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, + bool cancel_all) +{ + struct io_timeout *timeout, *tmp; + int canceled = 0; + + spin_lock(&ctx->completion_lock); + spin_lock_irq(&ctx->timeout_lock); + list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { + struct io_kiocb *req = cmd_to_io_kiocb(timeout); + + if (io_match_task(req, tsk, cancel_all)) { + io_kill_timeout(req, -ECANCELED); + canceled++; + } + } + spin_unlock_irq(&ctx->timeout_lock); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (canceled != 0) + io_cqring_ev_posted(ctx); + return canceled != 0; +} -- cgit v1.2.3 From 7aaff708a768144ec6459f0a58301be1a6b982fc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 May 2022 20:36:47 -0600 Subject: io_uring: move cancelation into its own file This also helps cleanup the io_uring.h cancel parts, as we can make things static in the cancel.c file, mostly. Signed-off-by: Jens Axboe --- io_uring/timeout.c | 1 + 1 file changed, 1 insertion(+) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 5e42bfcd683e..69cca42d6835 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -11,6 +11,7 @@ #include "io_uring_types.h" #include "io_uring.h" #include "refs.h" +#include "cancel.h" #include "timeout.h" struct io_timeout { -- cgit v1.2.3 From 5d7943d99df9326c7b02f773b2d6f09709c30594 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Jun 2022 10:22:11 +0100 Subject: io_uring: propagate locking state to poll cancel Poll cancellation will be soon need to grab ->uring_lock inside, pass the locking state, i.e. issue_flags, inside the cancellation functions. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/b86781d047727c07163443b57551a3fa57c7c5e1.1655371007.git.asml.silence@gmail.com Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- io_uring/timeout.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 69cca42d6835..526fc8b2e3b6 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -262,6 +262,7 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) { + unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED; struct io_timeout *timeout = io_kiocb_to_cmd(req); struct io_kiocb *prev = timeout->prev; int ret = -ENOENT; @@ -273,7 +274,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) .data = prev->cqe.user_data, }; - ret = io_try_cancel(req, &cd); + ret = io_try_cancel(req, &cd, issue_flags); } io_req_set_res(req, ret ?: -ETIME, 0); io_req_complete_post(req); -- cgit v1.2.3 From 27a9d66fec77cff0e32d2ecd5d0ac7ef878a7bb0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Jun 2022 13:57:18 +0100 Subject: io_uring: kill extra io_uring_types.h includes io_uring/io_uring.h already includes io_uring_types.h, no need to include it every time. Kill it in a bunch of places, it prepares us for following patches. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/94d8c943fbe0ef949981c508ddcee7fc1c18850f.1655384063.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/timeout.c | 1 - 1 file changed, 1 deletion(-) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 526fc8b2e3b6..f9df359813c9 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -8,7 +8,6 @@ #include -#include "io_uring_types.h" #include "io_uring.h" #include "refs.h" #include "cancel.h" -- cgit v1.2.3 From 48863ffd3e81b6ec98606d3479c50aa77b7a98a9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Jun 2022 13:57:20 +0100 Subject: io_uring: clean up tracing events We have lots of trace events accepting an io_uring request and wanting to print some of its fields like user_data, opcode, flags and so on. However, as trace points were unaware of io_uring structures, we had to pass all the fields as arguments. Teach trace/events/io_uring.h about struct io_kiocb and stop the misery of passing a horde of arguments to trace helpers. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/40ff72f92798114e56d400f2b003beb6cde6ef53.1655384063.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/timeout.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index f9df359813c9..557c637af158 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -115,8 +115,7 @@ static void io_fail_links(struct io_kiocb *req) nxt = link->link; link->link = NULL; - trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data, - req->opcode, link); + trace_io_uring_fail_link(req, link); if (ignore_cqes) link->flags |= REQ_F_CQE_SKIP; -- cgit v1.2.3 From ba3cdb6fbb6e8eb525c868c60e103c5711edc068 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 20 Jun 2022 01:25:53 +0100 Subject: io_uring: improve task exit timeout cancellations Don't spin trying to cancel timeouts that are reachable but not cancellable, e.g. already executing. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ab8a7440a60bbdf69ae514f672ad050e43dd1b03.1655684496.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/timeout.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 557c637af158..a79a7d6ef1b3 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -49,7 +49,7 @@ static inline void io_put_req(struct io_kiocb *req) } } -static void io_kill_timeout(struct io_kiocb *req, int status) +static bool io_kill_timeout(struct io_kiocb *req, int status) __must_hold(&req->ctx->completion_lock) __must_hold(&req->ctx->timeout_lock) { @@ -64,7 +64,9 @@ static void io_kill_timeout(struct io_kiocb *req, int status) atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&timeout->list); io_req_tw_post_queue(req, status, 0); + return true; } + return false; } __cold void io_flush_timeouts(struct io_ring_ctx *ctx) @@ -620,10 +622,9 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { struct io_kiocb *req = cmd_to_io_kiocb(timeout); - if (io_match_task(req, tsk, cancel_all)) { - io_kill_timeout(req, -ECANCELED); + if (io_match_task(req, tsk, cancel_all) && + io_kill_timeout(req, -ECANCELED)) canceled++; - } } spin_unlock_irq(&ctx->timeout_lock); io_commit_cqring(ctx); -- cgit v1.2.3 From 305bef98870816ae58357d647521891ec558a92e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 20 Jun 2022 01:25:55 +0100 Subject: io_uring: hide eventfd assumptions in eventfd paths Some io_uring-eventfd users assume that there won't be spurious wakeups. That assumption has to be honoured by all io_cqring_ev_posted() callers, which is inconvenient and from time to time leads to problems but should be maintained to not break the userspace. Instead of making the callers track whether a CQE was posted or not, hide it inside io_eventfd_signal(). It saves ->cached_cq_tail it saw last time and triggers the eventfd only when ->cached_cq_tail changed since then. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0ffc66bae37a2513080b601e4370e147faaa72c5.1655684496.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/timeout.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index a79a7d6ef1b3..424b2fc858b8 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -629,7 +629,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, spin_unlock_irq(&ctx->timeout_lock); io_commit_cqring(ctx); spin_unlock(&ctx->completion_lock); - if (canceled != 0) - io_cqring_ev_posted(ctx); + io_cqring_ev_posted(ctx); return canceled != 0; } -- cgit v1.2.3 From 253993210bd8aa3b39a392807c03c8ef1cd7dc3d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 20 Jun 2022 01:25:56 +0100 Subject: io_uring: introduce locking helpers for CQE posting spin_lock(&ctx->completion_lock); /* post CQEs */ io_commit_cqring(ctx); spin_unlock(&ctx->completion_lock); io_cqring_ev_posted(ctx); We have many places repeating this sequence, and the three function unlock section is not perfect from the maintainance perspective and also makes it harder to add new locking/sync trick. Introduce two helpers. io_cq_lock(), which is simple and only grabs ->completion_lock, and io_cq_unlock_post() encapsulating the three call section. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/fe0c682bf7f7b55d9be55b0d034be9c1949277dc.1655684496.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/timeout.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 424b2fc858b8..7e2c341f9762 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -617,7 +617,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, struct io_timeout *timeout, *tmp; int canceled = 0; - spin_lock(&ctx->completion_lock); + io_cq_lock(ctx); spin_lock_irq(&ctx->timeout_lock); list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { struct io_kiocb *req = cmd_to_io_kiocb(timeout); @@ -627,8 +627,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, canceled++; } spin_unlock_irq(&ctx->timeout_lock); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); + io_cq_unlock_post(ctx); return canceled != 0; } -- cgit v1.2.3 From 88f52eaad2df2cb5ab49b864d79398c9cb9a57f2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 18 Jun 2022 09:23:54 -0600 Subject: io_uring: have cancelation API accept io_uring_task directly We just use the io_kiocb passed in to find the io_uring_task, and we already pass in the ctx via cd->ctx anyway. Signed-off-by: Jens Axboe --- io_uring/timeout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 7e2c341f9762..4af074b8f6b7 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -274,7 +274,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) .data = prev->cqe.user_data, }; - ret = io_try_cancel(req, &cd, issue_flags); + ret = io_try_cancel(req->task->io_uring, &cd, issue_flags); } io_req_set_res(req, ret ?: -ETIME, 0); io_req_complete_post(req); -- cgit v1.2.3 From 37c7bd31b3e9e4b6aee3c5227f789c0b586a33a2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 25 Jun 2022 11:52:58 +0100 Subject: io_uring: improve io_fail_links() io_fail_links() is called with ->completion_lock held and for that reason we'd want to keep it as small as we can. Instead of doing __io_req_complete_post() for each linked request under the lock, fail them in a task_work handler under ->uring_lock. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a2f68708b970a21f4e84ddfa7b3abd67a8fffb27.1656153285.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/timeout.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'io_uring/timeout.c') diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 4af074b8f6b7..2f9e56935479 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -101,32 +101,44 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx) spin_unlock_irq(&ctx->timeout_lock); } -static void io_fail_links(struct io_kiocb *req) - __must_hold(&req->ctx->completion_lock) +static void io_req_tw_fail_links(struct io_kiocb *link, bool *locked) { - struct io_kiocb *nxt, *link = req->link; - bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES; - - req->link = NULL; + io_tw_lock(link->ctx, locked); while (link) { + struct io_kiocb *nxt = link->link; long res = -ECANCELED; if (link->flags & REQ_F_FAIL) res = link->cqe.res; - - nxt = link->link; link->link = NULL; + io_req_set_res(link, res, 0); + io_req_task_complete(link, locked); + link = nxt; + } +} - trace_io_uring_fail_link(req, link); +static void io_fail_links(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + struct io_kiocb *link = req->link; + bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES; + + if (!link) + return; + while (link) { if (ignore_cqes) link->flags |= REQ_F_CQE_SKIP; else link->flags &= ~REQ_F_CQE_SKIP; - io_req_set_res(link, res, 0); - __io_req_complete_post(link); - link = nxt; + trace_io_uring_fail_link(req, link); + link = link->link; } + + link = req->link; + link->io_task_work.func = io_req_tw_fail_links; + io_req_task_work_add(link); + req->link = NULL; } static inline void io_remove_next_linked(struct io_kiocb *req) -- cgit v1.2.3