From 59915143e89fb8dc7b5bd9dcaf628d8181fd54ac Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 25 May 2022 08:57:27 -0600
Subject: io_uring: move timeout opcodes and handling into its own file

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 634 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 634 insertions(+)
 create mode 100644 io_uring/timeout.c

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
new file mode 100644
index 000000000000..5e42bfcd683e
--- /dev/null
+++ b/io_uring/timeout.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/io_uring.h>
+
+#include <trace/events/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring_types.h"
+#include "io_uring.h"
+#include "refs.h"
+#include "timeout.h"
+
+struct io_timeout {
+	struct file			*file;
+	u32				off;
+	u32				target_seq;
+	struct list_head		list;
+	/* head of the link, used by linked timeouts only */
+	struct io_kiocb			*head;
+	/* for linked completions */
+	struct io_kiocb			*prev;
+};
+
+struct io_timeout_rem {
+	struct file			*file;
+	u64				addr;
+
+	/* timeout update */
+	struct timespec64		ts;
+	u32				flags;
+	bool				ltimeout;
+};
+
+static inline bool io_is_timeout_noseq(struct io_kiocb *req)
+{
+	struct io_timeout *timeout = io_kiocb_to_cmd(req);
+
+	return !timeout->off;
+}
+
+static inline void io_put_req(struct io_kiocb *req)
+{
+	if (req_ref_put_and_test(req)) {
+		io_queue_next(req);
+		io_free_req(req);
+	}
+}
+
+static void io_kill_timeout(struct io_kiocb *req, int status)
+	__must_hold(&req->ctx->completion_lock)
+	__must_hold(&req->ctx->timeout_lock)
+{
+	struct io_timeout_data *io = req->async_data;
+
+	if (hrtimer_try_to_cancel(&io->timer) != -1) {
+		struct io_timeout *timeout = io_kiocb_to_cmd(req);
+
+		if (status)
+			req_set_fail(req);
+		atomic_set(&req->ctx->cq_timeouts,
+			atomic_read(&req->ctx->cq_timeouts) + 1);
+		list_del_init(&timeout->list);
+		io_req_tw_post_queue(req, status, 0);
+	}
+}
+
+__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->completion_lock)
+{
+	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+	struct io_timeout *timeout, *tmp;
+
+	spin_lock_irq(&ctx->timeout_lock);
+	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
+		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
+		u32 events_needed, events_got;
+
+		if (io_is_timeout_noseq(req))
+			break;
+
+		/*
+		 * Since seq can easily wrap around over time, subtract
+		 * the last seq at which timeouts were flushed before comparing.
+		 * Assuming not more than 2^31-1 events have happened since,
+		 * these subtractions won't have wrapped, so we can check if
+		 * target is in [last_seq, current_seq] by comparing the two.
+		 */
+		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
+		events_got = seq - ctx->cq_last_tm_flush;
+		if (events_got < events_needed)
+			break;
+
+		io_kill_timeout(req, 0);
+	}
+	ctx->cq_last_tm_flush = seq;
+	spin_unlock_irq(&ctx->timeout_lock);
+}
+
+static void io_fail_links(struct io_kiocb *req)
+	__must_hold(&req->ctx->completion_lock)
+{
+	struct io_kiocb *nxt, *link = req->link;
+	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
+
+	req->link = NULL;
+	while (link) {
+		long res = -ECANCELED;
+
+		if (link->flags & REQ_F_FAIL)
+			res = link->cqe.res;
+
+		nxt = link->link;
+		link->link = NULL;
+
+		trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data,
+					req->opcode, link);
+
+		if (ignore_cqes)
+			link->flags |= REQ_F_CQE_SKIP;
+		else
+			link->flags &= ~REQ_F_CQE_SKIP;
+		io_req_set_res(link, res, 0);
+		__io_req_complete_post(link);
+		link = nxt;
+	}
+}
+
+static inline void io_remove_next_linked(struct io_kiocb *req)
+{
+	struct io_kiocb *nxt = req->link;
+
+	req->link = nxt->link;
+	nxt->link = NULL;
+}
+
+bool io_disarm_next(struct io_kiocb *req)
+	__must_hold(&req->ctx->completion_lock)
+{
+	struct io_kiocb *link = NULL;
+	bool posted = false;
+
+	if (req->flags & REQ_F_ARM_LTIMEOUT) {
+		link = req->link;
+		req->flags &= ~REQ_F_ARM_LTIMEOUT;
+		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
+			io_remove_next_linked(req);
+			io_req_tw_post_queue(link, -ECANCELED, 0);
+			posted = true;
+		}
+	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
+		struct io_ring_ctx *ctx = req->ctx;
+
+		spin_lock_irq(&ctx->timeout_lock);
+		link = io_disarm_linked_timeout(req);
+		spin_unlock_irq(&ctx->timeout_lock);
+		if (link) {
+			posted = true;
+			io_req_tw_post_queue(link, -ECANCELED, 0);
+		}
+	}
+	if (unlikely((req->flags & REQ_F_FAIL) &&
+		     !(req->flags & REQ_F_HARDLINK))) {
+		posted |= (req->link != NULL);
+		io_fail_links(req);
+	}
+	return posted;
+}
+
+struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
+					    struct io_kiocb *link)
+	__must_hold(&req->ctx->completion_lock)
+	__must_hold(&req->ctx->timeout_lock)
+{
+	struct io_timeout_data *io = link->async_data;
+	struct io_timeout *timeout = io_kiocb_to_cmd(link);
+
+	io_remove_next_linked(req);
+	timeout->head = NULL;
+	if (hrtimer_try_to_cancel(&io->timer) != -1) {
+		list_del(&timeout->list);
+		return link;
+	}
+
+	return NULL;
+}
+
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+	struct io_timeout_data *data = container_of(timer,
+						struct io_timeout_data, timer);
+	struct io_kiocb *req = data->req;
+	struct io_timeout *timeout = io_kiocb_to_cmd(req);
+	struct io_ring_ctx *ctx = req->ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->timeout_lock, flags);
+	list_del_init(&timeout->list);
+	atomic_set(&req->ctx->cq_timeouts,
+		atomic_read(&req->ctx->cq_timeouts) + 1);
+	spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+
+	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
+		req_set_fail(req);
+
+	io_req_set_res(req, -ETIME, 0);
+	req->io_task_work.func = io_req_task_complete;
+	io_req_task_work_add(req);
+	return HRTIMER_NORESTART;
+}
+
+static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
+					   struct io_cancel_data *cd)
+	__must_hold(&ctx->timeout_lock)
+{
+	struct io_timeout *timeout;
+	struct io_timeout_data *io;
+	struct io_kiocb *req = NULL;
+
+	list_for_each_entry(timeout, &ctx->timeout_list, list) {
+		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
+
+		if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
+		    cd->data != tmp->cqe.user_data)
+			continue;
+		if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
+			if (cd->seq == tmp->work.cancel_seq)
+				continue;
+			tmp->work.cancel_seq = cd->seq;
+		}
+		req = tmp;
+		break;
+	}
+	if (!req)
+		return ERR_PTR(-ENOENT);
+
+	io = req->async_data;
+	if (hrtimer_try_to_cancel(&io->timer) == -1)
+		return ERR_PTR(-EALREADY);
+	timeout = io_kiocb_to_cmd(req);
+	list_del_init(&timeout->list);
+	return req;
+}
+
+int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
+	__must_hold(&ctx->completion_lock)
+{
+	struct io_kiocb *req;
+
+	spin_lock_irq(&ctx->timeout_lock);
+	req = io_timeout_extract(ctx, cd);
+	spin_unlock_irq(&ctx->timeout_lock);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	io_req_task_queue_fail(req, -ECANCELED);
+	return 0;
+}
+
+static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
+{
+	struct io_timeout *timeout = io_kiocb_to_cmd(req);
+	struct io_kiocb *prev = timeout->prev;
+	int ret = -ENOENT;
+
+	if (prev) {
+		if (!(req->task->flags & PF_EXITING)) {
+			struct io_cancel_data cd = {
+				.ctx		= req->ctx,
+				.data		= prev->cqe.user_data,
+			};
+
+			ret = io_try_cancel(req, &cd);
+		}
+		io_req_set_res(req, ret ?: -ETIME, 0);
+		io_req_complete_post(req);
+		io_put_req(prev);
+	} else {
+		io_req_set_res(req, -ETIME, 0);
+		io_req_complete_post(req);
+	}
+}
+
+static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
+{
+	struct io_timeout_data *data = container_of(timer,
+						struct io_timeout_data, timer);
+	struct io_kiocb *prev, *req = data->req;
+	struct io_timeout *timeout = io_kiocb_to_cmd(req);
+	struct io_ring_ctx *ctx = req->ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->timeout_lock, flags);
+	prev = timeout->head;
+	timeout->head = NULL;
+
+	/*
+	 * We don't expect the list to be empty, that will only happen if we
+	 * race with the completion of the linked work.
+	 */
+	if (prev) {
+		io_remove_next_linked(prev);
+		if (!req_ref_inc_not_zero(prev))
+			prev = NULL;
+	}
+	list_del(&timeout->list);
+	timeout->prev = prev;
+	spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+
+	req->io_task_work.func = io_req_task_link_timeout;
+	io_req_task_work_add(req);
+	return HRTIMER_NORESTART;
+}
+
+static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
+{
+	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
+	case IORING_TIMEOUT_BOOTTIME:
+		return CLOCK_BOOTTIME;
+	case IORING_TIMEOUT_REALTIME:
+		return CLOCK_REALTIME;
+	default:
+		/* can't happen, vetted at prep time */
+		WARN_ON_ONCE(1);
+		fallthrough;
+	case 0:
+		return CLOCK_MONOTONIC;
+	}
+}
+
+static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+				    struct timespec64 *ts, enum hrtimer_mode mode)
+	__must_hold(&ctx->timeout_lock)
+{
+	struct io_timeout_data *io;
+	struct io_timeout *timeout;
+	struct io_kiocb *req = NULL;
+
+	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
+		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
+
+		if (user_data == tmp->cqe.user_data) {
+			req = tmp;
+			break;
+		}
+	}
+	if (!req)
+		return -ENOENT;
+
+	io = req->async_data;
+	if (hrtimer_try_to_cancel(&io->timer) == -1)
+		return -EALREADY;
+	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
+	io->timer.function = io_link_timeout_fn;
+	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
+	return 0;
+}
+
+static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+			     struct timespec64 *ts, enum hrtimer_mode mode)
+	__must_hold(&ctx->timeout_lock)
+{
+	struct io_cancel_data cd = { .data = user_data, };
+	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
+	struct io_timeout *timeout = io_kiocb_to_cmd(req);
+	struct io_timeout_data *data;
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	timeout->off = 0; /* noseq */
+	data = req->async_data;
+	list_add_tail(&timeout->list, &ctx->timeout_list);
+	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
+	data->timer.function = io_timeout_fn;
+	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
+	return 0;
+}
+
+int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_timeout_rem *tr = io_kiocb_to_cmd(req);
+
+	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+		return -EINVAL;
+	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
+		return -EINVAL;
+
+	tr->ltimeout = false;
+	tr->addr = READ_ONCE(sqe->addr);
+	tr->flags = READ_ONCE(sqe->timeout_flags);
+	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
+		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
+			return -EINVAL;
+		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
+			tr->ltimeout = true;
+		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
+			return -EINVAL;
+		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
+			return -EFAULT;
+		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
+			return -EINVAL;
+	} else if (tr->flags) {
+		/* timeout removal doesn't support flags */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
+{
+	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
+					    : HRTIMER_MODE_REL;
+}
+
+/*
+ * Remove or update an existing timeout command
+ */
+int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_timeout_rem *tr = io_kiocb_to_cmd(req);
+	struct io_ring_ctx *ctx = req->ctx;
+	int ret;
+
+	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
+		struct io_cancel_data cd = { .data = tr->addr, };
+
+		spin_lock(&ctx->completion_lock);
+		ret = io_timeout_cancel(ctx, &cd);
+		spin_unlock(&ctx->completion_lock);
+	} else {
+		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
+
+		spin_lock_irq(&ctx->timeout_lock);
+		if (tr->ltimeout)
+			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
+		else
+			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
+		spin_unlock_irq(&ctx->timeout_lock);
+	}
+
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
+
+static int __io_timeout_prep(struct io_kiocb *req,
+			     const struct io_uring_sqe *sqe,
+			     bool is_timeout_link)
+{
+	struct io_timeout *timeout = io_kiocb_to_cmd(req);
+	struct io_timeout_data *data;
+	unsigned flags;
+	u32 off = READ_ONCE(sqe->off);
+
+	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
+		return -EINVAL;
+	if (off && is_timeout_link)
+		return -EINVAL;
+	flags = READ_ONCE(sqe->timeout_flags);
+	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
+		      IORING_TIMEOUT_ETIME_SUCCESS))
+		return -EINVAL;
+	/* more than one clock specified is invalid, obviously */
+	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&timeout->list);
+	timeout->off = off;
+	if (unlikely(off && !req->ctx->off_timeout_used))
+		req->ctx->off_timeout_used = true;
+
+	if (WARN_ON_ONCE(req_has_async_data(req)))
+		return -EFAULT;
+	if (io_alloc_async_data(req))
+		return -ENOMEM;
+
+	data = req->async_data;
+	data->req = req;
+	data->flags = flags;
+
+	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
+		return -EFAULT;
+
+	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&timeout->list);
+	data->mode = io_translate_timeout_mode(flags);
+	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
+
+	if (is_timeout_link) {
+		struct io_submit_link *link = &req->ctx->submit_state.link;
+
+		if (!link->head)
+			return -EINVAL;
+		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
+			return -EINVAL;
+		timeout->head = link->last;
+		link->last->flags |= REQ_F_ARM_LTIMEOUT;
+	}
+	return 0;
+}
+
+int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	return __io_timeout_prep(req, sqe, false);
+}
+
+int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	return __io_timeout_prep(req, sqe, true);
+}
+
+int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_timeout *timeout = io_kiocb_to_cmd(req);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_timeout_data *data = req->async_data;
+	struct list_head *entry;
+	u32 tail, off = timeout->off;
+
+	spin_lock_irq(&ctx->timeout_lock);
+
+	/*
+	 * sqe->off holds how many events that need to occur for this
+	 * timeout event to be satisfied. If it isn't set, then this is
+	 * a pure timeout request, sequence isn't used.
+	 */
+	if (io_is_timeout_noseq(req)) {
+		entry = ctx->timeout_list.prev;
+		goto add;
+	}
+
+	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+	timeout->target_seq = tail + off;
+
+	/* Update the last seq here in case io_flush_timeouts() hasn't.
+	 * This is safe because ->completion_lock is held, and submissions
+	 * and completions are never mixed in the same ->completion_lock section.
+	 */
+	ctx->cq_last_tm_flush = tail;
+
+	/*
+	 * Insertion sort, ensuring the first entry in the list is always
+	 * the one we need first.
+	 */
+	list_for_each_prev(entry, &ctx->timeout_list) {
+		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
+		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);
+
+		if (io_is_timeout_noseq(nxt))
+			continue;
+		/* nxt.seq is behind @tail, otherwise would've been completed */
+		if (off >= nextt->target_seq - tail)
+			break;
+	}
+add:
+	list_add(&timeout->list, entry);
+	data->timer.function = io_timeout_fn;
+	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
+	spin_unlock_irq(&ctx->timeout_lock);
+	return IOU_ISSUE_SKIP_COMPLETE;
+}
+
+void io_queue_linked_timeout(struct io_kiocb *req)
+{
+	struct io_timeout *timeout = io_kiocb_to_cmd(req);
+	struct io_ring_ctx *ctx = req->ctx;
+
+	spin_lock_irq(&ctx->timeout_lock);
+	/*
+	 * If the back reference is NULL, then our linked request finished
+	 * before we got a chance to setup the timer
+	 */
+	if (timeout->head) {
+		struct io_timeout_data *data = req->async_data;
+
+		data->timer.function = io_link_timeout_fn;
+		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
+				data->mode);
+		list_add_tail(&timeout->list, &ctx->ltimeout_list);
+	}
+	spin_unlock_irq(&ctx->timeout_lock);
+	/* drop submission reference */
+	io_put_req(req);
+}
+
+static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
+			  bool cancel_all)
+	__must_hold(&req->ctx->timeout_lock)
+{
+	struct io_kiocb *req;
+
+	if (task && head->task != task)
+		return false;
+	if (cancel_all)
+		return true;
+
+	io_for_each_link(req, head) {
+		if (req->flags & REQ_F_INFLIGHT)
+			return true;
+	}
+	return false;
+}
+
+/* Returns true if we found and killed one or more timeouts */
+__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+			     bool cancel_all)
+{
+	struct io_timeout *timeout, *tmp;
+	int canceled = 0;
+
+	spin_lock(&ctx->completion_lock);
+	spin_lock_irq(&ctx->timeout_lock);
+	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
+		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
+
+		if (io_match_task(req, tsk, cancel_all)) {
+			io_kill_timeout(req, -ECANCELED);
+			canceled++;
+		}
+	}
+	spin_unlock_irq(&ctx->timeout_lock);
+	io_commit_cqring(ctx);
+	spin_unlock(&ctx->completion_lock);
+	if (canceled != 0)
+		io_cqring_ev_posted(ctx);
+	return canceled != 0;
+}
-- 
cgit v1.2.3


From 7aaff708a768144ec6459f0a58301be1a6b982fc Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 25 May 2022 20:36:47 -0600
Subject: io_uring: move cancelation into its own file

This also helps clean up the io_uring.h cancel parts, as we can make
things static in the cancel.c file, mostly.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 5e42bfcd683e..69cca42d6835 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -11,6 +11,7 @@
 #include "io_uring_types.h"
 #include "io_uring.h"
 #include "refs.h"
+#include "cancel.h"
 #include "timeout.h"
 
 struct io_timeout {
-- 
cgit v1.2.3


From 5d7943d99df9326c7b02f773b2d6f09709c30594 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 16 Jun 2022 10:22:11 +0100
Subject: io_uring: propagate locking state to poll cancel

Poll cancellation will soon need to grab ->uring_lock inside, so pass
the locking state, i.e. issue_flags, into the cancellation functions.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/b86781d047727c07163443b57551a3fa57c7c5e1.1655371007.git.asml.silence@gmail.com
Reviewed-by: Hao Xu <howeyxu@tencent.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 69cca42d6835..526fc8b2e3b6 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -262,6 +262,7 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
 
 static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
 {
+	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
 	struct io_timeout *timeout = io_kiocb_to_cmd(req);
 	struct io_kiocb *prev = timeout->prev;
 	int ret = -ENOENT;
@@ -273,7 +274,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
 				.data		= prev->cqe.user_data,
 			};
 
-			ret = io_try_cancel(req, &cd);
+			ret = io_try_cancel(req, &cd, issue_flags);
 		}
 		io_req_set_res(req, ret ?: -ETIME, 0);
 		io_req_complete_post(req);
-- 
cgit v1.2.3


From 27a9d66fec77cff0e32d2ecd5d0ac7ef878a7bb0 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 16 Jun 2022 13:57:18 +0100
Subject: io_uring: kill extra io_uring_types.h includes

io_uring/io_uring.h already includes io_uring_types.h, no need to
include it every time. Kill it in a bunch of places, it prepares us for
following patches.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/94d8c943fbe0ef949981c508ddcee7fc1c18850f.1655384063.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 526fc8b2e3b6..f9df359813c9 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -8,7 +8,6 @@
 
 #include <uapi/linux/io_uring.h>
 
-#include "io_uring_types.h"
 #include "io_uring.h"
 #include "refs.h"
 #include "cancel.h"
-- 
cgit v1.2.3


From 48863ffd3e81b6ec98606d3479c50aa77b7a98a9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 16 Jun 2022 13:57:20 +0100
Subject: io_uring: clean up tracing events

We have lots of trace events accepting an io_uring request and wanting
to print some of its fields like user_data, opcode, flags and so on.
However, as trace points were unaware of io_uring structures, we had to
pass all the fields as arguments. Teach trace/events/io_uring.h about
struct io_kiocb and stop the misery of passing a horde of arguments to
trace helpers.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/40ff72f92798114e56d400f2b003beb6cde6ef53.1655384063.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index f9df359813c9..557c637af158 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -115,8 +115,7 @@ static void io_fail_links(struct io_kiocb *req)
 		nxt = link->link;
 		link->link = NULL;
 
-		trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data,
-					req->opcode, link);
+		trace_io_uring_fail_link(req, link);
 
 		if (ignore_cqes)
 			link->flags |= REQ_F_CQE_SKIP;
-- 
cgit v1.2.3


From ba3cdb6fbb6e8eb525c868c60e103c5711edc068 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 20 Jun 2022 01:25:53 +0100
Subject: io_uring: improve task exit timeout cancellations

Don't spin trying to cancel timeouts that are reachable but not
cancellable, e.g. already executing.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/ab8a7440a60bbdf69ae514f672ad050e43dd1b03.1655684496.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 557c637af158..a79a7d6ef1b3 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -49,7 +49,7 @@ static inline void io_put_req(struct io_kiocb *req)
 	}
 }
 
-static void io_kill_timeout(struct io_kiocb *req, int status)
+static bool io_kill_timeout(struct io_kiocb *req, int status)
 	__must_hold(&req->ctx->completion_lock)
 	__must_hold(&req->ctx->timeout_lock)
 {
@@ -64,7 +64,9 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
 			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&timeout->list);
 		io_req_tw_post_queue(req, status, 0);
+		return true;
 	}
+	return false;
 }
 
 __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
@@ -620,10 +622,9 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
 
-		if (io_match_task(req, tsk, cancel_all)) {
-			io_kill_timeout(req, -ECANCELED);
+		if (io_match_task(req, tsk, cancel_all) &&
+		    io_kill_timeout(req, -ECANCELED))
 			canceled++;
-		}
 	}
 	spin_unlock_irq(&ctx->timeout_lock);
 	io_commit_cqring(ctx);
-- 
cgit v1.2.3


From 305bef98870816ae58357d647521891ec558a92e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 20 Jun 2022 01:25:55 +0100
Subject: io_uring: hide eventfd assumptions in eventfd paths

Some io_uring-eventfd users assume that there won't be spurious wakeups.
That assumption has to be honoured by all io_cqring_ev_posted() callers,
which is inconvenient and from time to time leads to problems but should
be maintained to not break the userspace.

Instead of making the callers track whether a CQE was posted or not, hide
it inside io_eventfd_signal(). It saves ->cached_cq_tail it saw last time
and triggers the eventfd only when ->cached_cq_tail changed since then.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0ffc66bae37a2513080b601e4370e147faaa72c5.1655684496.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index a79a7d6ef1b3..424b2fc858b8 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -629,7 +629,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
 	spin_unlock_irq(&ctx->timeout_lock);
 	io_commit_cqring(ctx);
 	spin_unlock(&ctx->completion_lock);
-	if (canceled != 0)
-		io_cqring_ev_posted(ctx);
+	io_cqring_ev_posted(ctx);
 	return canceled != 0;
 }
-- 
cgit v1.2.3


From 253993210bd8aa3b39a392807c03c8ef1cd7dc3d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 20 Jun 2022 01:25:56 +0100
Subject: io_uring: introduce locking helpers for CQE posting

spin_lock(&ctx->completion_lock);
/* post CQEs */
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);

We have many places repeating this sequence, and the three-function
unlock section is not perfect from the maintenance perspective and also
makes it harder to add new locking/sync tricks.

Introduce two helpers: io_cq_lock(), which is simple and only grabs
->completion_lock, and io_cq_unlock_post(), which encapsulates the
three-call section.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/fe0c682bf7f7b55d9be55b0d034be9c1949277dc.1655684496.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 424b2fc858b8..7e2c341f9762 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -617,7 +617,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
 	struct io_timeout *timeout, *tmp;
 	int canceled = 0;
 
-	spin_lock(&ctx->completion_lock);
+	io_cq_lock(ctx);
 	spin_lock_irq(&ctx->timeout_lock);
 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
@@ -627,8 +627,6 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
 			canceled++;
 	}
 	spin_unlock_irq(&ctx->timeout_lock);
-	io_commit_cqring(ctx);
-	spin_unlock(&ctx->completion_lock);
-	io_cqring_ev_posted(ctx);
+	io_cq_unlock_post(ctx);
 	return canceled != 0;
 }
-- 
cgit v1.2.3


From 88f52eaad2df2cb5ab49b864d79398c9cb9a57f2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 18 Jun 2022 09:23:54 -0600
Subject: io_uring: have cancelation API accept io_uring_task directly

We just use the io_kiocb passed in to find the io_uring_task, and we
already pass in the ctx via cd->ctx anyway.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 7e2c341f9762..4af074b8f6b7 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -274,7 +274,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
 				.data		= prev->cqe.user_data,
 			};
 
-			ret = io_try_cancel(req, &cd, issue_flags);
+			ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
 		}
 		io_req_set_res(req, ret ?: -ETIME, 0);
 		io_req_complete_post(req);
-- 
cgit v1.2.3


From 37c7bd31b3e9e4b6aee3c5227f789c0b586a33a2 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 25 Jun 2022 11:52:58 +0100
Subject: io_uring: improve io_fail_links()

io_fail_links() is called with ->completion_lock held and for that
reason we'd want to keep it as small as we can. Instead of doing
__io_req_complete_post() for each linked request under the lock, fail
them in a task_work handler under ->uring_lock.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a2f68708b970a21f4e84ddfa7b3abd67a8fffb27.1656153285.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

(limited to 'io_uring/timeout.c')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 4af074b8f6b7..2f9e56935479 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -101,32 +101,44 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
 	spin_unlock_irq(&ctx->timeout_lock);
 }
 
-static void io_fail_links(struct io_kiocb *req)
-	__must_hold(&req->ctx->completion_lock)
+static void io_req_tw_fail_links(struct io_kiocb *link, bool *locked)
 {
-	struct io_kiocb *nxt, *link = req->link;
-	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
-
-	req->link = NULL;
+	io_tw_lock(link->ctx, locked);
 	while (link) {
+		struct io_kiocb *nxt = link->link;
 		long res = -ECANCELED;
 
 		if (link->flags & REQ_F_FAIL)
 			res = link->cqe.res;
-
-		nxt = link->link;
 		link->link = NULL;
+		io_req_set_res(link, res, 0);
+		io_req_task_complete(link, locked);
+		link = nxt;
+	}
+}
 
-		trace_io_uring_fail_link(req, link);
+static void io_fail_links(struct io_kiocb *req)
+	__must_hold(&req->ctx->completion_lock)
+{
+	struct io_kiocb *link = req->link;
+	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
+
+	if (!link)
+		return;
 
+	while (link) {
 		if (ignore_cqes)
 			link->flags |= REQ_F_CQE_SKIP;
 		else
 			link->flags &= ~REQ_F_CQE_SKIP;
-		io_req_set_res(link, res, 0);
-		__io_req_complete_post(link);
-		link = nxt;
+		trace_io_uring_fail_link(req, link);
+		link = link->link;
 	}
+
+	link = req->link;
+	link->io_task_work.func = io_req_tw_fail_links;
+	io_req_task_work_add(link);
+	req->link = NULL;
 }
 
 static inline void io_remove_next_linked(struct io_kiocb *req)
-- 
cgit v1.2.3