summaryrefslogtreecommitdiff
path: root/io_uring/register.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/register.c')
-rw-r--r--io_uring/register.c110
1 files changed, 58 insertions, 52 deletions
diff --git a/io_uring/register.c b/io_uring/register.c
index 6015a3e9ce69..dce5e2f9cf77 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -178,15 +178,23 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
return -EBUSY;
ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions);
- /* Reset all restrictions if an error happened */
+ /*
+ * Reset all restrictions if an error happened, but retain any COW'ed
+ * settings.
+ */
if (ret < 0) {
+ struct io_bpf_filters *bpf = ctx->restrictions.bpf_filters;
+ bool cowed = ctx->restrictions.bpf_filters_cow;
+
memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
+ ctx->restrictions.bpf_filters = bpf;
+ ctx->restrictions.bpf_filters_cow = cowed;
return ret;
}
if (ctx->restrictions.op_registered)
- ctx->op_restricted = 1;
+ ctx->int_flags |= IO_RING_F_OP_RESTRICTED;
if (ctx->restrictions.reg_registered)
- ctx->reg_restricted = 1;
+ ctx->int_flags |= IO_RING_F_REG_RESTRICTED;
return 0;
}
@@ -202,7 +210,7 @@ static int io_register_restrictions_task(void __user *arg, unsigned int nr_args)
return -EPERM;
/*
* Similar to seccomp, disallow setting a filter if task_no_new_privs
- * is true and we're not CAP_SYS_ADMIN.
+ * is false and we're not CAP_SYS_ADMIN.
*/
if (!task_no_new_privs(current) &&
!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
@@ -238,7 +246,7 @@ static int io_register_bpf_filter_task(void __user *arg, unsigned int nr_args)
/*
* Similar to seccomp, disallow setting a filter if task_no_new_privs
- * is true and we're not CAP_SYS_ADMIN.
+ * is false and we're not CAP_SYS_ADMIN.
*/
if (!task_no_new_privs(current) &&
!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
@@ -384,7 +392,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
for (i = 0; i < ARRAY_SIZE(new_count); i++)
if (new_count[i])
ctx->iowq_limits[i] = new_count[i];
- ctx->iowq_limits_set = true;
+ ctx->int_flags |= IO_RING_F_IOWQ_LIMITS_SET;
if (tctx && tctx->io_wq) {
ret = io_wq_max_workers(tctx->io_wq, new_count);
@@ -591,10 +599,20 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
if (tail - old_head > p->sq_entries)
goto overflow;
for (i = old_head; i < tail; i++) {
- unsigned src_head = i & (ctx->sq_entries - 1);
- unsigned dst_head = i & (p->sq_entries - 1);
-
- n.sq_sqes[dst_head] = o.sq_sqes[src_head];
+ unsigned index, dst_mask, src_mask;
+ size_t sq_size;
+
+ index = i;
+ sq_size = sizeof(struct io_uring_sqe);
+ src_mask = ctx->sq_entries - 1;
+ dst_mask = p->sq_entries - 1;
+ if (ctx->flags & IORING_SETUP_SQE128) {
+ index <<= 1;
+ sq_size <<= 1;
+ src_mask = (ctx->sq_entries << 1) - 1;
+ dst_mask = (p->sq_entries << 1) - 1;
+ }
+ memcpy(&n.sq_sqes[index & dst_mask], &o.sq_sqes[index & src_mask], sq_size);
}
WRITE_ONCE(n.rings->sq.head, old_head);
WRITE_ONCE(n.rings->sq.tail, tail);
@@ -611,10 +629,20 @@ overflow:
goto out;
}
for (i = old_head; i < tail; i++) {
- unsigned src_head = i & (ctx->cq_entries - 1);
- unsigned dst_head = i & (p->cq_entries - 1);
-
- n.rings->cqes[dst_head] = o.rings->cqes[src_head];
+ unsigned index, dst_mask, src_mask;
+ size_t cq_size;
+
+ index = i;
+ cq_size = sizeof(struct io_uring_cqe);
+ src_mask = ctx->cq_entries - 1;
+ dst_mask = p->cq_entries - 1;
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ index <<= 1;
+ cq_size <<= 1;
+ src_mask = (ctx->cq_entries << 1) - 1;
+ dst_mask = (p->cq_entries << 1) - 1;
+ }
+ memcpy(&n.rings->cqes[index & dst_mask], &o.rings->cqes[index & src_mask], cq_size);
}
WRITE_ONCE(n.rings->cq.head, old_head);
WRITE_ONCE(n.rings->cq.tail, tail);
@@ -633,7 +661,15 @@ overflow:
ctx->sq_entries = p->sq_entries;
ctx->cq_entries = p->cq_entries;
+ /*
+ * Just mark any flag we may have missed and that the application
+ * should act on unconditionally. Worst case it'll be an extra
+ * syscall.
+ */
+ atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags);
ctx->rings = n.rings;
+ rcu_assign_pointer(ctx->rings_rcu, n.rings);
+
ctx->sq_sqes = n.sq_sqes;
swap_old(ctx, o, n, ring_region);
swap_old(ctx, o, n, sq_region);
@@ -642,6 +678,9 @@ overflow:
out:
spin_unlock(&ctx->completion_lock);
mutex_unlock(&ctx->mmap_lock);
+ /* Wait for concurrent io_ctx_mark_taskrun() */
+ if (to_free == &o)
+ synchronize_rcu_expedited();
io_register_free_rings(ctx, to_free);
if (ctx->sq_data)
@@ -714,7 +753,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (ctx->submitter_task && ctx->submitter_task != current)
return -EEXIST;
- if (ctx->reg_restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
+ if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
if (!test_bit(opcode, ctx->restrictions.register_op))
return -EACCES;
@@ -889,7 +928,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
ret = -EINVAL;
if (!arg || nr_args != 1)
break;
- ret = io_register_zcrx_ifq(ctx, arg);
+ ret = io_register_zcrx(ctx, arg);
break;
case IORING_REGISTER_RESIZE_RINGS:
ret = -EINVAL;
@@ -927,40 +966,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
return ret;
}
-/*
- * Given an 'fd' value, return the ctx associated with if. If 'registered' is
- * true, then the registered index is used. Otherwise, the normal fd table.
- * Caller must call fput() on the returned file, unless it's an ERR_PTR.
- */
-struct file *io_uring_register_get_file(unsigned int fd, bool registered)
-{
- struct file *file;
-
- if (registered) {
- /*
- * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
- * need only dereference our task private array to find it.
- */
- struct io_uring_task *tctx = current->io_uring;
-
- if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
- return ERR_PTR(-EINVAL);
- fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
- file = tctx->registered_rings[fd];
- if (file)
- get_file(file);
- } else {
- file = fget(fd);
- }
-
- if (unlikely(!file))
- return ERR_PTR(-EBADF);
- if (io_is_uring_fops(file))
- return file;
- fput(file);
- return ERR_PTR(-EOPNOTSUPP);
-}
-
static int io_uring_register_send_msg_ring(void __user *arg, unsigned int nr_args)
{
struct io_uring_sqe sqe;
@@ -1015,7 +1020,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
if (fd == -1)
return io_uring_register_blind(opcode, arg, nr_args);
- file = io_uring_register_get_file(fd, use_registered_ring);
+ file = io_uring_ctx_get_file(fd, use_registered_ring);
if (IS_ERR(file))
return PTR_ERR(file);
ctx = file->private_data;
@@ -1027,6 +1032,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
ctx->buf_table.nr, ret);
mutex_unlock(&ctx->uring_lock);
- fput(file);
+ if (!use_registered_ring)
+ fput(file);
return ret;
}