diff options
Diffstat (limited to 'io_uring/register.c')
| -rw-r--r-- | io_uring/register.c | 110 |
1 files changed, 58 insertions, 52 deletions
diff --git a/io_uring/register.c b/io_uring/register.c index 6015a3e9ce69..dce5e2f9cf77 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -178,15 +178,23 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx, return -EBUSY; ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions); - /* Reset all restrictions if an error happened */ + /* + * Reset all restrictions if an error happened, but retain any COW'ed + * settings. + */ if (ret < 0) { + struct io_bpf_filters *bpf = ctx->restrictions.bpf_filters; + bool cowed = ctx->restrictions.bpf_filters_cow; + memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); + ctx->restrictions.bpf_filters = bpf; + ctx->restrictions.bpf_filters_cow = cowed; return ret; } if (ctx->restrictions.op_registered) - ctx->op_restricted = 1; + ctx->int_flags |= IO_RING_F_OP_RESTRICTED; if (ctx->restrictions.reg_registered) - ctx->reg_restricted = 1; + ctx->int_flags |= IO_RING_F_REG_RESTRICTED; return 0; } @@ -202,7 +210,7 @@ static int io_register_restrictions_task(void __user *arg, unsigned int nr_args) return -EPERM; /* * Similar to seccomp, disallow setting a filter if task_no_new_privs - * is true and we're not CAP_SYS_ADMIN. + * is false and we're not CAP_SYS_ADMIN. */ if (!task_no_new_privs(current) && !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) @@ -238,7 +246,7 @@ static int io_register_bpf_filter_task(void __user *arg, unsigned int nr_args) /* * Similar to seccomp, disallow setting a filter if task_no_new_privs - * is true and we're not CAP_SYS_ADMIN. + * is false and we're not CAP_SYS_ADMIN. */ if (!task_no_new_privs(current) && !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) @@ -384,7 +392,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i]) ctx->iowq_limits[i] = new_count[i]; - ctx->iowq_limits_set = true; + ctx->int_flags |= IO_RING_F_IOWQ_LIMITS_SET; if (tctx && tctx->io_wq) { ret = io_wq_max_workers(tctx->io_wq, new_count); @@ -591,10 +599,20 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) if (tail - old_head > p->sq_entries) goto overflow; for (i = old_head; i < tail; i++) { - unsigned src_head = i & (ctx->sq_entries - 1); - unsigned dst_head = i & (p->sq_entries - 1); - - n.sq_sqes[dst_head] = o.sq_sqes[src_head]; + unsigned index, dst_mask, src_mask; + size_t sq_size; + + index = i; + sq_size = sizeof(struct io_uring_sqe); + src_mask = ctx->sq_entries - 1; + dst_mask = p->sq_entries - 1; + if (ctx->flags & IORING_SETUP_SQE128) { + index <<= 1; + sq_size <<= 1; + src_mask = (ctx->sq_entries << 1) - 1; + dst_mask = (p->sq_entries << 1) - 1; + } + memcpy(&n.sq_sqes[index & dst_mask], &o.sq_sqes[index & src_mask], sq_size); } WRITE_ONCE(n.rings->sq.head, old_head); WRITE_ONCE(n.rings->sq.tail, tail); @@ -611,10 +629,20 @@ overflow: goto out; } for (i = old_head; i < tail; i++) { - unsigned src_head = i & (ctx->cq_entries - 1); - unsigned dst_head = i & (p->cq_entries - 1); - - n.rings->cqes[dst_head] = o.rings->cqes[src_head]; + unsigned index, dst_mask, src_mask; + size_t cq_size; + + index = i; + cq_size = sizeof(struct io_uring_cqe); + src_mask = ctx->cq_entries - 1; + dst_mask = p->cq_entries - 1; + if (ctx->flags & IORING_SETUP_CQE32) { + index <<= 1; + cq_size <<= 1; + src_mask = (ctx->cq_entries << 1) - 1; + dst_mask = (p->cq_entries << 1) - 1; + } + memcpy(&n.rings->cqes[index & dst_mask], &o.rings->cqes[index & src_mask], cq_size); } WRITE_ONCE(n.rings->cq.head, old_head); WRITE_ONCE(n.rings->cq.tail, tail); @@ -633,7 +661,15 @@ overflow: ctx->sq_entries = p->sq_entries; ctx->cq_entries = p->cq_entries; + /* + * Just mark any flag we may have missed and that the application + * should act on unconditionally. Worst case it'll be an extra + * syscall. + */ + atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags); ctx->rings = n.rings; + rcu_assign_pointer(ctx->rings_rcu, n.rings); + ctx->sq_sqes = n.sq_sqes; swap_old(ctx, o, n, ring_region); swap_old(ctx, o, n, sq_region); @@ -642,6 +678,9 @@ overflow: out: spin_unlock(&ctx->completion_lock); mutex_unlock(&ctx->mmap_lock); + /* Wait for concurrent io_ctx_mark_taskrun() */ + if (to_free == &o) + synchronize_rcu_expedited(); io_register_free_rings(ctx, to_free); if (ctx->sq_data) @@ -714,7 +753,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, if (ctx->submitter_task && ctx->submitter_task != current) return -EEXIST; - if (ctx->reg_restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) { + if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) && !(ctx->flags & IORING_SETUP_R_DISABLED)) { opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; @@ -889,7 +928,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ret = -EINVAL; if (!arg || nr_args != 1) break; - ret = io_register_zcrx_ifq(ctx, arg); + ret = io_register_zcrx(ctx, arg); break; case IORING_REGISTER_RESIZE_RINGS: ret = -EINVAL; @@ -927,40 +966,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, return ret; } -/* - * Given an 'fd' value, return the ctx associated with if. If 'registered' is - * true, then the registered index is used. Otherwise, the normal fd table. - * Caller must call fput() on the returned file, unless it's an ERR_PTR. - */ -struct file *io_uring_register_get_file(unsigned int fd, bool registered) -{ - struct file *file; - - if (registered) { - /* - * Ring fd has been registered via IORING_REGISTER_RING_FDS, we - * need only dereference our task private array to find it. - */ - struct io_uring_task *tctx = current->io_uring; - - if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX)) - return ERR_PTR(-EINVAL); - fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); - file = tctx->registered_rings[fd]; - if (file) - get_file(file); - } else { - file = fget(fd); - } - - if (unlikely(!file)) - return ERR_PTR(-EBADF); - if (io_is_uring_fops(file)) - return file; - fput(file); - return ERR_PTR(-EOPNOTSUPP); -} - static int io_uring_register_send_msg_ring(void __user *arg, unsigned int nr_args) { struct io_uring_sqe sqe; @@ -1015,7 +1020,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, if (fd == -1) return io_uring_register_blind(opcode, arg, nr_args); - file = io_uring_register_get_file(fd, use_registered_ring); + file = io_uring_ctx_get_file(fd, use_registered_ring); if (IS_ERR(file)) return PTR_ERR(file); ctx = file->private_data; @@ -1027,6 +1032,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, ctx->buf_table.nr, ret); mutex_unlock(&ctx->uring_lock); - fput(file); + if (!use_registered_ring) + fput(file); return ret; } |
