| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-17 08:33:49 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-17 08:33:49 -0800 |
| commit | 7b751b01ade7f666de2f5c365bd9562c2dcd7d60 (patch) | |
| tree | 5b9ae23000c7e4b78b9383a69676cd59915afa4c | |
| parent | 9702969978695d9a699a1f34771580cdbb153b33 (diff) | |
| parent | be3573124e630736d2d39650b12f5ef220b47ac1 (diff) | |
Merge tag 'io_uring-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull more io_uring updates from Jens Axboe:
"This is a mix of cleanups and fixes. No major fixes in here, just a
bunch of little fixes. Some of them marked for stable as it fixes
behavioral issues
- Fix an issue with SOCKET_URING_OP_SETSOCKOPT for netlink sockets,
caused by an overly restrictive check requiring an ioctl handler
- Remove a redundant SQPOLL check in ring creation
- Kill dead accounting for zero-copy send, which doesn't use ->buf
or ->len post the initial setup
- Fix missing clamp of the allocation hint, which could cause
allocations to fall outside of the range the application asked
for. Still within the allowed limits.
- Fix for IORING_OP_PIPE's handling of direct descriptors
- Tweak to the API for the newly added BPF filters, making them
more future-proof in terms of how applications deal with them (a
userspace sketch follows this list)
- A few zcrx fixes, addressing several error handling conditions
- Fix for zcrx request flag checking
- Add support for querying the zcrx page size
- Improve the NO_SQARRAY static branch inc/dec by deferring the
disable, so that rapid ring create/destroy cycles don't cause
excessive branch-patching traffic
- Various little cleanups"
* tag 'io_uring-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
io_uring/bpf_filter: pass in expected filter payload size
io_uring/bpf_filter: move filter size and populate helper into struct
io_uring/cancel: de-unionize file and user_data in struct io_cancel_data
io_uring/rsrc: improve regbuf iov validation
io_uring: remove unneeded io_send_zc accounting
io_uring/cmd_net: fix too strict requirement on ioctl
io_uring: delay sqarray static branch disablement
io_uring/query: add query.h copyright notice
io_uring/query: return support for custom rx page size
io_uring/zcrx: check unsupported flags on import
io_uring/zcrx: fix post open error handling
io_uring/zcrx: fix sgtable leak on mapping failures
io_uring: use the right type for creds iteration
io_uring/openclose: fix io_pipe_fixed() slot tracking for specific slots
io_uring/filetable: clamp alloc_hint to the configured alloc range
io_uring/rsrc: replace reg buffer bit field with flags
io_uring/zcrx: improve types for size calculation
io_uring/tctx: avoid modifying loop variable in io_ring_add_registered_file
io_uring: simplify IORING_SETUP_DEFER_TASKRUN && !SQPOLL check
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/uapi/linux/io_uring.h | 8 |
| -rw-r--r-- | include/uapi/linux/io_uring/bpf_filter.h | 8 |
| -rw-r--r-- | include/uapi/linux/io_uring/query.h | 6 |
| -rw-r--r-- | io_uring/bpf_filter.c | 82 |
| -rw-r--r-- | io_uring/cancel.h | 6 |
| -rw-r--r-- | io_uring/cmd_net.c | 9 |
| -rw-r--r-- | io_uring/filetable.c | 4 |
| -rw-r--r-- | io_uring/io_uring.c | 13 |
| -rw-r--r-- | io_uring/net.c | 2 |
| -rw-r--r-- | io_uring/opdef.c | 6 |
| -rw-r--r-- | io_uring/opdef.h | 6 |
| -rw-r--r-- | io_uring/openclose.c | 9 |
| -rw-r--r-- | io_uring/query.c | 2 |
| -rw-r--r-- | io_uring/rsrc.c | 43 |
| -rw-r--r-- | io_uring/rsrc.h | 6 |
| -rw-r--r-- | io_uring/rw.c | 3 |
| -rw-r--r-- | io_uring/tctx.c | 10 |
| -rw-r--r-- | io_uring/zcrx.c | 16 |

18 files changed, 149 insertions(+), 90 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fc473af6feb4..6750c383a2ab 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1090,6 +1090,14 @@ enum zcrx_reg_flags {
 	ZCRX_REG_IMPORT = 1,
 };
 
+enum zcrx_features {
+	/*
+	 * The user can ask for the desired rx page size by passing the
+	 * value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
+	 */
+	ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0,
+};
+
 /*
  * Argument for IORING_REGISTER_ZCRX_IFQ
  */
diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h
index 220351b81bc0..1b461d792a7b 100644
--- a/include/uapi/linux/io_uring/bpf_filter.h
+++ b/include/uapi/linux/io_uring/bpf_filter.h
@@ -35,13 +35,19 @@ enum {
 	 * If set, any currently unset opcode will have a deny filter attached
 	 */
 	IO_URING_BPF_FILTER_DENY_REST = 1,
+	/*
+	 * If set, if kernel and application don't agree on pdu_size for
+	 * the given opcode, fail the registration of the filter.
+	 */
+	IO_URING_BPF_FILTER_SZ_STRICT = 2,
 };
 
 struct io_uring_bpf_filter {
 	__u32 opcode;		/* io_uring opcode to filter */
 	__u32 flags;
 	__u32 filter_len;	/* number of BPF instructions */
-	__u32 resv;
+	__u8 pdu_size;		/* expected pdu size for opcode */
+	__u8 resv[3];
 	__u64 filter_ptr;	/* pointer to BPF filter */
 	__u64 resv2[5];
 };
diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
index 2456e6c5ebb5..95500759cc13 100644
--- a/include/uapi/linux/io_uring/query.h
+++ b/include/uapi/linux/io_uring/query.h
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
 /*
  * Header file for the io_uring query interface.
+ *
+ * Copyright (C) 2026 Pavel Begunkov <asml.silence@gmail.com>
+ * Copyright (C) Meta Platforms, Inc.
  */
 #ifndef LINUX_IO_URING_QUERY_H
 #define LINUX_IO_URING_QUERY_H
@@ -50,7 +53,8 @@ struct io_uring_query_zcrx {
 	__u64 area_flags;
 	/* The number of supported ZCRX_CTRL_* opcodes */
 	__u32 nr_ctrl_opcodes;
-	__u32 __resv1;
+	/* Bitmask of ZCRX_FEATURE_* indicating which features are available */
+	__u32 features;
 	/* The refill ring header size */
 	__u32 rq_hdr_size;
 	/* The alignment for the header */
diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c
index 3816883a45ed..28a23e92ee81 100644
--- a/io_uring/bpf_filter.c
+++ b/io_uring/bpf_filter.c
@@ -26,6 +26,8 @@ static const struct io_bpf_filter dummy_filter;
 static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
 				      struct io_kiocb *req)
 {
+	const struct io_issue_def *def = &io_issue_defs[req->opcode];
+
 	bctx->opcode = req->opcode;
 	bctx->sqe_flags = (__force int) req->flags & SQE_VALID_FLAGS;
 	bctx->user_data = req->cqe.user_data;
@@ -34,19 +36,12 @@ static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
 	       sizeof(*bctx) - offsetof(struct io_uring_bpf_ctx, pdu_size));
 
 	/*
-	 * Opcodes can provide a handler fo populating more data into bctx,
+	 * Opcodes can provide a handler for populating more data into bctx,
 	 * for filters to use.
 	 */
-	switch (req->opcode) {
-	case IORING_OP_SOCKET:
-		bctx->pdu_size = sizeof(bctx->socket);
-		io_socket_bpf_populate(bctx, req);
-		break;
-	case IORING_OP_OPENAT:
-	case IORING_OP_OPENAT2:
-		bctx->pdu_size = sizeof(bctx->open);
-		io_openat_bpf_populate(bctx, req);
-		break;
+	if (def->filter_pdu_size) {
+		bctx->pdu_size = def->filter_pdu_size;
+		def->filter_populate(bctx, req);
 	}
 }
 
@@ -313,36 +308,69 @@ err:
 	return ERR_PTR(-EBUSY);
 }
 
-#define IO_URING_BPF_FILTER_FLAGS IO_URING_BPF_FILTER_DENY_REST
+#define IO_URING_BPF_FILTER_FLAGS (IO_URING_BPF_FILTER_DENY_REST | \
+				    IO_URING_BPF_FILTER_SZ_STRICT)
 
-int io_register_bpf_filter(struct io_restriction *res,
-			   struct io_uring_bpf __user *arg)
+static int io_bpf_filter_import(struct io_uring_bpf *reg,
+				struct io_uring_bpf __user *arg)
 {
-	struct io_bpf_filters *filters, *old_filters = NULL;
-	struct io_bpf_filter *filter, *old_filter;
-	struct io_uring_bpf reg;
-	struct bpf_prog *prog;
-	struct sock_fprog fprog;
+	const struct io_issue_def *def;
 	int ret;
 
-	if (copy_from_user(&reg, arg, sizeof(reg)))
+	if (copy_from_user(reg, arg, sizeof(*reg)))
 		return -EFAULT;
-	if (reg.cmd_type != IO_URING_BPF_CMD_FILTER)
+	if (reg->cmd_type != IO_URING_BPF_CMD_FILTER)
 		return -EINVAL;
-	if (reg.cmd_flags || reg.resv)
+	if (reg->cmd_flags || reg->resv)
 		return -EINVAL;
-	if (reg.filter.opcode >= IORING_OP_LAST)
+	if (reg->filter.opcode >= IORING_OP_LAST)
 		return -EINVAL;
-	if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
+	if (reg->filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
 		return -EINVAL;
-	if (reg.filter.resv)
+	if (!mem_is_zero(reg->filter.resv, sizeof(reg->filter.resv)))
 		return -EINVAL;
-	if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2)))
+	if (!mem_is_zero(reg->filter.resv2, sizeof(reg->filter.resv2)))
 		return -EINVAL;
-	if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS)
+	if (!reg->filter.filter_len || reg->filter.filter_len > BPF_MAXINSNS)
 		return -EINVAL;
 
+	/* Verify filter size */
+	def = &io_issue_defs[array_index_nospec(reg->filter.opcode, IORING_OP_LAST)];
+
+	/* same size, always ok */
+	ret = 0;
+	if (reg->filter.pdu_size == def->filter_pdu_size)
+		;
+	/* size differs, fail in strict mode */
+	else if (reg->filter.flags & IO_URING_BPF_FILTER_SZ_STRICT)
+		ret = -EMSGSIZE;
+	/* userspace filter is bigger, always disallow */
+	else if (reg->filter.pdu_size > def->filter_pdu_size)
+		ret = -EMSGSIZE;
+
+	/* copy back kernel filter size */
+	reg->filter.pdu_size = def->filter_pdu_size;
+	if (copy_to_user(&arg->filter, &reg->filter, sizeof(reg->filter)))
+		return -EFAULT;
+
+	return ret;
+}
+
+int io_register_bpf_filter(struct io_restriction *res,
+			   struct io_uring_bpf __user *arg)
+{
+	struct io_bpf_filters *filters, *old_filters = NULL;
+	struct io_bpf_filter *filter, *old_filter;
+	struct io_uring_bpf reg;
+	struct bpf_prog *prog;
+	struct sock_fprog fprog;
+	int ret;
+
+	ret = io_bpf_filter_import(&reg, arg);
+	if (ret)
+		return ret;
+
 	fprog.len = reg.filter.filter_len;
 	fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr);
diff --git a/io_uring/cancel.h b/io_uring/cancel.h
index 6783961ede1b..1b201a094303 100644
--- a/io_uring/cancel.h
+++ b/io_uring/cancel.h
@@ -6,10 +6,8 @@
 
 struct io_cancel_data {
 	struct io_ring_ctx *ctx;
-	union {
-		u64 data;
-		struct file *file;
-	};
+	u64 data;
+	struct file *file;
 	u8 opcode;
 	u32 flags;
 	int seq;
diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c
index cb2775936fb8..57ddaf874611 100644
--- a/io_uring/cmd_net.c
+++ b/io_uring/cmd_net.c
@@ -160,16 +160,19 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	struct proto *prot = READ_ONCE(sk->sk_prot);
 	int ret, arg = 0;
 
-	if (!prot || !prot->ioctl)
-		return -EOPNOTSUPP;
-
 	switch (cmd->cmd_op) {
 	case SOCKET_URING_OP_SIOCINQ:
+		if (!prot || !prot->ioctl)
+			return -EOPNOTSUPP;
+
 		ret = prot->ioctl(sk, SIOCINQ, &arg);
 		if (ret)
 			return ret;
 		return arg;
 	case SOCKET_URING_OP_SIOCOUTQ:
+		if (!prot || !prot->ioctl)
+			return -EOPNOTSUPP;
+
 		ret = prot->ioctl(sk, SIOCOUTQ, &arg);
 		if (ret)
 			return ret;
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index 794ef95df293..cb1838c9fc37 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -22,6 +22,10 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
 	if (!table->bitmap)
 		return -ENFILE;
 
+	if (table->alloc_hint < ctx->file_alloc_start ||
+	    table->alloc_hint >= ctx->file_alloc_end)
+		table->alloc_hint = ctx->file_alloc_start;
+
 	do {
 		ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
 		if (ret != nr)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 1d5bc669afd9..ccab8562d273 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -119,7 +119,7 @@ static void io_queue_sqe(struct io_kiocb *req, unsigned int extra_flags);
 
 static void __io_req_caches_free(struct io_ring_ctx *ctx);
 
-static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
+static __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(io_key_has_sqarray, HZ);
 
 struct kmem_cache *req_cachep;
 static struct workqueue_struct *iou_wq __ro_after_init;
@@ -1978,7 +1978,7 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 	unsigned mask = ctx->sq_entries - 1;
 	unsigned head = ctx->cached_sq_head++ & mask;
 
-	if (static_branch_unlikely(&io_key_has_sqarray) &&
+	if (static_branch_unlikely(&io_key_has_sqarray.key) &&
 	    (!(ctx->flags & IORING_SETUP_NO_SQARRAY))) {
 		head = READ_ONCE(ctx->sq_array[head]);
 		if (unlikely(head >= ctx->sq_entries)) {
@@ -2173,7 +2173,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_rings_free(ctx);
 
 	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
-		static_branch_dec(&io_key_has_sqarray);
+		static_branch_slow_dec_deferred(&io_key_has_sqarray);
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);
@@ -2398,7 +2398,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
 	unsigned long index;
-	struct creds *creds;
+	struct cred *creds;
 
 	mutex_lock(&ctx->uring_lock);
 	percpu_ref_kill(&ctx->refs);
@@ -2946,11 +2946,10 @@ static __cold int io_uring_create(struct io_ctx_config *config)
 		ctx->clock_offset = 0;
 
 	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
-		static_branch_inc(&io_key_has_sqarray);
+		static_branch_deferred_inc(&io_key_has_sqarray);
 
 	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
-	    !(ctx->flags & IORING_SETUP_IOPOLL) &&
-	    !(ctx->flags & IORING_SETUP_SQPOLL))
+	    !(ctx->flags & IORING_SETUP_IOPOLL))
 		ctx->task_complete = true;
 
 	if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
diff --git a/io_uring/net.c b/io_uring/net.c
index a6f3cbb7dfea..8576c6cb2236 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1493,8 +1493,6 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
 			return -EAGAIN;
 
 		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
-			zc->len -= ret;
-			zc->buf += ret;
 			zc->done_io += ret;
 			return -EAGAIN;
 		}
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index df52d760240e..91a23baf415e 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -221,8 +221,10 @@ const struct io_issue_def io_issue_defs[] = {
 		.issue = io_fallocate,
 	},
 	[IORING_OP_OPENAT] = {
+		.filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, open),
 		.prep = io_openat_prep,
 		.issue = io_openat,
+		.filter_populate = io_openat_bpf_populate,
 	},
 	[IORING_OP_CLOSE] = {
 		.prep = io_close_prep,
@@ -309,8 +311,10 @@ const struct io_issue_def io_issue_defs[] = {
 #endif
 	},
 	[IORING_OP_OPENAT2] = {
+		.filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, open),
 		.prep = io_openat2_prep,
 		.issue = io_openat2,
+		.filter_populate = io_openat_bpf_populate,
 	},
 	[IORING_OP_EPOLL_CTL] = {
 		.unbound_nonreg_file = 1,
@@ -406,8 +410,10 @@ const struct io_issue_def io_issue_defs[] = {
 	[IORING_OP_SOCKET] = {
 		.audit_skip = 1,
 #if defined(CONFIG_NET)
+		.filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, socket),
 		.prep = io_socket_prep,
 		.issue = io_socket,
+		.filter_populate = io_socket_bpf_populate,
 #else
 		.prep = io_eopnotsupp_prep,
 #endif
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index aa37846880ff..faf3955dce8b 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -2,6 +2,8 @@
 #ifndef IOU_OP_DEF_H
 #define IOU_OP_DEF_H
 
+struct io_uring_bpf_ctx;
+
 struct io_issue_def {
 	/* needs req->file assigned */
 	unsigned needs_file : 1;
@@ -33,8 +35,12 @@ struct io_issue_def {
 	/* size of async data needed, if any */
 	unsigned short async_size;
 
+	/* bpf filter pdu size, if any */
+	unsigned short filter_pdu_size;
+
 	int (*issue)(struct io_kiocb *, unsigned int);
 	int (*prep)(struct io_kiocb *, const struct io_uring_sqe *);
+	void (*filter_populate)(struct io_uring_bpf_ctx *, struct io_kiocb *);
 };
 
 struct io_cold_def {
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index d617b421b1e6..c71242915dad 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -345,31 +345,34 @@ static int io_pipe_fixed(struct io_kiocb *req, struct file **files,
 {
 	struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
 	struct io_ring_ctx *ctx = req->ctx;
+	bool alloc_slot;
 	int ret, fds[2] = { -1, -1 };
 	int slot = p->file_slot;
 
 	if (p->flags & O_CLOEXEC)
 		return -EINVAL;
 
+	alloc_slot = slot == IORING_FILE_INDEX_ALLOC;
+
 	io_ring_submit_lock(ctx, issue_flags);
 
 	ret = __io_fixed_fd_install(ctx, files[0], slot);
 	if (ret < 0)
 		goto err;
-	fds[0] = ret;
+	fds[0] = alloc_slot ? ret : slot - 1;
 	files[0] = NULL;
 
 	/*
 	 * If a specific slot is given, next one will be used for
 	 * the write side.
 	 */
-	if (slot != IORING_FILE_INDEX_ALLOC)
+	if (!alloc_slot)
 		slot++;
 
 	ret = __io_fixed_fd_install(ctx, files[1], slot);
 	if (ret < 0)
 		goto err;
-	fds[1] = ret;
+	fds[1] = alloc_slot ? ret : slot - 1;
 	files[1] = NULL;
 
 	io_ring_submit_unlock(ctx, issue_flags);
diff --git a/io_uring/query.c b/io_uring/query.c
index abdd6f3e1223..63cc30c9803d 100644
--- a/io_uring/query.c
+++ b/io_uring/query.c
@@ -39,7 +39,7 @@ static ssize_t io_query_zcrx(union io_query_data *data)
 	e->nr_ctrl_opcodes = __ZCRX_CTRL_LAST;
 	e->rq_hdr_size = sizeof(struct io_uring);
 	e->rq_hdr_alignment = L1_CACHE_BYTES;
-	e->__resv1 = 0;
+	e->features = ZCRX_FEATURE_RX_PAGE_SIZE;
 	e->__resv2 = 0;
 	return sizeof(*e);
 }
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 95ce553fff8d..842e231c8a7c 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -96,20 +96,6 @@ int io_validate_user_buf_range(u64 uaddr, u64 ulen)
 	return 0;
 }
 
-static int io_buffer_validate(struct iovec *iov)
-{
-	/*
-	 * Don't impose further limits on the size and buffer
-	 * constraints here, we'll -EINVAL later when IO is
-	 * submitted if they are wrong.
-	 */
-	if (!iov->iov_base)
-		return iov->iov_len ? -EFAULT : 0;
-
-	return io_validate_user_buf_range((unsigned long)iov->iov_base,
-					  iov->iov_len);
-}
-
 static void io_release_ubuf(void *priv)
 {
 	struct io_mapped_ubuf *imu = priv;
@@ -319,9 +305,6 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 			err = -EFAULT;
 			break;
 		}
-		err = io_buffer_validate(iov);
-		if (err)
-			break;
 		node = io_sqe_buffer_register(ctx, iov, &last_hpage);
 		if (IS_ERR(node)) {
 			err = PTR_ERR(node);
@@ -790,8 +773,17 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	struct io_imu_folio_data data;
 	bool coalesced = false;
 
-	if (!iov->iov_base)
+	if (!iov->iov_base) {
+		if (iov->iov_len)
+			return ERR_PTR(-EFAULT);
+		/* remove the buffer without installing a new one */
 		return NULL;
+	}
+
+	ret = io_validate_user_buf_range((unsigned long)iov->iov_base,
+					 iov->iov_len);
+	if (ret)
+		return ERR_PTR(ret);
 
 	node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
 	if (!node)
@@ -828,7 +820,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	imu->folio_shift = PAGE_SHIFT;
 	imu->release = io_release_ubuf;
 	imu->priv = imu;
-	imu->is_kbuf = false;
+	imu->flags = 0;
 	imu->dir = IO_IMU_DEST | IO_IMU_SOURCE;
 	if (coalesced)
 		imu->folio_shift = data.folio_shift;
@@ -897,9 +889,6 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 			ret = PTR_ERR(iov);
 			break;
 		}
-		ret = io_buffer_validate(iov);
-		if (ret)
-			break;
 		if (ctx->compat)
 			arg += sizeof(struct compat_iovec);
 		else
@@ -985,7 +974,7 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
 	refcount_set(&imu->refs, 1);
 	imu->release = release;
 	imu->priv = rq;
-	imu->is_kbuf = true;
+	imu->flags = IO_REGBUF_F_KBUF;
 	imu->dir = 1 << rq_data_dir(rq);
 
 	rq_for_each_bvec(bv, rq, rq_iter)
@@ -1020,7 +1009,7 @@ int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index,
 		ret = -EINVAL;
 		goto unlock;
 	}
-	if (!node->buf->is_kbuf) {
+	if (!(node->buf->flags & IO_REGBUF_F_KBUF)) {
 		ret = -EBUSY;
 		goto unlock;
 	}
@@ -1076,7 +1065,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
 
 	offset = buf_addr - imu->ubuf;
 
-	if (imu->is_kbuf)
+	if (imu->flags & IO_REGBUF_F_KBUF)
 		return io_import_kbuf(ddir, iter, imu, len, offset);
 
 	/*
@@ -1496,7 +1485,7 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
 	iovec_off = vec->nr - nr_iovs;
 	iov = vec->iovec + iovec_off;
 
-	if (imu->is_kbuf) {
+	if (imu->flags & IO_REGBUF_F_KBUF) {
 		int ret = io_kern_bvec_size(iov, nr_iovs, imu, &nr_segs);
 
 		if (unlikely(ret))
@@ -1534,7 +1523,7 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
 		req->flags |= REQ_F_NEED_CLEANUP;
 	}
 
-	if (imu->is_kbuf)
+	if (imu->flags & IO_REGBUF_F_KBUF)
 		return io_vec_fill_kern_bvec(ddir, iter, imu, iov, nr_iovs, vec);
 
 	return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec);
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 4a5db2ad1af2..cff0f8834c35 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -28,6 +28,10 @@ enum {
 	IO_IMU_SOURCE = 1 << ITER_SOURCE,
 };
 
+enum {
+	IO_REGBUF_F_KBUF = 1,
+};
+
 struct io_mapped_ubuf {
 	u64 ubuf;
 	unsigned int len;
@@ -37,7 +41,7 @@ struct io_mapped_ubuf {
 	unsigned long acct_pages;
 	void (*release)(void *);
 	void *priv;
-	bool is_kbuf;
+	u8 flags;
 	u8 dir;
 	struct bio_vec bvec[] __counted_by(nr_bvecs);
 };
diff --git a/io_uring/rw.c b/io_uring/rw.c
index d10386f56d49..b3971171c342 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -702,7 +702,8 @@ static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
 	if ((kiocb->ki_flags & IOCB_NOWAIT) &&
 	    !(kiocb->ki_filp->f_flags & O_NONBLOCK))
 		return -EAGAIN;
-	if ((req->flags & REQ_F_BUF_NODE) && req->buf_node->buf->is_kbuf)
+	if ((req->flags & REQ_F_BUF_NODE) &&
+	    (req->buf_node->buf->flags & IO_REGBUF_F_KBUF))
 		return -EFAULT;
 
 	ppos = io_kiocb_ppos(kiocb);
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index ad9e4336d736..270263699c6f 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -240,14 +240,14 @@ void io_uring_unreg_ringfd(void)
 int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
 				int start, int end)
 {
-	int offset;
+	int offset, idx;
 
 	for (offset = start; offset < end; offset++) {
-		offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
-		if (tctx->registered_rings[offset])
+		idx = array_index_nospec(offset, IO_RINGFD_REG_MAX);
+		if (tctx->registered_rings[idx])
 			continue;
-		tctx->registered_rings[offset] = file;
-		return offset;
+		tctx->registered_rings[idx] = file;
+		return idx;
 	}
 	return -EBUSY;
 }
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index d8b6db456bd7..28150c6578e3 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -205,7 +205,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
 		return PTR_ERR(pages);
 
 	ret = sg_alloc_table_from_pages(&mem->page_sg_table, pages, nr_pages,
-					0, nr_pages << PAGE_SHIFT,
+					0, (unsigned long)nr_pages << PAGE_SHIFT,
 					GFP_KERNEL_ACCOUNT);
 	if (ret) {
 		unpin_user_pages(pages, nr_pages);
@@ -300,6 +300,9 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 	}
 
 	ret = io_populate_area_dma(ifq, area);
+	if (ret && !area->mem.is_dmabuf)
+		dma_unmap_sgtable(ifq->dev, &area->mem.page_sg_table,
+				  DMA_FROM_DEVICE, IO_DMA_ATTR);
 	if (ret == 0)
 		area->is_mapped = true;
 	return ret;
@@ -538,9 +541,6 @@ static void io_close_queue(struct io_zcrx_ifq *ifq)
 		.mp_priv = ifq,
 	};
 
-	if (ifq->if_rxq == -1)
-		return;
-
 	scoped_guard(mutex, &ifq->pp_lock) {
 		netdev = ifq->netdev;
 		netdev_tracker = ifq->netdev_tracker;
@@ -548,7 +548,8 @@ static void io_close_queue(struct io_zcrx_ifq *ifq)
 	}
 
 	if (netdev) {
-		net_mp_close_rxq(netdev, ifq->if_rxq, &p);
+		if (ifq->if_rxq != -1)
+			net_mp_close_rxq(netdev, ifq->if_rxq, &p);
 		netdev_put(netdev, &netdev_tracker);
 	}
 	ifq->if_rxq = -1;
@@ -702,6 +703,8 @@ static int import_zcrx(struct io_ring_ctx *ctx,
 		return -EINVAL;
 	if (reg->if_rxq || reg->rq_entries || reg->area_ptr || reg->region_ptr)
 		return -EINVAL;
+	if (reg->flags & ~ZCRX_REG_IMPORT)
+		return -EINVAL;
 
 	fd = reg->if_idx;
 	CLASS(fd, f)(fd);
@@ -858,13 +861,12 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	}
 	return 0;
 
 netdev_put_unlock:
-	netdev_put(ifq->netdev, &ifq->netdev_tracker);
 	netdev_unlock(ifq->netdev);
 err:
 	scoped_guard(mutex, &ctx->mmap_lock)
 		xa_erase(&ctx->zcrx_ctxs, id);
 ifq_free:
-	io_zcrx_ifq_free(ifq);
+	zcrx_unregister(ifq);
 	return ret;
 }
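
On the zcrx side, the new features bitmask is how an application discovers whether rx_buf_len is honored. A minimal sketch follows, assuming the query plumbing has already filled in struct io_uring_query_zcrx (the hypothetical query step itself is omitted); the features field, ZCRX_FEATURE_RX_PAGE_SIZE, and the io_uring_zcrx_ifq_reg::rx_buf_len registration field all come from the patches above.

/* Hypothetical sketch; the query call that fills 'q' is not shown. */
#include <linux/io_uring.h>
#include <linux/io_uring/query.h>

static void zcrx_pick_rx_buf_len(const struct io_uring_query_zcrx *q,
				 struct io_uring_zcrx_ifq_reg *reg,
				 unsigned int want)
{
	if (q->features & ZCRX_FEATURE_RX_PAGE_SIZE) {
		/*
		 * Per the new uapi comment, the desired rx page size is
		 * passed in io_uring_zcrx_ifq_reg::rx_buf_len.
		 */
		reg->rx_buf_len = want;
	} else {
		/* kernel predates the feature: leave zero, take the default */
		reg->rx_buf_len = 0;
	}
}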
