author	Linus Torvalds <torvalds@linux-foundation.org>	2026-02-17 08:33:49 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2026-02-17 08:33:49 -0800
commit	7b751b01ade7f666de2f5c365bd9562c2dcd7d60 (patch)
tree	5b9ae23000c7e4b78b9383a69676cd59915afa4c
parent	9702969978695d9a699a1f34771580cdbb153b33 (diff)
parent	be3573124e630736d2d39650b12f5ef220b47ac1 (diff)
Merge tag 'io_uring-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull more io_uring updates from Jens Axboe:
 "This is a mix of cleanups and fixes. No major fixes in here, just a
  bunch of little fixes. Some of them marked for stable as it fixes
  behavioral issues

   - Fix an issue with SOCKET_URING_OP_SETSOCKOPT for netlink sockets,
     due to a too restrictive check on it having an ioctl handler

   - Remove a redundant SQPOLL check in ring creation

   - Kill dead accounting for zero-copy send, which doesn't use ->buf
     or ->len post the initial setup

   - Fix missing clamp of the allocation hint, which could cause
     allocations to fall outside of the range the application asked
     for. Still within the allowed limits.

   - Fix for IORING_OP_PIPE's handling of direct descriptors

   - Tweak to the API for the newly added BPF filters, making them
     more future proof in terms of how applications deal with them

   - A few fixes for zcrx, fixing a few error handling conditions

   - Fix for zcrx request flag checking

   - Add support for querying the zcrx page size

   - Improve the NO_SQARRAY static branch inc/dec, avoiding busy
     conditions causing too much traffic

   - Various little cleanups"

* tag 'io_uring-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/bpf_filter: pass in expected filter payload size
  io_uring/bpf_filter: move filter size and populate helper into struct
  io_uring/cancel: de-unionize file and user_data in struct io_cancel_data
  io_uring/rsrc: improve regbuf iov validation
  io_uring: remove unneeded io_send_zc accounting
  io_uring/cmd_net: fix too strict requirement on ioctl
  io_uring: delay sqarray static branch disablement
  io_uring/query: add query.h copyright notice
  io_uring/query: return support for custom rx page size
  io_uring/zcrx: check unsupported flags on import
  io_uring/zcrx: fix post open error handling
  io_uring/zcrx: fix sgtable leak on mapping failures
  io_uring: use the right type for creds iteration
  io_uring/openclose: fix io_pipe_fixed() slot tracking for specific slots
  io_uring/filetable: clamp alloc_hint to the configured alloc range
  io_uring/rsrc: replace reg buffer bit field with flags
  io_uring/zcrx: improve types for size calculation
  io_uring/tctx: avoid modifying loop variable in io_ring_add_registered_file
  io_uring: simplify IORING_SETUP_DEFER_TASKRUN && !SQPOLL check
 include/uapi/linux/io_uring.h            |  8
 include/uapi/linux/io_uring/bpf_filter.h |  8
 include/uapi/linux/io_uring/query.h      |  6
 io_uring/bpf_filter.c                    | 82
 io_uring/cancel.h                        |  6
 io_uring/cmd_net.c                       |  9
 io_uring/filetable.c                     |  4
 io_uring/io_uring.c                      | 13
 io_uring/net.c                           |  2
 io_uring/opdef.c                         |  6
 io_uring/opdef.h                         |  6
 io_uring/openclose.c                     |  9
 io_uring/query.c                         |  2
 io_uring/rsrc.c                          | 43
 io_uring/rsrc.h                          |  6
 io_uring/rw.c                            |  3
 io_uring/tctx.c                          | 10
 io_uring/zcrx.c                          | 16
 18 files changed, 149 insertions(+), 90 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index fc473af6feb4..6750c383a2ab 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1090,6 +1090,14 @@ enum zcrx_reg_flags {
ZCRX_REG_IMPORT = 1,
};
+enum zcrx_features {
+ /*
+ * The user can ask for the desired rx page size by passing the
+ * value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
+ */
+ ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0,
+};
+
/*
* Argument for IORING_REGISTER_ZCRX_IFQ
*/
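
For context on how this feature bit is meant to be consumed, a small userspace sketch follows, assuming uapi headers that carry this change and that the zcrx feature mask has already been obtained through the query interface; the 16 KiB value is only an example:

#include <string.h>
#include <linux/io_uring.h>

/*
 * Fill in a zcrx interface-queue registration; if the kernel advertised
 * ZCRX_FEATURE_RX_PAGE_SIZE, ask for a larger rx buffer size via
 * rx_buf_len (0 keeps the default).
 */
static void zcrx_reg_init(struct io_uring_zcrx_ifq_reg *reg, __u32 features)
{
	memset(reg, 0, sizeof(*reg));
	/* if_idx, if_rxq, rq_entries, area_ptr, region_ptr set up elsewhere */
	if (features & ZCRX_FEATURE_RX_PAGE_SIZE)
		reg->rx_buf_len = 16384;
}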
diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h
index 220351b81bc0..1b461d792a7b 100644
--- a/include/uapi/linux/io_uring/bpf_filter.h
+++ b/include/uapi/linux/io_uring/bpf_filter.h
@@ -35,13 +35,19 @@ enum {
* If set, any currently unset opcode will have a deny filter attached
*/
IO_URING_BPF_FILTER_DENY_REST = 1,
+ /*
+ * If set, if kernel and application don't agree on pdu_size for
+ * the given opcode, fail the registration of the filter.
+ */
+ IO_URING_BPF_FILTER_SZ_STRICT = 2,
};
struct io_uring_bpf_filter {
__u32 opcode; /* io_uring opcode to filter */
__u32 flags;
__u32 filter_len; /* number of BPF instructions */
- __u32 resv;
+ __u8 pdu_size; /* expected pdu size for opcode */
+ __u8 resv[3];
__u64 filter_ptr; /* pointer to BPF filter */
__u64 resv2[5];
};
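
To show what the reshaped struct looks like from the application side, a hedged sketch follows; the registration call itself is left out, and the expected pdu size is whatever layout the filter program was generated against:

#include <string.h>
#include <linux/filter.h>
#include <linux/io_uring.h>
#include <linux/io_uring/bpf_filter.h>

/*
 * Prepare a strict filter registration for IORING_OP_OPENAT: if the
 * kernel's pdu layout for this opcode differs in size from what the
 * filter expects, registration now fails with -EMSGSIZE instead of the
 * filter silently running against a different layout.
 */
static void openat_filter_prep(struct io_uring_bpf *reg,
			       const struct sock_filter *insns,
			       __u32 nr_insns, __u8 expected_pdu_size)
{
	memset(reg, 0, sizeof(*reg));
	reg->cmd_type = IO_URING_BPF_CMD_FILTER;
	reg->filter.opcode = IORING_OP_OPENAT;
	reg->filter.flags = IO_URING_BPF_FILTER_SZ_STRICT;
	reg->filter.pdu_size = expected_pdu_size;
	reg->filter.filter_len = nr_insns;
	reg->filter.filter_ptr = (__u64)(unsigned long)insns;
}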
diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h
index 2456e6c5ebb5..95500759cc13 100644
--- a/include/uapi/linux/io_uring/query.h
+++ b/include/uapi/linux/io_uring/query.h
@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
* Header file for the io_uring query interface.
+ *
+ * Copyright (C) 2026 Pavel Begunkov <asml.silence@gmail.com>
+ * Copyright (C) Meta Platforms, Inc.
*/
#ifndef LINUX_IO_URING_QUERY_H
#define LINUX_IO_URING_QUERY_H
@@ -50,7 +53,8 @@ struct io_uring_query_zcrx {
__u64 area_flags;
/* The number of supported ZCRX_CTRL_* opcodes */
__u32 nr_ctrl_opcodes;
- __u32 __resv1;
+ /* Bitmask of ZCRX_FEATURE_* indicating which features are available */
+ __u32 features;
/* The refill ring header size */
__u32 rq_hdr_size;
/* The alignment for the header */
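
A minimal consumer of the new field might look like this; the query call that fills the entry is elided:

#include <stdbool.h>
#include <linux/io_uring.h>
#include <linux/io_uring/query.h>

/* Check whether a custom rx page size may be requested at registration. */
static bool zcrx_has_rx_page_size(const struct io_uring_query_zcrx *e)
{
	return e->features & ZCRX_FEATURE_RX_PAGE_SIZE;
}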
diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c
index 3816883a45ed..28a23e92ee81 100644
--- a/io_uring/bpf_filter.c
+++ b/io_uring/bpf_filter.c
@@ -26,6 +26,8 @@ static const struct io_bpf_filter dummy_filter;
static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
struct io_kiocb *req)
{
+ const struct io_issue_def *def = &io_issue_defs[req->opcode];
+
bctx->opcode = req->opcode;
bctx->sqe_flags = (__force int) req->flags & SQE_VALID_FLAGS;
bctx->user_data = req->cqe.user_data;
@@ -34,19 +36,12 @@ static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
sizeof(*bctx) - offsetof(struct io_uring_bpf_ctx, pdu_size));
/*
- * Opcodes can provide a handler fo populating more data into bctx,
+ * Opcodes can provide a handler for populating more data into bctx,
* for filters to use.
*/
- switch (req->opcode) {
- case IORING_OP_SOCKET:
- bctx->pdu_size = sizeof(bctx->socket);
- io_socket_bpf_populate(bctx, req);
- break;
- case IORING_OP_OPENAT:
- case IORING_OP_OPENAT2:
- bctx->pdu_size = sizeof(bctx->open);
- io_openat_bpf_populate(bctx, req);
- break;
+ if (def->filter_pdu_size) {
+ bctx->pdu_size = def->filter_pdu_size;
+ def->filter_populate(bctx, req);
}
}
@@ -313,36 +308,69 @@ err:
return ERR_PTR(-EBUSY);
}
-#define IO_URING_BPF_FILTER_FLAGS IO_URING_BPF_FILTER_DENY_REST
+#define IO_URING_BPF_FILTER_FLAGS (IO_URING_BPF_FILTER_DENY_REST | \
+ IO_URING_BPF_FILTER_SZ_STRICT)
-int io_register_bpf_filter(struct io_restriction *res,
- struct io_uring_bpf __user *arg)
+static int io_bpf_filter_import(struct io_uring_bpf *reg,
+ struct io_uring_bpf __user *arg)
{
- struct io_bpf_filters *filters, *old_filters = NULL;
- struct io_bpf_filter *filter, *old_filter;
- struct io_uring_bpf reg;
- struct bpf_prog *prog;
- struct sock_fprog fprog;
+ const struct io_issue_def *def;
int ret;
- if (copy_from_user(&reg, arg, sizeof(reg)))
+ if (copy_from_user(reg, arg, sizeof(*reg)))
return -EFAULT;
- if (reg.cmd_type != IO_URING_BPF_CMD_FILTER)
+ if (reg->cmd_type != IO_URING_BPF_CMD_FILTER)
return -EINVAL;
- if (reg.cmd_flags || reg.resv)
+ if (reg->cmd_flags || reg->resv)
return -EINVAL;
- if (reg.filter.opcode >= IORING_OP_LAST)
+ if (reg->filter.opcode >= IORING_OP_LAST)
return -EINVAL;
- if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
+ if (reg->filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
return -EINVAL;
- if (reg.filter.resv)
+ if (!mem_is_zero(reg->filter.resv, sizeof(reg->filter.resv)))
return -EINVAL;
- if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2)))
+ if (!mem_is_zero(reg->filter.resv2, sizeof(reg->filter.resv2)))
return -EINVAL;
- if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS)
+ if (!reg->filter.filter_len || reg->filter.filter_len > BPF_MAXINSNS)
return -EINVAL;
+ /* Verify filter size */
+ def = &io_issue_defs[array_index_nospec(reg->filter.opcode, IORING_OP_LAST)];
+
+ /* same size, always ok */
+ ret = 0;
+ if (reg->filter.pdu_size == def->filter_pdu_size)
+ ;
+ /* size differs, fail in strict mode */
+ else if (reg->filter.flags & IO_URING_BPF_FILTER_SZ_STRICT)
+ ret = -EMSGSIZE;
+ /* userspace filter is bigger, always disallow */
+ else if (reg->filter.pdu_size > def->filter_pdu_size)
+ ret = -EMSGSIZE;
+
+ /* copy back kernel filter size */
+ reg->filter.pdu_size = def->filter_pdu_size;
+ if (copy_to_user(&arg->filter, &reg->filter, sizeof(reg->filter)))
+ return -EFAULT;
+
+ return ret;
+}
+
+int io_register_bpf_filter(struct io_restriction *res,
+ struct io_uring_bpf __user *arg)
+{
+ struct io_bpf_filters *filters, *old_filters = NULL;
+ struct io_bpf_filter *filter, *old_filter;
+ struct io_uring_bpf reg;
+ struct bpf_prog *prog;
+ struct sock_fprog fprog;
+ int ret;
+
+ ret = io_bpf_filter_import(&reg, arg);
+ if (ret)
+ return ret;
+
fprog.len = reg.filter.filter_len;
fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr);
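
For the application side of this size negotiation, a hedged sketch follows; do_register_filter() is a made-up placeholder for whichever io_uring_register() opcode hands the struct to io_register_bpf_filter(), assumed to return a negative errno:

#include <errno.h>
#include <stdio.h>
#include <linux/io_uring/bpf_filter.h>

/* Placeholder for the actual registration call, assumed to return -errno. */
extern int do_register_filter(int ring_fd, struct io_uring_bpf *reg);

/*
 * After a failed strict registration the kernel has written its own
 * pdu size back into reg->filter.pdu_size, so the application can log
 * or adapt to the layout the running kernel actually provides.
 */
static int register_filter_checked(int ring_fd, struct io_uring_bpf *reg)
{
	int ret = do_register_filter(ring_fd, reg);

	if (ret == -EMSGSIZE)
		fprintf(stderr, "pdu size mismatch, kernel expects %u bytes\n",
			reg->filter.pdu_size);
	return ret;
}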
diff --git a/io_uring/cancel.h b/io_uring/cancel.h
index 6783961ede1b..1b201a094303 100644
--- a/io_uring/cancel.h
+++ b/io_uring/cancel.h
@@ -6,10 +6,8 @@
struct io_cancel_data {
struct io_ring_ctx *ctx;
- union {
- u64 data;
- struct file *file;
- };
+ u64 data;
+ struct file *file;
u8 opcode;
u32 flags;
int seq;
diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c
index cb2775936fb8..57ddaf874611 100644
--- a/io_uring/cmd_net.c
+++ b/io_uring/cmd_net.c
@@ -160,16 +160,19 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
struct proto *prot = READ_ONCE(sk->sk_prot);
int ret, arg = 0;
- if (!prot || !prot->ioctl)
- return -EOPNOTSUPP;
-
switch (cmd->cmd_op) {
case SOCKET_URING_OP_SIOCINQ:
+ if (!prot || !prot->ioctl)
+ return -EOPNOTSUPP;
+
ret = prot->ioctl(sk, SIOCINQ, &arg);
if (ret)
return ret;
return arg;
case SOCKET_URING_OP_SIOCOUTQ:
+ if (!prot || !prot->ioctl)
+ return -EOPNOTSUPP;
+
ret = prot->ioctl(sk, SIOCOUTQ, &arg);
if (ret)
return ret;
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index 794ef95df293..cb1838c9fc37 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -22,6 +22,10 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
if (!table->bitmap)
return -ENFILE;
+ if (table->alloc_hint < ctx->file_alloc_start ||
+ table->alloc_hint >= ctx->file_alloc_end)
+ table->alloc_hint = ctx->file_alloc_start;
+
do {
ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
if (ret != nr)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 1d5bc669afd9..ccab8562d273 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -119,7 +119,7 @@
static void io_queue_sqe(struct io_kiocb *req, unsigned int extra_flags);
static void __io_req_caches_free(struct io_ring_ctx *ctx);
-static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
+static __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(io_key_has_sqarray, HZ);
struct kmem_cache *req_cachep;
static struct workqueue_struct *iou_wq __ro_after_init;
@@ -1978,7 +1978,7 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
unsigned mask = ctx->sq_entries - 1;
unsigned head = ctx->cached_sq_head++ & mask;
- if (static_branch_unlikely(&io_key_has_sqarray) &&
+ if (static_branch_unlikely(&io_key_has_sqarray.key) &&
(!(ctx->flags & IORING_SETUP_NO_SQARRAY))) {
head = READ_ONCE(ctx->sq_array[head]);
if (unlikely(head >= ctx->sq_entries)) {
@@ -2173,7 +2173,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_rings_free(ctx);
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
- static_branch_dec(&io_key_has_sqarray);
+ static_branch_slow_dec_deferred(&io_key_has_sqarray);
percpu_ref_exit(&ctx->refs);
free_uid(ctx->user);
@@ -2398,7 +2398,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
unsigned long index;
- struct creds *creds;
+ struct cred *creds;
mutex_lock(&ctx->uring_lock);
percpu_ref_kill(&ctx->refs);
@@ -2946,11 +2946,10 @@ static __cold int io_uring_create(struct io_ctx_config *config)
ctx->clock_offset = 0;
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
- static_branch_inc(&io_key_has_sqarray);
+ static_branch_deferred_inc(&io_key_has_sqarray);
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
- !(ctx->flags & IORING_SETUP_IOPOLL) &&
- !(ctx->flags & IORING_SETUP_SQPOLL))
+ !(ctx->flags & IORING_SETUP_IOPOLL))
ctx->task_complete = true;
if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
diff --git a/io_uring/net.c b/io_uring/net.c
index a6f3cbb7dfea..8576c6cb2236 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1493,8 +1493,6 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
- zc->len -= ret;
- zc->buf += ret;
zc->done_io += ret;
return -EAGAIN;
}
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index df52d760240e..91a23baf415e 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -221,8 +221,10 @@ const struct io_issue_def io_issue_defs[] = {
.issue = io_fallocate,
},
[IORING_OP_OPENAT] = {
+ .filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, open),
.prep = io_openat_prep,
.issue = io_openat,
+ .filter_populate = io_openat_bpf_populate,
},
[IORING_OP_CLOSE] = {
.prep = io_close_prep,
@@ -309,8 +311,10 @@ const struct io_issue_def io_issue_defs[] = {
#endif
},
[IORING_OP_OPENAT2] = {
+ .filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, open),
.prep = io_openat2_prep,
.issue = io_openat2,
+ .filter_populate = io_openat_bpf_populate,
},
[IORING_OP_EPOLL_CTL] = {
.unbound_nonreg_file = 1,
@@ -406,8 +410,10 @@ const struct io_issue_def io_issue_defs[] = {
[IORING_OP_SOCKET] = {
.audit_skip = 1,
#if defined(CONFIG_NET)
+ .filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, socket),
.prep = io_socket_prep,
.issue = io_socket,
+ .filter_populate = io_socket_bpf_populate,
#else
.prep = io_eopnotsupp_prep,
#endif
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index aa37846880ff..faf3955dce8b 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -2,6 +2,8 @@
#ifndef IOU_OP_DEF_H
#define IOU_OP_DEF_H
+struct io_uring_bpf_ctx;
+
struct io_issue_def {
/* needs req->file assigned */
unsigned needs_file : 1;
@@ -33,8 +35,12 @@ struct io_issue_def {
/* size of async data needed, if any */
unsigned short async_size;
+ /* bpf filter pdu size, if any */
+ unsigned short filter_pdu_size;
+
int (*issue)(struct io_kiocb *, unsigned int);
int (*prep)(struct io_kiocb *, const struct io_uring_sqe *);
+ void (*filter_populate)(struct io_uring_bpf_ctx *, struct io_kiocb *);
};
struct io_cold_def {
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index d617b421b1e6..c71242915dad 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -345,31 +345,34 @@ static int io_pipe_fixed(struct io_kiocb *req, struct file **files,
{
struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
struct io_ring_ctx *ctx = req->ctx;
+ bool alloc_slot;
int ret, fds[2] = { -1, -1 };
int slot = p->file_slot;
if (p->flags & O_CLOEXEC)
return -EINVAL;
+ alloc_slot = slot == IORING_FILE_INDEX_ALLOC;
+
io_ring_submit_lock(ctx, issue_flags);
ret = __io_fixed_fd_install(ctx, files[0], slot);
if (ret < 0)
goto err;
- fds[0] = ret;
+ fds[0] = alloc_slot ? ret : slot - 1;
files[0] = NULL;
/*
* If a specific slot is given, next one will be used for
* the write side.
*/
- if (slot != IORING_FILE_INDEX_ALLOC)
+ if (!alloc_slot)
slot++;
ret = __io_fixed_fd_install(ctx, files[1], slot);
if (ret < 0)
goto err;
- fds[1] = ret;
+ fds[1] = alloc_slot ? ret : slot - 1;
files[1] = NULL;
io_ring_submit_unlock(ctx, issue_flags);
diff --git a/io_uring/query.c b/io_uring/query.c
index abdd6f3e1223..63cc30c9803d 100644
--- a/io_uring/query.c
+++ b/io_uring/query.c
@@ -39,7 +39,7 @@ static ssize_t io_query_zcrx(union io_query_data *data)
e->nr_ctrl_opcodes = __ZCRX_CTRL_LAST;
e->rq_hdr_size = sizeof(struct io_uring);
e->rq_hdr_alignment = L1_CACHE_BYTES;
- e->__resv1 = 0;
+ e->features = ZCRX_FEATURE_RX_PAGE_SIZE;
e->__resv2 = 0;
return sizeof(*e);
}
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 95ce553fff8d..842e231c8a7c 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -96,20 +96,6 @@ int io_validate_user_buf_range(u64 uaddr, u64 ulen)
return 0;
}
-static int io_buffer_validate(struct iovec *iov)
-{
- /*
- * Don't impose further limits on the size and buffer
- * constraints here, we'll -EINVAL later when IO is
- * submitted if they are wrong.
- */
- if (!iov->iov_base)
- return iov->iov_len ? -EFAULT : 0;
-
- return io_validate_user_buf_range((unsigned long)iov->iov_base,
- iov->iov_len);
-}
-
static void io_release_ubuf(void *priv)
{
struct io_mapped_ubuf *imu = priv;
@@ -319,9 +305,6 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
err = -EFAULT;
break;
}
- err = io_buffer_validate(iov);
- if (err)
- break;
node = io_sqe_buffer_register(ctx, iov, &last_hpage);
if (IS_ERR(node)) {
err = PTR_ERR(node);
@@ -790,8 +773,17 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
struct io_imu_folio_data data;
bool coalesced = false;
- if (!iov->iov_base)
+ if (!iov->iov_base) {
+ if (iov->iov_len)
+ return ERR_PTR(-EFAULT);
+ /* remove the buffer without installing a new one */
return NULL;
+ }
+
+ ret = io_validate_user_buf_range((unsigned long)iov->iov_base,
+ iov->iov_len);
+ if (ret)
+ return ERR_PTR(ret);
node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!node)
@@ -828,7 +820,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
imu->folio_shift = PAGE_SHIFT;
imu->release = io_release_ubuf;
imu->priv = imu;
- imu->is_kbuf = false;
+ imu->flags = 0;
imu->dir = IO_IMU_DEST | IO_IMU_SOURCE;
if (coalesced)
imu->folio_shift = data.folio_shift;
@@ -897,9 +889,6 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
ret = PTR_ERR(iov);
break;
}
- ret = io_buffer_validate(iov);
- if (ret)
- break;
if (ctx->compat)
arg += sizeof(struct compat_iovec);
else
@@ -985,7 +974,7 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
refcount_set(&imu->refs, 1);
imu->release = release;
imu->priv = rq;
- imu->is_kbuf = true;
+ imu->flags = IO_REGBUF_F_KBUF;
imu->dir = 1 << rq_data_dir(rq);
rq_for_each_bvec(bv, rq, rq_iter)
@@ -1020,7 +1009,7 @@ int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index,
ret = -EINVAL;
goto unlock;
}
- if (!node->buf->is_kbuf) {
+ if (!(node->buf->flags & IO_REGBUF_F_KBUF)) {
ret = -EBUSY;
goto unlock;
}
@@ -1076,7 +1065,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
offset = buf_addr - imu->ubuf;
- if (imu->is_kbuf)
+ if (imu->flags & IO_REGBUF_F_KBUF)
return io_import_kbuf(ddir, iter, imu, len, offset);
/*
@@ -1496,7 +1485,7 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
iovec_off = vec->nr - nr_iovs;
iov = vec->iovec + iovec_off;
- if (imu->is_kbuf) {
+ if (imu->flags & IO_REGBUF_F_KBUF) {
int ret = io_kern_bvec_size(iov, nr_iovs, imu, &nr_segs);
if (unlikely(ret))
@@ -1534,7 +1523,7 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
req->flags |= REQ_F_NEED_CLEANUP;
}
- if (imu->is_kbuf)
+ if (imu->flags & IO_REGBUF_F_KBUF)
return io_vec_fill_kern_bvec(ddir, iter, imu, iov, nr_iovs, vec);
return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec);
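
To make the removal path above concrete from userspace: passing a zeroed iovec during a registered-buffer update clears the slot without installing a replacement. A sketch, assuming liburing's io_uring_register_buffers_update_tag() helper:

#include <string.h>
#include <liburing.h>

/*
 * Clear registered buffer slot 'idx'; a NULL base with zero length
 * removes the buffer, matching the early return in
 * io_sqe_buffer_register() above.
 */
static int clear_reg_buffer(struct io_uring *ring, unsigned int idx)
{
	struct iovec iov;
	__u64 tag = 0;

	memset(&iov, 0, sizeof(iov));
	return io_uring_register_buffers_update_tag(ring, idx, &iov, &tag, 1);
}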
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 4a5db2ad1af2..cff0f8834c35 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -28,6 +28,10 @@ enum {
IO_IMU_SOURCE = 1 << ITER_SOURCE,
};
+enum {
+ IO_REGBUF_F_KBUF = 1,
+};
+
struct io_mapped_ubuf {
u64 ubuf;
unsigned int len;
@@ -37,7 +41,7 @@ struct io_mapped_ubuf {
unsigned long acct_pages;
void (*release)(void *);
void *priv;
- bool is_kbuf;
+ u8 flags;
u8 dir;
struct bio_vec bvec[] __counted_by(nr_bvecs);
};
diff --git a/io_uring/rw.c b/io_uring/rw.c
index d10386f56d49..b3971171c342 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -702,7 +702,8 @@ static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
if ((kiocb->ki_flags & IOCB_NOWAIT) &&
!(kiocb->ki_filp->f_flags & O_NONBLOCK))
return -EAGAIN;
- if ((req->flags & REQ_F_BUF_NODE) && req->buf_node->buf->is_kbuf)
+ if ((req->flags & REQ_F_BUF_NODE) &&
+ (req->buf_node->buf->flags & IO_REGBUF_F_KBUF))
return -EFAULT;
ppos = io_kiocb_ppos(kiocb);
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index ad9e4336d736..270263699c6f 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -240,14 +240,14 @@ void io_uring_unreg_ringfd(void)
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
int start, int end)
{
- int offset;
+ int offset, idx;
for (offset = start; offset < end; offset++) {
- offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
- if (tctx->registered_rings[offset])
+ idx = array_index_nospec(offset, IO_RINGFD_REG_MAX);
+ if (tctx->registered_rings[idx])
continue;
- tctx->registered_rings[offset] = file;
- return offset;
+ tctx->registered_rings[idx] = file;
+ return idx;
}
return -EBUSY;
}
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index d8b6db456bd7..28150c6578e3 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -205,7 +205,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
return PTR_ERR(pages);
ret = sg_alloc_table_from_pages(&mem->page_sg_table, pages, nr_pages,
- 0, nr_pages << PAGE_SHIFT,
+ 0, (unsigned long)nr_pages << PAGE_SHIFT,
GFP_KERNEL_ACCOUNT);
if (ret) {
unpin_user_pages(pages, nr_pages);
@@ -300,6 +300,9 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
}
ret = io_populate_area_dma(ifq, area);
+ if (ret && !area->mem.is_dmabuf)
+ dma_unmap_sgtable(ifq->dev, &area->mem.page_sg_table,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
if (ret == 0)
area->is_mapped = true;
return ret;
@@ -538,9 +541,6 @@ static void io_close_queue(struct io_zcrx_ifq *ifq)
.mp_priv = ifq,
};
- if (ifq->if_rxq == -1)
- return;
-
scoped_guard(mutex, &ifq->pp_lock) {
netdev = ifq->netdev;
netdev_tracker = ifq->netdev_tracker;
@@ -548,7 +548,8 @@ static void io_close_queue(struct io_zcrx_ifq *ifq)
}
if (netdev) {
- net_mp_close_rxq(netdev, ifq->if_rxq, &p);
+ if (ifq->if_rxq != -1)
+ net_mp_close_rxq(netdev, ifq->if_rxq, &p);
netdev_put(netdev, &netdev_tracker);
}
ifq->if_rxq = -1;
@@ -702,6 +703,8 @@ static int import_zcrx(struct io_ring_ctx *ctx,
return -EINVAL;
if (reg->if_rxq || reg->rq_entries || reg->area_ptr || reg->region_ptr)
return -EINVAL;
+ if (reg->flags & ~ZCRX_REG_IMPORT)
+ return -EINVAL;
fd = reg->if_idx;
CLASS(fd, f)(fd);
@@ -858,13 +861,12 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
}
return 0;
netdev_put_unlock:
- netdev_put(ifq->netdev, &ifq->netdev_tracker);
netdev_unlock(ifq->netdev);
err:
scoped_guard(mutex, &ctx->mmap_lock)
xa_erase(&ctx->zcrx_ctxs, id);
ifq_free:
- io_zcrx_ifq_free(ifq);
+ zcrx_unregister(ifq);
return ret;
}