summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-06-17 12:33:23 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-06-17 12:33:23 -0700
commit e771677c937da5808f7b6c1f0e4a97ec1a84f8a8 (patch)
tree cf2331bf991cafbefc2e4625ec8e6ddc87796faf
parent d076a8d3b9b36563fdd029ef33c79f713445970e (diff)
parent e28bee5b445178390d63f7a93a5a219063c6434e (diff)
Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd
Pull iommufd updates from Jason Gunthorpe:
 "All various fixes:

   - Typo breaking the veventq uAPI for 32 bit userspace

   - Several Sashiko found errors in the veventq and fault fd paths

   - Fix incorrect use of dmabuf locks, and possible races with iommufd
     destroy and dmabuf revoke

   - Sashiko errors found in the uAPI validation for
     IOMMU_HWPT_INVALIDATE"

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd:
  iommu: Avoid copying the user array twice in the full-array copy helper
  iommufd/selftest: Add invalidation entry_num and entry_len boundary tests
  iommufd: Set upper bounds on cache invalidation entry_num and entry_len
  iommufd: Clarify IOAS_MAP_FILE dma-buf support
  iommufd: Destroy the pages content after detaching from dmabuf
  iommufd: Take dma_resv lock before dma_buf_unpin() in release path
  iommufd/selftest: Cover invalid read counts on vEVENTQ FD
  iommufd: Avoid partial fault group delivery in iommufd_fault_fops_read()
  iommufd: Break the loop on failure in iommufd_fault_fops_read()
  iommufd: Reject invalid read count in iommufd_fault_fops_read()
  iommufd: Propagate allocation failure in iommufd_veventq_deliver_fetch()
  iommufd: Reject invalid read count in iommufd_veventq_fops_read()
  iommufd: Rewind header length in done if iommufd_veventq_fops_read() fails
  iommufd/selftest: Add boundary tests for veventq_depth
  iommufd: Set veventq_depth upper bound
  iommufd: Move vevent memory allocation outside spinlock
  iommufd: Fix data_len byte-count vs element-count mismatch
  iommufd: Use sizeof(*hdr) instead of sizeof(hdr) in veventq read
-rw-r--r-- drivers/iommu/iommufd/driver.c 13
-rw-r--r-- drivers/iommu/iommufd/eventq.c 36
-rw-r--r-- drivers/iommu/iommufd/hw_pagetable.c 11
-rw-r--r-- drivers/iommu/iommufd/iommufd_private.h 2
-rw-r--r-- drivers/iommu/iommufd/pages.c 10
-rw-r--r-- include/linux/iommu.h 1
-rw-r--r-- include/uapi/linux/iommufd.h 12
-rw-r--r-- tools/testing/selftests/iommu/iommufd.c 51
-rw-r--r-- tools/testing/selftests/iommu/iommufd_fail_nth.c 2
-rw-r--r-- tools/testing/selftests/iommu/iommufd_utils.h 17
10 files changed, 124 insertions, 31 deletions
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 61e6b02601d1..3b8067976eac 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -149,15 +149,18 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
goto out_unlock_veventqs;
}
- spin_lock(&veventq->common.lock);
- if (veventq->num_events == veventq->depth) {
+ /* Pre-allocate to avoid GFP_ATOMIC; use GFP_NOWAIT to avoid sleeping */
+ vevent = kzalloc_flex(*vevent, event_data, data_len, GFP_NOWAIT);
+ if (!vevent) {
+ spin_lock(&veventq->common.lock);
vevent = &veventq->lost_events_header;
+ rc = -ENOMEM;
goto out_set_header;
}
- vevent = kzalloc_flex(*vevent, event_data, data_len, GFP_ATOMIC);
- if (!vevent) {
- rc = -ENOMEM;
+ spin_lock(&veventq->common.lock);
+ if (veventq->num_events == veventq->depth) {
+ kfree(vevent);
vevent = &veventq->lost_events_header;
goto out_set_header;
}
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
index 710eef0b6004..5129e3bf5461 100644
--- a/drivers/iommu/iommufd/eventq.c
+++ b/drivers/iommu/iommufd/eventq.c
@@ -139,9 +139,14 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
mutex_lock(&fault->mutex);
while ((group = iommufd_fault_deliver_fetch(fault))) {
+ size_t group_done = done;
+
if (done >= count ||
group->fault_count * fault_size > count - done) {
iommufd_fault_deliver_restore(fault, group);
+ /* Read count doesn't fit the first fault group */
+ if (done == 0)
+ rc = -EINVAL;
break;
}
@@ -157,14 +162,17 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
iommufd_compose_fault_message(&iopf->fault,
&data, idev,
group->cookie);
- if (copy_to_user(buf + done, &data, fault_size)) {
+ if (copy_to_user(buf + group_done, &data, fault_size)) {
xa_erase(&fault->response, group->cookie);
iommufd_fault_deliver_restore(fault, group);
rc = -EFAULT;
break;
}
- done += fault_size;
+ group_done += fault_size;
}
+ if (rc)
+ break;
+ done = group_done;
}
mutex_unlock(&fault->mutex);
@@ -264,8 +272,10 @@ iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq)
/* Make a copy of the lost_events_header for copy_to_user */
if (next == &veventq->lost_events_header) {
vevent = kzalloc_obj(*vevent, GFP_ATOMIC);
- if (!vevent)
+ if (!vevent) {
+ vevent = ERR_PTR(-ENOMEM);
goto out_unlock;
+ }
}
list_del(&next->node);
if (vevent)
@@ -310,8 +320,17 @@ static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
if (*ppos)
return -ESPIPE;
+ /* Minimum read count is a vEVENT header */
+ if (count < sizeof(*hdr))
+ return -EINVAL;
while ((cur = iommufd_veventq_deliver_fetch(veventq))) {
+ if (IS_ERR(cur)) {
+ if (done == 0)
+ rc = PTR_ERR(cur);
+ break;
+ }
+
/* Validate the remaining bytes against the header size */
if (done >= count || sizeof(*hdr) > count - done) {
iommufd_veventq_deliver_restore(veventq, cur);
@@ -321,8 +340,11 @@ static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
/* If being a normal vEVENT, validate against the full size */
if (!vevent_for_lost_events_header(cur) &&
- sizeof(hdr) + cur->data_len > count - done) {
+ sizeof(*hdr) + cur->data_len > count - done) {
iommufd_veventq_deliver_restore(veventq, cur);
+ /* Read count doesn't fit a single normal vEVENT */
+ if (done == 0)
+ rc = -EINVAL;
break;
}
@@ -336,6 +358,7 @@ static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
if (cur->data_len &&
copy_to_user(buf + done, cur->event_data, cur->data_len)) {
iommufd_veventq_deliver_restore(veventq, cur);
+ done -= sizeof(*hdr);
rc = -EFAULT;
break;
}
@@ -473,6 +496,9 @@ int iommufd_fault_iopf_handler(struct iopf_group *group)
static const struct file_operations iommufd_veventq_fops =
INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL);
+/* An arbitrary upper bound for veventq_depth that fits all existing HWs */
+#define VEVENTQ_MAX_DEPTH (1U << 19)
+
int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
{
struct iommu_veventq_alloc *cmd = ucmd->cmd;
@@ -484,7 +510,7 @@ int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
if (cmd->flags || cmd->__reserved ||
cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
return -EOPNOTSUPP;
- if (!cmd->veventq_depth)
+ if (!cmd->veventq_depth || cmd->veventq_depth > VEVENTQ_MAX_DEPTH)
return -EINVAL;
viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index fe789c2dc0c9..623cc608ca0c 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -489,6 +489,9 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd)
return rc;
}
+/* An arbitrary entry_num cap, far above any realistic invalidation batch */
+#define IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX (1U << 19)
+
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
{
struct iommu_hwpt_invalidate *cmd = ucmd->cmd;
@@ -507,7 +510,13 @@ int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd)
goto out;
}
- if (cmd->entry_num && (!cmd->data_uptr || !cmd->entry_len)) {
+ /*
+ * Bound entry_num and entry_len so a single call cannot pin the CPU;
+ * entry_len also caps the copy_struct_from_user() trailing-zero scan.
+ */
+ if (cmd->entry_num &&
+ (!cmd->data_uptr || !cmd->entry_len || cmd->entry_len > PAGE_SIZE ||
+ cmd->entry_num > IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX)) {
rc = -EINVAL;
goto out;
}
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 6ac1965199e9..43fbc5bed8de 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -602,7 +602,7 @@ struct iommufd_vevent {
struct iommufd_vevent_header header;
struct list_head node; /* for iommufd_eventq::deliver */
ssize_t data_len;
- u64 event_data[] __counted_by(data_len);
+ u8 event_data[] __counted_by(data_len);
};
#define vevent_for_lost_events_header(vevent) \
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 9bdb2945afe1..03c8379bbc34 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -1656,20 +1656,22 @@ void iopt_release_pages(struct kref *kref)
WARN_ON(!RB_EMPTY_ROOT(&pages->domains_itree.rb_root));
WARN_ON(pages->npinned);
WARN_ON(!xa_empty(&pages->pinned_pfns));
- mmdrop(pages->source_mm);
- mutex_destroy(&pages->mutex);
- put_task_struct(pages->source_task);
- free_uid(pages->source_user);
if (iopt_is_dmabuf(pages) && pages->dmabuf.attach) {
struct dma_buf *dmabuf = pages->dmabuf.attach->dmabuf;
+ dma_resv_lock(dmabuf->resv, NULL);
dma_buf_unpin(pages->dmabuf.attach);
+ dma_resv_unlock(dmabuf->resv);
dma_buf_detach(dmabuf, pages->dmabuf.attach);
dma_buf_put(dmabuf);
WARN_ON(!list_empty(&pages->dmabuf.tracker));
} else if (pages->type == IOPT_ADDRESS_FILE) {
fput(pages->file);
}
+ mmdrop(pages->source_mm);
+ mutex_destroy(&pages->mutex);
+ put_task_struct(pages->source_task);
+ free_uid(pages->source_user);
kfree(pages);
}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bf8a77a164e4..d20aa6f6863a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -575,6 +575,7 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size,
user_array->entry_num *
user_array->entry_len))
return -EFAULT;
+ return 0;
}
/* Copy item by item */
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index e998dfbd6960..0425d452d41e 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -224,13 +224,17 @@ struct iommu_ioas_map {
* @size: sizeof(struct iommu_ioas_map_file)
* @flags: same as for iommu_ioas_map
* @ioas_id: same as for iommu_ioas_map
- * @fd: the memfd to map
- * @start: byte offset from start of file to map from
+ * @fd: the memfd or supported dma-buf file to map
+ * @start: byte offset from start of the file to map from
* @length: same as for iommu_ioas_map
* @iova: same as for iommu_ioas_map
*
- * Set an IOVA mapping from a memfd file. All other arguments and semantics
- * match those of IOMMU_IOAS_MAP.
+ * Set an IOVA mapping from a memfd file. On kernels with dma-buf support,
+ * supported dma-buf files may also be accepted. This is not a generic
+ * dma-buf import path; currently supported dma-bufs include single-range
+ * VFIO PCI dma-bufs exported through VFIO_DEVICE_FEATURE_DMA_BUF, and
+ * other dma-bufs may be rejected. All other arguments and semantics match
+ * those of IOMMU_IOAS_MAP.
*/
struct iommu_ioas_map_file {
__u32 size;
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index d1fe5dbc2813..d44b34b05757 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -556,6 +556,21 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
1, &num_inv);
assert(!num_inv);
+ /* Negative test: entry_len is bounded by PAGE_SIZE */
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ PAGE_SIZE + 1, &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: entry_num is bounded */
+#define IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX (1U << 19)
+ num_inv = IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX + 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
/* Negative test: invalid flag is passed */
num_inv = 1;
inv_reqs[0].flags = 0xffffffff;
@@ -2980,22 +2995,54 @@ TEST_F(iommufd_viommu, vdevice_alloc)
uint32_t veventq_id;
uint32_t veventq_fd;
int prev_seq = -1;
+ size_t hdr_size = sizeof(struct iommufd_vevent_header);
+ char vbuf[64];
if (dev_id) {
/* Must allocate vdevice before attaching to a nested hwpt */
test_err_mock_domain_replace(ENOENT, self->stdev_id,
self->nested_hwpt_id);
+ /* Test depth lower and upper bounds (mirrors kernel cap) */
+#define VEVENTQ_MAX_DEPTH (1U << 19)
+ test_err_veventq_alloc(EINVAL, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, 0, NULL,
+ NULL);
+ test_err_veventq_alloc(EINVAL, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST,
+ VEVENTQ_MAX_DEPTH + 1, NULL, NULL);
+ test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST,
+ VEVENTQ_MAX_DEPTH, &veventq_id,
+ &veventq_fd);
+ close(veventq_fd);
+ test_ioctl_destroy(veventq_id);
+
/* Allocate a vEVENTQ with veventq_depth=2 */
test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST,
- &veventq_id, &veventq_fd);
+ 2, &veventq_id, &veventq_fd);
test_err_veventq_alloc(EEXIST, viommu_id,
- IOMMU_VEVENTQ_TYPE_SELFTEST, NULL, NULL);
+ IOMMU_VEVENTQ_TYPE_SELFTEST, 2, NULL,
+ NULL);
+
+ /* Invalid read counts on an empty vEVENTQ */
+ ASSERT_EQ(-1, read(veventq_fd, vbuf, 0));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, read(veventq_fd, vbuf, hdr_size - 1));
+ ASSERT_EQ(EINVAL, errno);
+
/* Set vdev_id to 0x99, unset it, and set to 0x88 */
test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
test_cmd_mock_domain_replace(self->stdev_id,
self->nested_hwpt_id);
test_cmd_trigger_vevents(dev_id, 1);
+
+ /* Invalid read counts on a non-empty vEVENTQ */
+ ASSERT_EQ(-1, read(veventq_fd, vbuf, 0));
+ ASSERT_EQ(EINVAL, errno);
+ /* header fits but the event's payload doesn't */
+ ASSERT_EQ(-1, read(veventq_fd, vbuf, hdr_size));
+ ASSERT_EQ(EINVAL, errno);
+
test_cmd_read_vevents(veventq_fd, 1, 0x99, &prev_seq);
test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99,
&vdev_id);
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index 45c14323a618..25495d8dceb3 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -712,7 +712,7 @@ TEST_FAIL_NTH(basic_fail_nth, device)
return -1;
if (_test_cmd_veventq_alloc(self->fd, viommu_id,
- IOMMU_VEVENTQ_TYPE_SELFTEST, &veventq_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, 2, &veventq_id,
&veventq_fd))
return -1;
close(veventq_fd);
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 5502751d500c..b4928cbd4d9c 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -1060,12 +1060,13 @@ static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type,
base_addr, len, out_qid))
static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type,
- __u32 *veventq_id, __u32 *veventq_fd)
+ __u32 depth, __u32 *veventq_id,
+ __u32 *veventq_fd)
{
struct iommu_veventq_alloc cmd = {
.size = sizeof(cmd),
.type = type,
- .veventq_depth = 2,
+ .veventq_depth = depth,
.viommu_id = viommu_id,
};
int ret;
@@ -1080,13 +1081,13 @@ static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type,
return 0;
}
-#define test_cmd_veventq_alloc(viommu_id, type, veventq_id, veventq_fd) \
- ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+#define test_cmd_veventq_alloc(viommu_id, type, depth, veventq_id, veventq_fd) \
+ ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, depth, \
veventq_id, veventq_fd))
-#define test_err_veventq_alloc(_errno, viommu_id, type, veventq_id, \
- veventq_fd) \
- EXPECT_ERRNO(_errno, \
- _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+#define test_err_veventq_alloc(_errno, viommu_id, type, depth, veventq_id, \
+ veventq_fd) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_veventq_alloc(self->fd, viommu_id, type, depth, \
veventq_id, veventq_fd))
static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents)