diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-17 12:33:23 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-17 12:33:23 -0700 |
| commit | e771677c937da5808f7b6c1f0e4a97ec1a84f8a8 (patch) | |
| tree | cf2331bf991cafbefc2e4625ec8e6ddc87796faf | |
| parent | d076a8d3b9b36563fdd029ef33c79f713445970e (diff) | |
| parent | e28bee5b445178390d63f7a93a5a219063c6434e (diff) | |
Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd
Pull iommufd updates from Jason Gunthorpe:
"Various fixes:
- Fix a typo that broke the veventq uAPI for 32-bit userspace
- Fix several errors found by Sashiko in the veventq and fault fd paths
- Fix incorrect use of dmabuf locks, and possible races with iommufd
destroy and dmabuf revoke
- Fix errors found by Sashiko in the uAPI validation for IOMMU_HWPT_INVALIDATE"
* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd:
iommu: Avoid copying the user array twice in the full-array copy helper
iommufd/selftest: Add invalidation entry_num and entry_len boundary tests
iommufd: Set upper bounds on cache invalidation entry_num and entry_len
iommufd: Clarify IOAS_MAP_FILE dma-buf support
iommufd: Destroy the pages content after detaching from dmabuf
iommufd: Take dma_resv lock before dma_buf_unpin() in release path
iommufd/selftest: Cover invalid read counts on vEVENTQ FD
iommufd: Avoid partial fault group delivery in iommufd_fault_fops_read()
iommufd: Break the loop on failure in iommufd_fault_fops_read()
iommufd: Reject invalid read count in iommufd_fault_fops_read()
iommufd: Propagate allocation failure in iommufd_veventq_deliver_fetch()
iommufd: Reject invalid read count in iommufd_veventq_fops_read()
iommufd: Rewind header length in done if iommufd_veventq_fops_read() fails
iommufd/selftest: Add boundary tests for veventq_depth
iommufd: Set veventq_depth upper bound
iommufd: Move vevent memory allocation outside spinlock
iommufd: Fix data_len byte-count vs element-count mismatch
iommufd: Use sizeof(*hdr) instead of sizeof(hdr) in veventq read
| -rw-r--r-- | drivers/iommu/iommufd/driver.c | 13 | ||||
| -rw-r--r-- | drivers/iommu/iommufd/eventq.c | 36 | ||||
| -rw-r--r-- | drivers/iommu/iommufd/hw_pagetable.c | 11 | ||||
| -rw-r--r-- | drivers/iommu/iommufd/iommufd_private.h | 2 | ||||
| -rw-r--r-- | drivers/iommu/iommufd/pages.c | 10 | ||||
| -rw-r--r-- | include/linux/iommu.h | 1 | ||||
| -rw-r--r-- | include/uapi/linux/iommufd.h | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/iommu/iommufd.c | 51 | ||||
| -rw-r--r-- | tools/testing/selftests/iommu/iommufd_fail_nth.c | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/iommu/iommufd_utils.h | 17 |
10 files changed, 124 insertions, 31 deletions
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 61e6b02601d1..3b8067976eac 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -149,15 +149,18 @@ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, goto out_unlock_veventqs; } - spin_lock(&veventq->common.lock); - if (veventq->num_events == veventq->depth) { + /* Pre-allocate to avoid GFP_ATOMIC; use GFP_NOWAIT to avoid sleeping */ + vevent = kzalloc_flex(*vevent, event_data, data_len, GFP_NOWAIT); + if (!vevent) { + spin_lock(&veventq->common.lock); vevent = &veventq->lost_events_header; + rc = -ENOMEM; goto out_set_header; } - vevent = kzalloc_flex(*vevent, event_data, data_len, GFP_ATOMIC); - if (!vevent) { - rc = -ENOMEM; + spin_lock(&veventq->common.lock); + if (veventq->num_events == veventq->depth) { + kfree(vevent); vevent = &veventq->lost_events_header; goto out_set_header; } diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c index 710eef0b6004..5129e3bf5461 100644 --- a/drivers/iommu/iommufd/eventq.c +++ b/drivers/iommu/iommufd/eventq.c @@ -139,9 +139,14 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, mutex_lock(&fault->mutex); while ((group = iommufd_fault_deliver_fetch(fault))) { + size_t group_done = done; + if (done >= count || group->fault_count * fault_size > count - done) { iommufd_fault_deliver_restore(fault, group); + /* Read count doesn't fit the first fault group */ + if (done == 0) + rc = -EINVAL; break; } @@ -157,14 +162,17 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, iommufd_compose_fault_message(&iopf->fault, &data, idev, group->cookie); - if (copy_to_user(buf + done, &data, fault_size)) { + if (copy_to_user(buf + group_done, &data, fault_size)) { xa_erase(&fault->response, group->cookie); iommufd_fault_deliver_restore(fault, group); rc = -EFAULT; break; } - done += fault_size; + group_done += fault_size; } + if (rc) + 
break; + done = group_done; } mutex_unlock(&fault->mutex); @@ -264,8 +272,10 @@ iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq) /* Make a copy of the lost_events_header for copy_to_user */ if (next == &veventq->lost_events_header) { vevent = kzalloc_obj(*vevent, GFP_ATOMIC); - if (!vevent) + if (!vevent) { + vevent = ERR_PTR(-ENOMEM); goto out_unlock; + } } list_del(&next->node); if (vevent) @@ -310,8 +320,17 @@ static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf, if (*ppos) return -ESPIPE; + /* Minimum read count is a vEVENT header */ + if (count < sizeof(*hdr)) + return -EINVAL; while ((cur = iommufd_veventq_deliver_fetch(veventq))) { + if (IS_ERR(cur)) { + if (done == 0) + rc = PTR_ERR(cur); + break; + } + /* Validate the remaining bytes against the header size */ if (done >= count || sizeof(*hdr) > count - done) { iommufd_veventq_deliver_restore(veventq, cur); @@ -321,8 +340,11 @@ static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf, /* If being a normal vEVENT, validate against the full size */ if (!vevent_for_lost_events_header(cur) && - sizeof(hdr) + cur->data_len > count - done) { + sizeof(*hdr) + cur->data_len > count - done) { iommufd_veventq_deliver_restore(veventq, cur); + /* Read count doesn't fit a single normal vEVENT */ + if (done == 0) + rc = -EINVAL; break; } @@ -336,6 +358,7 @@ static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf, if (cur->data_len && copy_to_user(buf + done, cur->event_data, cur->data_len)) { iommufd_veventq_deliver_restore(veventq, cur); + done -= sizeof(*hdr); rc = -EFAULT; break; } @@ -473,6 +496,9 @@ int iommufd_fault_iopf_handler(struct iopf_group *group) static const struct file_operations iommufd_veventq_fops = INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL); +/* An arbitrary upper bound for veventq_depth that fits all existing HWs */ +#define VEVENTQ_MAX_DEPTH (1U << 19) + int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd) { 
struct iommu_veventq_alloc *cmd = ucmd->cmd; @@ -484,7 +510,7 @@ int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd) if (cmd->flags || cmd->__reserved || cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT) return -EOPNOTSUPP; - if (!cmd->veventq_depth) + if (!cmd->veventq_depth || cmd->veventq_depth > VEVENTQ_MAX_DEPTH) return -EINVAL; viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index fe789c2dc0c9..623cc608ca0c 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -489,6 +489,9 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd) return rc; } +/* An arbitrary entry_num cap, far above any realistic invalidation batch */ +#define IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX (1U << 19) + int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) { struct iommu_hwpt_invalidate *cmd = ucmd->cmd; @@ -507,7 +510,13 @@ int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) goto out; } - if (cmd->entry_num && (!cmd->data_uptr || !cmd->entry_len)) { + /* + * Bound entry_num and entry_len so a single call cannot pin the CPU; + * entry_len also caps the copy_struct_from_user() trailing-zero scan. 
+ */ + if (cmd->entry_num && + (!cmd->data_uptr || !cmd->entry_len || cmd->entry_len > PAGE_SIZE || + cmd->entry_num > IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX)) { rc = -EINVAL; goto out; } diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 6ac1965199e9..43fbc5bed8de 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -602,7 +602,7 @@ struct iommufd_vevent { struct iommufd_vevent_header header; struct list_head node; /* for iommufd_eventq::deliver */ ssize_t data_len; - u64 event_data[] __counted_by(data_len); + u8 event_data[] __counted_by(data_len); }; #define vevent_for_lost_events_header(vevent) \ diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 9bdb2945afe1..03c8379bbc34 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1656,20 +1656,22 @@ void iopt_release_pages(struct kref *kref) WARN_ON(!RB_EMPTY_ROOT(&pages->domains_itree.rb_root)); WARN_ON(pages->npinned); WARN_ON(!xa_empty(&pages->pinned_pfns)); - mmdrop(pages->source_mm); - mutex_destroy(&pages->mutex); - put_task_struct(pages->source_task); - free_uid(pages->source_user); if (iopt_is_dmabuf(pages) && pages->dmabuf.attach) { struct dma_buf *dmabuf = pages->dmabuf.attach->dmabuf; + dma_resv_lock(dmabuf->resv, NULL); dma_buf_unpin(pages->dmabuf.attach); + dma_resv_unlock(dmabuf->resv); dma_buf_detach(dmabuf, pages->dmabuf.attach); dma_buf_put(dmabuf); WARN_ON(!list_empty(&pages->dmabuf.tracker)); } else if (pages->type == IOPT_ADDRESS_FILE) { fput(pages->file); } + mmdrop(pages->source_mm); + mutex_destroy(&pages->mutex); + put_task_struct(pages->source_task); + free_uid(pages->source_user); kfree(pages); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bf8a77a164e4..d20aa6f6863a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -575,6 +575,7 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t 
kdst_entry_size, user_array->entry_num * user_array->entry_len)) return -EFAULT; + return 0; } /* Copy item by item */ diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index e998dfbd6960..0425d452d41e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -224,13 +224,17 @@ struct iommu_ioas_map { * @size: sizeof(struct iommu_ioas_map_file) * @flags: same as for iommu_ioas_map * @ioas_id: same as for iommu_ioas_map - * @fd: the memfd to map - * @start: byte offset from start of file to map from + * @fd: the memfd or supported dma-buf file to map + * @start: byte offset from start of the file to map from * @length: same as for iommu_ioas_map * @iova: same as for iommu_ioas_map * - * Set an IOVA mapping from a memfd file. All other arguments and semantics - * match those of IOMMU_IOAS_MAP. + * Set an IOVA mapping from a memfd file. On kernels with dma-buf support, + * supported dma-buf files may also be accepted. This is not a generic + * dma-buf import path; currently supported dma-bufs include single-range + * VFIO PCI dma-bufs exported through VFIO_DEVICE_FEATURE_DMA_BUF, and + * other dma-bufs may be rejected. All other arguments and semantics match + * those of IOMMU_IOAS_MAP. 
*/ struct iommu_ioas_map_file { __u32 size; diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index d1fe5dbc2813..d44b34b05757 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -556,6 +556,21 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) 1, &num_inv); assert(!num_inv); + /* Negative test: entry_len is bounded by PAGE_SIZE */ + num_inv = 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + PAGE_SIZE + 1, &num_inv); + assert(!num_inv); + + /* Negative test: entry_num is bounded */ +#define IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX (1U << 19) + num_inv = IOMMU_HWPT_INVALIDATE_ENTRY_NUM_MAX + 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + /* Negative test: invalid flag is passed */ num_inv = 1; inv_reqs[0].flags = 0xffffffff; @@ -2980,22 +2995,54 @@ TEST_F(iommufd_viommu, vdevice_alloc) uint32_t veventq_id; uint32_t veventq_fd; int prev_seq = -1; + size_t hdr_size = sizeof(struct iommufd_vevent_header); + char vbuf[64]; if (dev_id) { /* Must allocate vdevice before attaching to a nested hwpt */ test_err_mock_domain_replace(ENOENT, self->stdev_id, self->nested_hwpt_id); + /* Test depth lower and upper bounds (mirrors kernel cap) */ +#define VEVENTQ_MAX_DEPTH (1U << 19) + test_err_veventq_alloc(EINVAL, viommu_id, + IOMMU_VEVENTQ_TYPE_SELFTEST, 0, NULL, + NULL); + test_err_veventq_alloc(EINVAL, viommu_id, + IOMMU_VEVENTQ_TYPE_SELFTEST, + VEVENTQ_MAX_DEPTH + 1, NULL, NULL); + test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST, + VEVENTQ_MAX_DEPTH, &veventq_id, + &veventq_fd); + close(veventq_fd); + test_ioctl_destroy(veventq_id); + /* Allocate a vEVENTQ with veventq_depth=2 */ test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST, - &veventq_id, &veventq_fd); + 2, &veventq_id, &veventq_fd); 
test_err_veventq_alloc(EEXIST, viommu_id, - IOMMU_VEVENTQ_TYPE_SELFTEST, NULL, NULL); + IOMMU_VEVENTQ_TYPE_SELFTEST, 2, NULL, + NULL); + + /* Invalid read counts on an empty vEVENTQ */ + ASSERT_EQ(-1, read(veventq_fd, vbuf, 0)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(-1, read(veventq_fd, vbuf, hdr_size - 1)); + ASSERT_EQ(EINVAL, errno); + /* Set vdev_id to 0x99, unset it, and set to 0x88 */ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); test_cmd_mock_domain_replace(self->stdev_id, self->nested_hwpt_id); test_cmd_trigger_vevents(dev_id, 1); + + /* Invalid read counts on a non-empty vEVENTQ */ + ASSERT_EQ(-1, read(veventq_fd, vbuf, 0)); + ASSERT_EQ(EINVAL, errno); + /* header fits but the event's payload doesn't */ + ASSERT_EQ(-1, read(veventq_fd, vbuf, hdr_size)); + ASSERT_EQ(EINVAL, errno); + test_cmd_read_vevents(veventq_fd, 1, 0x99, &prev_seq); test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99, &vdev_id); diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 45c14323a618..25495d8dceb3 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -712,7 +712,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) return -1; if (_test_cmd_veventq_alloc(self->fd, viommu_id, - IOMMU_VEVENTQ_TYPE_SELFTEST, &veventq_id, + IOMMU_VEVENTQ_TYPE_SELFTEST, 2, &veventq_id, &veventq_fd)) return -1; close(veventq_fd); diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 5502751d500c..b4928cbd4d9c 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -1060,12 +1060,13 @@ static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type, base_addr, len, out_qid)) static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type, - __u32 *veventq_id, __u32 *veventq_fd) + __u32 depth, __u32 *veventq_id, + __u32 *veventq_fd) { 
struct iommu_veventq_alloc cmd = { .size = sizeof(cmd), .type = type, - .veventq_depth = 2, + .veventq_depth = depth, .viommu_id = viommu_id, }; int ret; @@ -1080,13 +1081,13 @@ static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type, return 0; } -#define test_cmd_veventq_alloc(viommu_id, type, veventq_id, veventq_fd) \ - ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, \ +#define test_cmd_veventq_alloc(viommu_id, type, depth, veventq_id, veventq_fd) \ + ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, depth, \ veventq_id, veventq_fd)) -#define test_err_veventq_alloc(_errno, viommu_id, type, veventq_id, \ - veventq_fd) \ - EXPECT_ERRNO(_errno, \ - _test_cmd_veventq_alloc(self->fd, viommu_id, type, \ +#define test_err_veventq_alloc(_errno, viommu_id, type, depth, veventq_id, \ + veventq_fd) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_veventq_alloc(self->fd, viommu_id, type, depth, \ veventq_id, veventq_fd)) static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents) |
