From 3c4629b68dbe18e454cce4b864c530268cffbeed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 22 Dec 2025 09:00:33 +0100 Subject: virtio: uapi: avoid usage of libc types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using libc types and headers from the UAPI headers is problematic as it introduces a dependency on a full C toolchain. On Linux 'unsigned long' works as a replacement for 'uintptr_t' and does not depend on libc. Signed-off-by: Thomas Weißschuh Acked-by: Arnd Bergmann Signed-off-by: Michael S. Tsirkin Message-Id: <20251222-uapi-virtio-v1-1-29390f87bcad@linutronix.de> --- include/uapi/linux/virtio_ring.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index f8c20d3de8da..3c478582a3c2 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -31,9 +31,6 @@ * SUCH DAMAGE. * * Copyright Rusty Russell IBM Corporation 2007. */ -#ifndef __KERNEL__ -#include -#endif #include #include @@ -202,7 +199,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, vr->num = num; vr->desc = p; vr->avail = (struct vring_avail *)((char *)p + num * sizeof(struct vring_desc)); - vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__virtio16) + align-1) & ~(align - 1)); } -- cgit v1.2.3 From ca085faabb42c31ee204235facc5a430cb9e78a9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Dec 2025 03:53:39 -0500 Subject: dma-mapping: add __dma_from_device_group_begin()/end() When a structure contains a buffer that DMA writes to alongside fields that the CPU writes to, cache line sharing between the DMA buffer and CPU-written fields can cause data corruption on non-cache-coherent platforms. Add __dma_from_device_group_begin()/end() annotations to ensure proper alignment to prevent this: struct my_device { spinlock_t lock1; __dma_from_device_group_begin(); char dma_buffer1[16]; char dma_buffer2[16]; __dma_from_device_group_end(); spinlock_t lock2; }; Message-ID: <19163086d5e4704c316f18f6da06bc1c72968904.1767601130.git.mst@redhat.com> Acked-by: Marek Szyprowski Reviewed-by: Petr Tesarik Signed-off-by: Michael S. Tsirkin --- include/linux/dma-mapping.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index aa36a0d1d9df..29ad2ce700f0 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -7,6 +7,7 @@ #include #include #include +#include /** * List of possible attributes associated with a DMA mapping. The semantics @@ -703,6 +704,18 @@ static inline int dma_get_cache_alignment(void) } #endif +#ifdef ARCH_HAS_DMA_MINALIGN +#define ____dma_from_device_aligned __aligned(ARCH_DMA_MINALIGN) +#else +#define ____dma_from_device_aligned +#endif +/* Mark start of DMA buffer */ +#define __dma_from_device_group_begin(GROUP) \ + __cacheline_group_begin(GROUP) ____dma_from_device_aligned +/* Mark end of DMA buffer */ +#define __dma_from_device_group_end(GROUP) \ + __cacheline_group_end(GROUP) ____dma_from_device_aligned + static inline void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { -- cgit v1.2.3 From 61868dc55a119a5e4b912d458fc2c48ba80a35fe Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Dec 2025 07:28:43 -0500 Subject: dma-mapping: add DMA_ATTR_CPU_CACHE_CLEAN When multiple small DMA_FROM_DEVICE or DMA_BIDIRECTIONAL buffers share a cacheline, and DMA_API_DEBUG is enabled, we get this warning: cacheline tracking EEXIST, overlapping mappings aren't supported. This is because when one of the mappings is removed, while another one is active, CPU might write into the buffer. Add an attribute for the driver to promise not to do this, making the overlapping safe, and suppressing the warning. Message-ID: <2d5d091f9d84b68ea96abd545b365dd1d00bbf48.1767601130.git.mst@redhat.com> Reviewed-by: Petr Tesarik Acked-by: Marek Szyprowski Signed-off-by: Michael S. Tsirkin --- include/linux/dma-mapping.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 29ad2ce700f0..29973baa0581 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -79,6 +79,13 @@ */ #define DMA_ATTR_MMIO (1UL << 10) +/* + * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline + * overlapping this buffer while it is mapped for DMA. All mappings sharing + * a cacheline must have this attribute for this to be considered safe. + */ +#define DMA_ATTR_CPU_CACHE_CLEAN (1UL << 11) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a -- cgit v1.2.3 From 5fc6dd158e97d317aeb85ea930613f8db172603b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Dec 2025 13:25:23 -0500 Subject: virtio: add virtqueue_add_inbuf_cache_clean API Add virtqueue_add_inbuf_cache_clean() for passing DMA_ATTR_CPU_CACHE_CLEAN to virtqueue operations. This suppresses DMA debug cacheline overlap warnings for buffers where proper cache management is ensured by the caller. Message-ID: Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 3626eb694728..63bb05ece8c5 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -62,6 +62,11 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + gfp_t gfp); + int virtqueue_add_inbuf_ctx(struct virtqueue *vq, struct scatterlist sg[], unsigned int num, void *data, -- cgit v1.2.3 From a006ed4ecd4905b69402980ad7d4e5f31bf44953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Mon, 19 Jan 2026 15:32:55 +0100 Subject: vduse: add v1 API definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the kernel to detect whether the userspace VDUSE device supports the VQ group and ASID features. VDUSE devices that don't set the V1 API will not receive the new messages, and vdpa device will be created with only one vq group and asid. The next patches implement the new feature incrementally, only enabling the VDUSE device to set the V1 API version by the end of the series. Acked-by: Jason Wang Reviewed-by: Xie Yongji Signed-off-by: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Message-Id: <20260119143306.1818855-3-eperezma@redhat.com> --- include/uapi/linux/vduse.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 10ad71aa00d6..ccb92a1efce0 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -10,6 +10,10 @@ #define VDUSE_API_VERSION 0 +/* VQ groups and ASID support */ + +#define VDUSE_API_VERSION_1 1 + /* * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION). * This is used for future extension. -- cgit v1.2.3 From 9350a09afd086771b0612c7b7c9583e8a1568135 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Mon, 19 Jan 2026 15:32:56 +0100 Subject: vduse: add vq group support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows separate the different virtqueues in groups that shares the same address space. Asking the VDUSE device for the groups of the vq at the beginning as they're needed for the DMA API. Allocating 3 vq groups as net is the device that need the most groups: * Dataplane (guest passthrough) * CVQ * Shadowed vrings. Future versions of the series can include dynamic allocation of the groups array so VDUSE can declare more groups. Acked-by: Jason Wang Reviewed-by: Xie Yongji Signed-off-by: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Message-Id: <20260119143306.1818855-4-eperezma@redhat.com> --- include/uapi/linux/vduse.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index ccb92a1efce0..a3d51cf6df3a 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -31,6 +31,7 @@ * @features: virtio features * @vq_num: the number of virtqueues * @vq_align: the allocation alignment of virtqueue's metadata + * @ngroups: number of vq groups that VDUSE device declares * @reserved: for future use, needs to be initialized to zero * @config_size: the size of the configuration space * @config: the buffer of the configuration space @@ -45,7 +46,8 @@ struct vduse_dev_config { __u64 features; __u32 vq_num; __u32 vq_align; - __u32 reserved[13]; + __u32 ngroups; /* if VDUSE_API_VERSION >= 1 */ + __u32 reserved[12]; __u32 config_size; __u8 config[]; }; @@ -122,14 +124,18 @@ struct vduse_config_data { * struct vduse_vq_config - basic configuration of a virtqueue * @index: virtqueue index * @max_size: the max size of virtqueue - * @reserved: for future use, needs to be initialized to zero + * @reserved1: for future use, needs to be initialized to zero + * @group: virtqueue group + * @reserved2: for future use, needs to be initialized to zero * * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue. */ struct vduse_vq_config { __u32 index; __u16 max_size; - __u16 reserved[13]; + __u16 reserved1; + __u32 group; + __u16 reserved2[10]; }; /* -- cgit v1.2.3 From 02e3f7ffe2906033da73b7c7ea8180b131d0cdbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Mon, 19 Jan 2026 15:32:57 +0100 Subject: vduse: return internal vq group struct as map token MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return the internal struct that represents the vq group as virtqueue map token, instead of the device. This allows the map functions to access the information per group. At this moment all the virtqueues share the same vq group, that only can point to ASID 0. This change prepares the infrastructure for actual per-group address space handling Acked-by: Jason Wang Signed-off-by: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Message-Id: <20260119143306.1818855-5-eperezma@redhat.com> --- include/linux/virtio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 63bb05ece8c5..3bbc4cb6a672 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -43,13 +43,13 @@ struct virtqueue { void *priv; }; -struct vduse_iova_domain; +struct vduse_vq_group; union virtio_map { /* Device that performs DMA */ struct device *dma_dev; - /* VDUSE specific mapping data */ - struct vduse_iova_domain *iova_domain; + /* VDUSE specific virtqueue group for doing map */ + struct vduse_vq_group *group; }; int virtqueue_add_outbuf(struct virtqueue *vq, -- cgit v1.2.3 From 0d215afdc8199ef9702567778bbc781449f48e50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Mon, 19 Jan 2026 15:32:58 +0100 Subject: vdpa: document set_group_asid thread safety MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document that the function races with the check of DRIVER_OK. Acked-by: Jason Wang Signed-off-by: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Message-Id: <20260119143306.1818855-6-eperezma@redhat.com> --- include/linux/vdpa.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 4cf21d6e9cfd..2bfe3baa63f4 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -312,7 +312,9 @@ struct vdpa_map_file { * @idx: virtqueue index * Returns the affinity mask * @set_group_asid: Set address space identifier for a - * virtqueue group (optional) + * virtqueue group (optional). Caller must + * prevent this from being executed concurrently + * with set_status. * @vdev: vdpa device * @group: virtqueue group * @asid: address space id for this group -- cgit v1.2.3 From 079212f6877e5d07308c8998a8fbc7539ca3f8f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Mon, 19 Jan 2026 15:33:04 +0100 Subject: vduse: add vq group asid support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for assigning Address Space Identifiers (ASIDs) to each VQ group. This enables mapping each group into a distinct memory space. The vq group to ASID association is protected by a rwlock now. But the mutex domain_lock keeps protecting the domains of all ASIDs, as some operations like the one related with the bounce buffer size still requires to lock all the ASIDs. Acked-by: Jason Wang Signed-off-by: Eugenio Pérez Signed-off-by: Michael S. Tsirkin Message-Id: <20260119143306.1818855-12-eperezma@redhat.com> --- include/uapi/linux/vduse.h | 66 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index a3d51cf6df3a..68b4287f9fac 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -32,6 +32,7 @@ * @vq_num: the number of virtqueues * @vq_align: the allocation alignment of virtqueue's metadata * @ngroups: number of vq groups that VDUSE device declares + * @nas: number of address spaces that VDUSE device declares * @reserved: for future use, needs to be initialized to zero * @config_size: the size of the configuration space * @config: the buffer of the configuration space @@ -47,7 +48,8 @@ struct vduse_dev_config { __u32 vq_num; __u32 vq_align; __u32 ngroups; /* if VDUSE_API_VERSION >= 1 */ - __u32 reserved[12]; + __u32 nas; /* if VDUSE_API_VERSION >= 1 */ + __u32 reserved[11]; __u32 config_size; __u8 config[]; }; @@ -166,6 +168,16 @@ struct vduse_vq_state_packed { __u16 last_used_idx; }; +/** + * struct vduse_vq_group_asid - virtqueue group ASID + * @group: Index of the virtqueue group + * @asid: Address space ID of the group + */ +struct vduse_vq_group_asid { + __u32 group; + __u32 asid; +}; + /** * struct vduse_vq_info - information of a virtqueue * @index: virtqueue index @@ -225,6 +237,7 @@ struct vduse_vq_eventfd { * @uaddr: start address of userspace memory, it must be aligned to page size * @iova: start of the IOVA region * @size: size of the IOVA region + * @asid: Address space ID of the IOVA region * @reserved: for future use, needs to be initialized to zero * * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM @@ -234,7 +247,8 @@ struct vduse_iova_umem { __u64 uaddr; __u64 iova; __u64 size; - __u64 reserved[3]; + __u32 asid; + __u32 reserved[5]; }; /* Register userspace memory for IOVA regions */ @@ -248,6 +262,7 @@ struct vduse_iova_umem { * @start: start of the IOVA region * @last: last of the IOVA region * @capability: capability of the IOVA region + * @asid: Address space ID of the IOVA region, only if device API version >= 1 * @reserved: for future use, needs to be initialized to zero * * Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of @@ -258,7 +273,8 @@ struct vduse_iova_info { __u64 last; #define VDUSE_IOVA_CAP_UMEM (1 << 0) __u64 capability; - __u64 reserved[3]; + __u32 asid; /* Only if device API version >= 1 */ + __u32 reserved[5]; }; /* @@ -267,6 +283,28 @@ struct vduse_iova_info { */ #define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info) +/** + * struct vduse_iotlb_entry_v2 - entry of IOTLB to describe one IOVA region + * + * @v1: the original vduse_iotlb_entry + * @asid: address space ID of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_GET_FD2 ioctl to find an overlapped IOVA region. + */ +struct vduse_iotlb_entry_v2 { + struct vduse_iotlb_entry v1; + __u32 asid; + __u32 reserved[12]; +}; + +/* + * Same as VDUSE_IOTLB_GET_FD but with vduse_iotlb_entry_v2 argument that + * support extra fields. + */ +#define VDUSE_IOTLB_GET_FD2 _IOWR(VDUSE_BASE, 0x1b, struct vduse_iotlb_entry_v2) + + /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /** @@ -275,11 +313,14 @@ struct vduse_iova_info { * @VDUSE_SET_STATUS: set the device status * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl + * @VDUSE_SET_VQ_GROUP_ASID: Notify userspace to update the address space of a + * virtqueue group. */ enum vduse_req_type { VDUSE_GET_VQ_STATE, VDUSE_SET_STATUS, VDUSE_UPDATE_IOTLB, + VDUSE_SET_VQ_GROUP_ASID, }; /** @@ -314,6 +355,18 @@ struct vduse_iova_range { __u64 last; }; +/** + * struct vduse_iova_range_v2 - IOVA range [start, last] if API_VERSION >= 1 + * @start: start of the IOVA range + * @last: last of the IOVA range + * @asid: address space ID of the IOVA range + */ +struct vduse_iova_range_v2 { + __u64 start; + __u64 last; + __u32 asid; +}; + /** * struct vduse_dev_request - control request * @type: request type @@ -322,6 +375,8 @@ struct vduse_iova_range { * @vq_state: virtqueue state, only index field is available * @s: device status * @iova: IOVA range for updating + * @iova_v2: IOVA range for updating if API_VERSION >= 1 + * @vq_group_asid: ASID of a virtqueue group * @padding: padding * * Structure used by read(2) on /dev/vduse/$NAME. @@ -334,6 +389,11 @@ struct vduse_dev_request { struct vduse_vq_state vq_state; struct vduse_dev_status s; struct vduse_iova_range iova; + /* Following members but padding exist only if vduse api + * version >= 1 + */ + struct vduse_iova_range_v2 iova_v2; + struct vduse_vq_group_asid vq_group_asid; __u32 padding[32]; }; }; -- cgit v1.2.3 From ebcff9dacaf2c1418f8bc927388186d7d3674603 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 2 Feb 2026 23:48:07 +0100 Subject: vduse: avoid adding implicit padding The vduse_iova_range_v2 and vduse_iotlb_entry_v2 structures are both defined in a way that adds implicit padding and is incompatible between i386 and x86_64 userspace because of the different structure alignment requirements. Building the header with -Wpadded shows these new warnings: vduse.h:305:1: error: padding struct size to alignment boundary with 4 bytes [-Werror=padded] vduse.h:374:1: error: padding struct size to alignment boundary with 4 bytes [-Werror=padded] Change the amount of padding in these two structures to align them to 64 bit words and avoid those problems. Since the v1 vduse_iotlb_entry already has an inconsistent size, do not attempt to reuse the structure but rather list the members indiviudally, with a fixed amount of padding. Fixes: 079212f6877e ("vduse: add vq group asid support") Signed-off-by: Arnd Bergmann Signed-off-by: Michael S. Tsirkin Message-Id: <20260202224835.559538-1-arnd@kernel.org> --- include/uapi/linux/vduse.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 68b4287f9fac..361eea511c21 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -293,9 +293,13 @@ struct vduse_iova_info { * Structure used by VDUSE_IOTLB_GET_FD2 ioctl to find an overlapped IOVA region. */ struct vduse_iotlb_entry_v2 { - struct vduse_iotlb_entry v1; + __u64 offset; + __u64 start; + __u64 last; + __u8 perm; + __u8 padding[7]; __u32 asid; - __u32 reserved[12]; + __u32 reserved[11]; }; /* @@ -365,6 +369,7 @@ struct vduse_iova_range_v2 { __u64 start; __u64 last; __u32 asid; + __u32 padding; }; /** -- cgit v1.2.3