From 34268c9dde4cbae0b701b66c44497da068f418ee Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Tue, 21 Sep 2021 16:20:13 -0700 Subject: virtio-gpu api: multiple context types with explicit initialization This feature allows for each virtio-gpu 3D context to be created with a "context_init" variable. This variable can specify: - the type of protocol used by the context via the capset id. This is useful for differentiating virgl, gfxstream, and venus protocols by host userspace. - other things in the future, such as the version of the context. In addition, each different context needs one or more timelines, so for example a virgl context's waiting can be independent on a gfxstream context's waiting. VIRTIO_GPU_FLAG_INFO_RING_IDX is introduced to specific to tell the host which per-context command ring (or "hardware queue", distinct from the virtio-queue) the fence should be associated with. The new capability sets (gfxstream, venus etc.) are only defined in the virtio-gpu spec and not defined in the header. Signed-off-by: Gurchetan Singh Acked-by: Lingfeng Yang Link: http://patchwork.freedesktop.org/patch/msgid/20210921232024.817-2-gurchetansingh@chromium.org Signed-off-by: Gerd Hoffmann --- include/uapi/linux/virtio_gpu.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 97523a95781d..f556fde07b76 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -59,6 +59,11 @@ * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB */ #define VIRTIO_GPU_F_RESOURCE_BLOB 3 +/* + * VIRTIO_GPU_CMD_CREATE_CONTEXT with + * context_init and multiple timelines + */ +#define VIRTIO_GPU_F_CONTEXT_INIT 4 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -122,14 +127,20 @@ enum virtio_gpu_shm_id { VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1 }; -#define VIRTIO_GPU_FLAG_FENCE (1 << 0) +#define VIRTIO_GPU_FLAG_FENCE (1 << 0) +/* + * If the following flag is set, then ring_idx contains the index + * of the command ring that needs to used when creating the fence + */ +#define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1) struct virtio_gpu_ctrl_hdr { __le32 type; __le32 flags; __le64 fence_id; __le32 ctx_id; - __le32 padding; + __u8 ring_idx; + __u8 padding[3]; }; /* data passed in the cursor vq */ @@ -269,10 +280,11 @@ struct virtio_gpu_resource_create_3d { }; /* VIRTIO_GPU_CMD_CTX_CREATE */ +#define VIRTIO_GPU_CONTEXT_INIT_CAPSET_ID_MASK 0x000000ff struct virtio_gpu_ctx_create { struct virtio_gpu_ctrl_hdr hdr; __le32 nlen; - __le32 padding; + __le32 context_init; char debug_name[64]; }; -- cgit v1.2.3 From b10790434cf2a40017bd796a99d5c4a6e949d616 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Tue, 21 Sep 2021 16:20:14 -0700 Subject: drm/virtgpu api: create context init feature This change allows creating contexts of depending on set of context parameters. The meaning of each of the parameters is listed below: 1) VIRTGPU_CONTEXT_PARAM_CAPSET_ID This determines the type of a context based on the capability set ID. For example, the current capsets: VIRTIO_GPU_CAPSET_VIRGL VIRTIO_GPU_CAPSET_VIRGL2 define a Gallium, TGSI based "virgl" context. We only need 1 capset ID per context type, though virgl has two due a bug that has since been fixed. The use case is the "gfxstream" rendering library and "venus" renderer. gfxstream doesn't do Gallium/TGSI translation and mostly relies on auto-generated API streaming. Certain users prefer gfxstream over virgl for GLES on GLES emulation. {gfxstream vk}/{venus} are also required for Vulkan emulation. The maximum capset ID is 63. The goal is for guest userspace to choose the optimal context type depending on the situation/hardware. 2) VIRTGPU_CONTEXT_PARAM_NUM_RINGS This tells the number of independent command rings that the context will use. This value may be zero and is inferred to be zero if VIRTGPU_CONTEXT_PARAM_NUM_RINGS is not passed in. This is for backwards compatibility for virgl, which has one big giant command ring for all commands. The maxiumum number of rings is 64. In practice, multi-queue or multi-ring submission is used for powerful dGPUs and virtio-gpu may not be the best option in that case (see PCI passthrough or rendernode forwarding). 3) VIRTGPU_CONTEXT_PARAM_POLL_RING_IDX_MASK This is a mask of ring indices for which the DRM fd is pollable. For example, if VIRTGPU_CONTEXT_PARAM_NUM_RINGS is 2, then the mask may be: [ring idx] | [1 << ring_idx] | final mask ------------------------------------------- 0 1 1 1 2 3 The "Sommelier" guest Wayland proxy uses this to poll for events from the host compositor. Signed-off-by: Gurchetan Singh Acked-by: Lingfeng Yang Acked-by: Nicholas Verne Link: http://patchwork.freedesktop.org/patch/msgid/20210921232024.817-3-gurchetansingh@chromium.org Signed-off-by: Gerd Hoffmann --- include/uapi/drm/virtgpu_drm.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index b9ec26e9c646..a13e20cc66b4 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -47,12 +47,15 @@ extern "C" { #define DRM_VIRTGPU_WAIT 0x08 #define DRM_VIRTGPU_GET_CAPS 0x09 #define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a +#define DRM_VIRTGPU_CONTEXT_INIT 0x0b #define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 #define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 +#define VIRTGPU_EXECBUF_RING_IDX 0x04 #define VIRTGPU_EXECBUF_FLAGS (\ VIRTGPU_EXECBUF_FENCE_FD_IN |\ VIRTGPU_EXECBUF_FENCE_FD_OUT |\ + VIRTGPU_EXECBUF_RING_IDX |\ 0) struct drm_virtgpu_map { @@ -68,6 +71,8 @@ struct drm_virtgpu_execbuffer { __u64 bo_handles; __u32 num_bo_handles; __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */ + __u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */ + __u32 pad; }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ @@ -75,6 +80,8 @@ struct drm_virtgpu_execbuffer { #define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */ #define VIRTGPU_PARAM_HOST_VISIBLE 4 /* Host blob resources are mappable */ #define VIRTGPU_PARAM_CROSS_DEVICE 5 /* Cross virtio-device resource sharing */ +#define VIRTGPU_PARAM_CONTEXT_INIT 6 /* DRM_VIRTGPU_CONTEXT_INIT */ +#define VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs 7 /* Bitmask of supported capability set ids */ struct drm_virtgpu_getparam { __u64 param; @@ -173,6 +180,22 @@ struct drm_virtgpu_resource_create_blob { __u64 blob_id; }; +#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001 +#define VIRTGPU_CONTEXT_PARAM_NUM_RINGS 0x0002 +#define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003 +struct drm_virtgpu_context_set_param { + __u64 param; + __u64 value; +}; + +struct drm_virtgpu_context_init { + __u32 num_params; + __u32 pad; + + /* pointer to drm_virtgpu_context_set_param array */ + __u64 ctx_set_params; +}; + #define DRM_IOCTL_VIRTGPU_MAP \ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) @@ -212,6 +235,10 @@ struct drm_virtgpu_resource_create_blob { DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB, \ struct drm_virtgpu_resource_create_blob) +#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT, \ + struct drm_virtgpu_context_init) + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 4bb2d367a5a2807185a04949ae922d247f650576 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Fri, 3 Sep 2021 13:00:32 +0000 Subject: drm/lease: allow empty leases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be used to create a separate DRM file description, thus creating a new GEM handle namespace. My use-case is wlroots. The library splits responsibilities between separate components: the GBM allocator creates buffers, the GLES2 renderer uses EGL to import them and render to them, the DRM backend imports the buffers and displays them. wlroots has a modular architecture, and any of these components can be swapped and replaced with something else. For instance, the pipeline can be set up so that the DRM dumb buffer allocator is used instead of GBM and the Pixman renderer is used instead of GLES2. Library users can also replace any of these components with their own custom one. DMA-BUFs are used to pass buffer references across components. We could use GEM handles instead, but this would result in pain if multiple GPUs are in use: wlroots copies buffers across GPUs as needed. Importing a GEM handle created on one GPU into a completely different GPU will blow up (fail at best, mix unrelated buffers otherwise). Everything is fine if all components use Mesa. However, this isn't always desirable. For instance when running with DRM dumb buffers and the Pixman software renderer it's unfortunate to depend on GBM in the DRM backend just to turn DMA-BUFs into FB IDs. GBM loads Mesa drivers to perform an action which has nothing driver-specific. Additionally, drivers will fail the import if the 3D engine can't use the imported buffer, for instance amdgpu will refuse to import DRM dumb buffers [1]. We might also want to be running with a Vulkan renderer and a Vulkan allocator in the future, and GBM wouldn't be welcome in this setup. To address this, GBM can be side-stepped in the DRM backend, and can be replaced with drmPrimeFDToHandle calls. However because of GEM handle reference counting issues, care must be taken to avoid double-closing the same GEM handle. In particular, it's not possible to share a DRM FD with GBM or EGL and perform some drmPrimeFDToHandle calls manually. So wlroots needs to re-open the DRM FD to create a new GEM handle namespace. However there's no guarantee that the file-system permissions will be set up so that the primary FD can be opened by the compsoitor. On modern systems seatd or logind is a privileged process responsible for doing this, and other processes aren't expected to do it. For historical reasons systemd still allows physically logged in users to open primary DRM nodes, but this doesn't work on non-systemd setups and it's desirable to lock them down at some point. Some might suggest to open the render node instead of re-opening the primary node. However some systems don't have a render node at all (e.g. no GPU, or a split render/display SoC). Solutions to this issue have been discussed in [2]. One solution would be to open the magic /proc/self/fd/ file, but it's a Linux-specific hack (wlroots supports BSDs too). Another solution is to add support for re-opening a DRM primary node to seatd/logind, but they don't support it now and really haven't been designed for this (logind would need to grow a completely new API, because it assumes unique dev_t IDs). Also this seems like pushing down a kernel limitation to user-space a bit too hard. Another solution is to allow creating empty DRM leases. The lessee FD would have its own GEM handle namespace, so wouldn't conflict wth GBM/EGL. It would have the master bit set, but would be able to manage zero resources. wlroots doesn't intend to share this FD with any other process. All in all IMHO that seems like a pretty reasonable solution to the issue at hand. Note, I've discussed with Jonas Ådahl and Mutter plans to adopt a similar design in the future. Example usage in wlroots is available at [3]. IGT test available at [4]. [1]: https://github.com/swaywm/wlroots/issues/2916 [2]: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/110 [3]: https://github.com/swaywm/wlroots/pull/3158 [4]: https://patchwork.freedesktop.org/series/94323/ Signed-off-by: Simon Ser Cc: Daniel Vetter Cc: Daniel Stone Cc: Pekka Paalanen Cc: Michel Dänzer Cc: Emil Velikov Cc: Keith Packard Cc: Boris Brezillon Cc: Dave Airlie Acked-by: Pekka Paalanen Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20210903130000.1590-2-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index e4a2570a6058..e1e351682872 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -1112,7 +1112,8 @@ struct drm_mode_destroy_blob { * Lease mode resources, creating another drm_master. * * The @object_ids array must reference at least one CRTC, one connector and - * one plane if &DRM_CLIENT_CAP_UNIVERSAL_PLANES is enabled. + * one plane if &DRM_CLIENT_CAP_UNIVERSAL_PLANES is enabled. Alternatively, + * the lease can be completely empty. */ struct drm_mode_create_lease { /** @object_ids: Pointer to array of object ids (__u32) */ -- cgit v1.2.3 From bb3425efdcd99f2b4e608e850226f7107b2f993e Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 30 Sep 2021 17:18:50 +0100 Subject: drm/v3d: add generic ioctl extension Add support to attach generic extensions on job submission. This patch is third prep work to enable multiple syncobjs on job submission. With this work, when the job submission interface needs to be extended to accommodate a new feature, we will use a generic extension struct where an id determines the data type to be pointed. The first application is to enable multiples in/out syncobj (next patch), but the base is already done for future features. Therefore, to attach a new feature, a specific extension struct should subclass drm_v3d_extension and update the list of extensions in a job submission. v2: - remove redundant elements to subclass struct (Daniel) v3: - add comment for v3d_get_extensions Signed-off-by: Melissa Wen Reviewed-by: Iago Toral Quiroga Signed-off-by: Melissa Wen Link: https://patchwork.freedesktop.org/patch/msgid/ed53b1cd7e3125b76f18fe3fb995a04393639bc6.1633016479.git.mwen@igalia.com --- include/uapi/drm/v3d_drm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 4104f22fb3d3..55b443ca6c0b 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -58,6 +58,20 @@ extern "C" { struct drm_v3d_perfmon_get_values) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 +#define DRM_V3D_SUBMIT_EXTENSION 0x02 + +/* struct drm_v3d_extension - ioctl extensions + * + * Linked-list of generic extensions where the id identify which struct is + * pointed by ext_data. Therefore, DRM_V3D_EXT_ID_* is used on id to identify + * the extension type. + */ +struct drm_v3d_extension { + __u64 next; + __u32 id; +#define DRM_V3D_EXT_ID_MULTI_SYNC 0x01 + __u32 flags; /* mbz */ +}; /** * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D @@ -135,12 +149,16 @@ struct drm_v3d_submit_cl { /* Number of BO handles passed in (size is that times 4). */ __u32 bo_handle_count; + /* DRM_V3D_SUBMIT_* properties */ __u32 flags; /* ID of the perfmon to attach to this job. 0 means no perfmon. */ __u32 perfmon_id; __u32 pad; + + /* Pointer to an array of ioctl extensions*/ + __u64 extensions; }; /** @@ -248,6 +266,12 @@ struct drm_v3d_submit_tfu { __u32 in_sync; /* Sync object to signal when the TFU job is done. */ __u32 out_sync; + + __u32 flags; + + /* Pointer to an array of ioctl extensions*/ + __u64 extensions; + }; /* Submits a compute shader for dispatch. This job will block on any @@ -276,6 +300,13 @@ struct drm_v3d_submit_csd { /* ID of the perfmon to attach to this job. 0 means no perfmon. */ __u32 perfmon_id; + + /* Pointer to an array of ioctl extensions*/ + __u64 extensions; + + __u32 flags; + + __u32 pad; }; enum { -- cgit v1.2.3 From e4165ae8304e5ea822fbe5909dd3be5445c058b7 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 30 Sep 2021 17:19:56 +0100 Subject: drm/v3d: add multiple syncobjs support Using the generic extension from the previous patch, a specific multisync extension enables more than one in/out binary syncobj per job submission. Arrays of syncobjs are set in struct drm_v3d_multisync, that also cares of determining the stage for sync (wait deps) according to the job queue. v2: - subclass the generic extension struct (Daniel) - simplify adding dependency conditions to make understandable (Iago) v3: - fix conditions to consider single or multiples in/out_syncs (Iago) - remove irrelevant comment (Iago) Signed-off-by: Melissa Wen Reviewed-by: Iago Toral Quiroga Signed-off-by: Melissa Wen Link: https://patchwork.freedesktop.org/patch/msgid/ffd8b2e3dd2e0c686db441a0c0a4a0181ff85328.1633016479.git.mwen@igalia.com --- include/uapi/drm/v3d_drm.h | 49 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 55b443ca6c0b..3dfc0af8756a 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -73,6 +73,53 @@ struct drm_v3d_extension { __u32 flags; /* mbz */ }; +/* struct drm_v3d_sem - wait/signal semaphore + * + * If binary semaphore, it only takes syncobj handle and ignores flags and + * point fields. Point is defined for timeline syncobj feature. + */ +struct drm_v3d_sem { + __u32 handle; /* syncobj */ + /* rsv below, for future uses */ + __u32 flags; + __u64 point; /* for timeline sem support */ + __u64 mbz[2]; /* must be zero, rsv */ +}; + +/* Enum for each of the V3D queues. */ +enum v3d_queue { + V3D_BIN, + V3D_RENDER, + V3D_TFU, + V3D_CSD, + V3D_CACHE_CLEAN, +}; + +/** + * struct drm_v3d_multi_sync - ioctl extension to add support multiples + * syncobjs for commands submission. + * + * When an extension of DRM_V3D_EXT_ID_MULTI_SYNC id is defined, it points to + * this extension to define wait and signal dependencies, instead of single + * in/out sync entries on submitting commands. The field flags is used to + * determine the stage to set wait dependencies. + */ +struct drm_v3d_multi_sync { + struct drm_v3d_extension base; + /* Array of wait and signal semaphores */ + __u64 in_syncs; + __u64 out_syncs; + + /* Number of entries */ + __u32 in_sync_count; + __u32 out_sync_count; + + /* set the stage (v3d_queue) to sync */ + __u32 wait_stage; + + __u32 pad; /* mbz */ +}; + /** * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D * engine. @@ -228,6 +275,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_CSD, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH, DRM_V3D_PARAM_SUPPORTS_PERFMON, + DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, }; struct drm_v3d_get_param { @@ -271,7 +319,6 @@ struct drm_v3d_submit_tfu { /* Pointer to an array of ioctl extensions*/ __u64 extensions; - }; /* Submits a compute shader for dispatch. This job will block on any -- cgit v1.2.3