From 7eb61c2dffa635e4fb05f89736d8fcf39bb24d42 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Oct 2023 12:17:41 +0200 Subject: drm/amdgpu: UAPI for user queue management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch intorduces new UAPI/IOCTL for usermode graphics queue. The userspace app will fill this structure and request the graphics driver to add a graphics work queue for it. The output of this UAPI is a queue id. This UAPI maps the queue into GPU, so the graphics app can start submitting work to the queue as soon as the call returns. V2: Addressed review comments from Alex and Christian - Make the doorbell offset's comment clearer - Change the output parameter name to queue_id V3: Integration with doorbell manager V4: - Updated the UAPI doc (Pierre-Eric) - Created a Union for engine specific MQDs (Alex) - Added Christian's R-B V5: - Add variables for GDS and CSA in MQD structure (Alex) - Make MQD data a ptr-size pair instead of union (Alex) V9: - renamed struct drm_amdgpu_userq_mqd_gfx_v11 to struct drm_amdgpu_userq_mqd as its being used for SDMA and compute queues as well V10: - keeping the drm_amdgpu_userq_mqd IP independent, moving the _gfx_v11 objects in a separate structure in other patch. (Alex) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 90 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 25d5c6e90a99..53081050cb3e 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -54,6 +54,7 @@ extern "C" { #define DRM_AMDGPU_VM 0x13 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -71,6 +72,7 @@ extern "C" { #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) /** * DOC: memory domains @@ -319,6 +321,94 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* user queue IOCTL */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* Flag to indicate secure buffer related workload, unused for now */ +#define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0) +/* Flag to indicate AQL workload, unused for now */ +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1) + +/* + * MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. This + * structure defines the MQD for GFX-V11 IP ver 0. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue handle for USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @flags: flags to indicate special function for queue like secure + * buffer (TMZ). Unused for now. + */ + __u32 flags; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated to this client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. + */ + __u32 doorbell_offset; + + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. + * This object must be at least 8 byte in size and aligned to 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the size + * and alignment related requirements are met. + */ + __u64 wptr_va; + /** + * @mqd: Queue descriptor for USERQ_OP_CREATE + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds MQD of this usermode queue. + */ + __u64 mqd; + /** + * @size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI for ex, for + * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11). + */ + __u64 mqd_size; +}; + +struct drm_amdgpu_userq_out { + /** Queue handle */ + __u32 queue_id; + /** Flags */ + __u32 flags; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From a1d201e16940775f0e2f0230960d69e40879ec6d Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 2 May 2024 12:37:30 +0200 Subject: drm/amdgpu: enable GFX-V11 userqueue support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables GFX-v11 IP support in the usermode queue base code. It typically: - adds a GFX_v11 specific MQD structure - sets IP functions to create and destroy MQDs - sets MQD objects coming from userspace V10: introduced this spearate patch for GFX V11 enabling (Alex). V11: Addressed review comments: - update the comments in GFX mqd structure informing user about using the INFO IOCTL for object sizes (Alex) - rename struct drm_amdgpu_userq_mqd_gfx_v11 to drm_amdgpu_userq_mqd_gfx11 (Marek) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 53081050cb3e..4e07e15d5076 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -409,6 +409,25 @@ union drm_amdgpu_userq { struct drm_amdgpu_userq_out out; }; +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @gds_va: Virtual address of the GPU memory to hold the GDS buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 gds_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 543b6145377458b5ec0d1440606c31db62867bf4 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 27 Aug 2024 14:52:07 +0530 Subject: drm/amdgpu: enable SDMA usermode queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does necessary modifications to enable the SDMA usermode queues using the existing userqueue infrastructure. V9: introduced this patch in the series V10: use header file instead of extern (Alex) V11: rename drm_amdgpu_userq_mqd_sdma_gfx_v11 to drm_amdgpu_userq_mqd_sdma_gfx11 (Marek) Cc: Christian König Cc: Alex Deucher Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4e07e15d5076..6ae988574084 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -428,6 +428,16 @@ struct drm_amdgpu_userq_mqd_gfx11 { __u64 csa_va; }; +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 2c695d7c072067cd53fb52e52aa0b48277120314 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 9 May 2024 14:31:15 +0200 Subject: drm/amdgpu: enable compute/gfx usermode queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does the necessary changes required to enable compute workload support using the existing usermode queues infrastructure. V9: Patch introduced V10: Add custom IP specific mqd strcuture for compute (Alex) V11: Rename drm_amdgpu_userq_mqd_compute_gfx_v11 to drm_amdgpu_userq_mqd_compute_gfx11 (Marek) Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6ae988574084..59f0818e8dcd 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -438,6 +438,16 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 { __u64 csa_va; }; +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and must be at least 1 page + * sized. + */ + __u64 eop_va; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 6b0c7c367317a663a58f72f79e73ad787aac873d Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:09:50 +0530 Subject: drm/amdgpu: UAPI headers for userqueue Secure semaphore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add UAPI header support for userqueue Secure semaphore v2: Worked on review comments from Christian for the following modifications - Add bo handles, bo flags and padding fields. - Include value/va in a combined array. v3: Worked on review comments from Christian - Add num_fences field to obtain the number of objects required to allocate memory for userq_fence_info. - Replace obj_handle name with syncobj_handle. - Replace point name with syncobj_point. - Replace count_handles name with num_syncobj_handles. - Fix structure padding related issues. v4: Worked on review comments from Christian - Modify the bo flags description. Signed-off-by: Alex Deucher Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 115 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 59f0818e8dcd..4e9414c0f924 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -55,6 +55,8 @@ extern "C" { #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 #define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -73,6 +75,8 @@ extern "C" { #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) #define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) /** * DOC: memory domains @@ -448,6 +452,117 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 { __u64 eop_va; }; +/* dma_resv usage flag */ +#define AMDGPU_USERQ_BO_WRITE 1 + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + /** + * @flags: flags to indicate special function for userq fence creation. + * Unused for now. + */ + __u32 flags; + /** + * @syncobj_handles_array: An array of syncobj handles used by the userq fence + * creation IOCTL to install the created dma_fence object which can be + * utilized by userspace to explicitly synchronize GPU commands. + */ + __u64 syncobj_handles_array; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles_array. + */ + __u64 num_syncobj_handles; + /** + * @syncobj_point: A given point on the timeline to be signaled. + * Unused for now. + */ + __u64 syncobj_point; + /** + * @bo_handles_array: An array of GEM BO handles used by the userq fence creation + * IOCTL to install the created dma_fence object which can be utilized by + * userspace to synchronize the BO usage between user processes. + */ + __u64 bo_handles_array; + /** + * @num_bo_handles: A count that represents the number of GEM BO handles in + * @bo_handles_array. + */ + __u32 num_bo_handles; + /** + * @bo_flags: flags to indicate BOs synchronize for READ or WRITE + */ + __u32 bo_flags; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value represents the write pointer (WPTR) of the + * queue commands which compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used to retrieve the queue information to store + * the fence driver references in the wait user queue structure. + */ + __u32 waitq_id; + /** + * @flags: flags to specify special function for userq wait information. + * Unused for now. + */ + __u32 flags; + /** + * @bo_wait_flags: flags to define the BOs for READ or WRITE to store the + * matching fence wait info pair in @userq_fence_info. + */ + __u32 bo_wait_flags; + __u32 pad; + /** + * @syncobj_handles_array: An array of syncobj handles defined to get the + * fence wait information of every syncobj handles in the array. + */ + __u64 syncobj_handles_array; + /** + * @bo_handles_array: An array of GEM BO handles defined to fetch the fence + * wait information of every BO handles in the array. + */ + __u64 bo_handles_array; + /** + * @num_syncobj_handles: A count that represents the number of syncobj handles in + * @syncobj_handles_array. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_handles: A count that represents the number of GEM BO handles in + * @bo_handles_array. + */ + __u32 num_bo_handles; + /** + * @userq_fence_info: An array of fence information (va and value) pair of each + * objects stored in @syncobj_handles_array and @bo_handles_array. + */ + __u64 userq_fence_info; + /** + * @num_fences: A count that represents the number of actual fences installed in + * each syncobj and bo handles. + */ + __u64 num_fences; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 -- cgit v1.2.3 From 15e30a6e479282fef4365bd586159911c8cf140d Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:33:28 +0530 Subject: drm/amdgpu: Add wait IOCTL timeline syncobj support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add user fence wait IOCTL timeline syncobj support. v2:(Christian) - handle dma_fence_wait() return value. - shorten the variable name syncobj_timeline_points a bit. - move num_points up to avoid padding issues. v3:(Christian) - Handle timeline drm_syncobj_find_fence() call error handling - Use dma_fence_unwrap_for_each() in timeline fence as there could be more than one fence. v4:(Christian) - Drop the first num_fences since fence is always included in the dma_fence_unwrap_for_each() iteration, when fence != f then fence is most likely just a container. v5: Added Alex RB to merge the kernel UAPI changes since he has already approved the amdgpu_drm.h changes. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4e9414c0f924..1a21259cb8c4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -530,12 +530,26 @@ struct drm_amdgpu_userq_wait { * matching fence wait info pair in @userq_fence_info. */ __u32 bo_wait_flags; - __u32 pad; + /** + * @num_points: A count that represents the number of timeline syncobj handles in + * syncobj_handles_array. + */ + __u32 num_points; /** * @syncobj_handles_array: An array of syncobj handles defined to get the * fence wait information of every syncobj handles in the array. */ __u64 syncobj_handles_array; + /** + * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the + * fence wait information of every timeline syncobj handles in the array. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: An array of timeline syncobj points defined to get the + * fence wait points of every timeline syncobj handles in the syncobj_handles_array. + */ + __u64 syncobj_timeline_points; /** * @bo_handles_array: An array of GEM BO handles defined to fetch the fence * wait information of every BO handles in the array. -- cgit v1.2.3 From 70773bef4e091ff6d2a91e3dfb4f29013eb81f1f Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 25 Sep 2024 18:09:49 +0200 Subject: drm/amdgpu: update userqueue BOs and PDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates the VM_IOCTL to allow userspace to synchronize the mapping/unmapping of a BO in the page table. The major changes are: - it adds a drm_timeline object as an input parameter to the VM IOCTL. - this object is used by the kernel to sync the update of the BO in the page table during the mapping of the object. - the kernel also synchronizes the tlb flush of the page table entry of this object during the unmapping (Added in this series: https://patchwork.freedesktop.org/series/131276/ and https://patchwork.freedesktop.org/patch/584182/) - the userspace can wait on this timeline, and then the BO is ready to be consumed by the GPU. The UAPI for the same has been approved here: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 V2: - remove the eviction fence coupling V3: - added the drm timeline support instead of input/output fence (Christian) V4: - made timeline 64-bit (Christian) - bug fix (Arvind) V5: GLCTS bug fix (Arvind) V6: Rename syncobj_handle -> timeline_syncobj_out Rename point -> timeline_point_in (Marek) V7: Addressed review comments from Christian: - do not send last_update fence in case of vm_clear_freed, instead return the fence from gen_va_update_vm - move the functions to update bo_mapping to amdgpu_gem.c - do not use amdgpu_userq_update_vm anymore in userq_create() V8: Addressed review comments from Christian: - Split amdgpu_gem_update_bo_mapping function. - amdgpu_gem_va_update_vm should return stub for error. V9: Addressed review comments from Christian: - Rename the function amdgpu_gem_update_timeline_node. - amdgpu_gem_update_timeline_node should be void function. - when timeline_point is zero don't allocate a chain and call drm_syncobj_replace_fence() instead of drm_syncobj_add_point(). V11: rebase V12: Fix 32-bit holes issue in sturct drm_amdgpu_gem_va. V13: Fix the review comment by renaming timeline syncobj (Marek) Cc: Alex Deucher Cc: Felix Kuehling Cc: Christian König Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1a21259cb8c4..ca82935ff93a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -857,6 +857,15 @@ struct drm_amdgpu_gem_va { __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given vm_timeline_syncobj_out + * at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; }; #define AMDGPU_HW_IP_GFX 0 -- cgit v1.2.3 From cb4a73f46f253b5f7a30b1e0488c8ef2832e8747 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 30 Oct 2024 10:59:04 +0530 Subject: drm/amdgpu: Add separate array of read and write for BO handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop AMDGPU_USERQ_BO_WRITE as this should not be a global option of the IOCTL, It should be option per buffer. Hence adding separate array for read and write BO handles. v2(Marek): - Internal kernel details shouldn't be here. This file should only document the observed behavior, not the implementation . v3: - Fix DAL CI clang issue. v4: - Added Alex RB to merge the kernel UAPI changes since he has already approved the amdgpu_drm.h changes. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Alex Deucher Acked-by: Christian König Suggested-by: Marek Olšák Suggested-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 50 +++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 16 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index ca82935ff93a..02cf03e811d5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -452,9 +452,6 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 { __u64 eop_va; }; -/* dma_resv usage flag */ -#define AMDGPU_USERQ_BO_WRITE 1 - /* userq signal/wait ioctl */ struct drm_amdgpu_userq_signal { /** @@ -484,20 +481,30 @@ struct drm_amdgpu_userq_signal { */ __u64 syncobj_point; /** - * @bo_handles_array: An array of GEM BO handles used by the userq fence creation - * IOCTL to install the created dma_fence object which can be utilized by - * userspace to synchronize the BO usage between user processes. + * @bo_read_handles: The list of BO handles that the submitted user queue job + * is using for read only. This will update BO fences in the kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user queue job + * is using for write only. This will update BO fences in the kernel. + */ + __u64 bo_write_handles; + /** + * @num_read_bo_handles: A count that represents the number of read BO handles in + * @bo_read_handles. */ - __u64 bo_handles_array; + __u32 num_read_bo_handles; /** - * @num_bo_handles: A count that represents the number of GEM BO handles in - * @bo_handles_array. + * @num_write_bo_handles: A count that represents the number of write BO handles in + * @bo_write_handles. */ - __u32 num_bo_handles; + __u32 num_write_bo_handles; /** * @bo_flags: flags to indicate BOs synchronize for READ or WRITE */ __u32 bo_flags; + __u32 pad; }; struct drm_amdgpu_userq_fence_info { @@ -551,20 +558,31 @@ struct drm_amdgpu_userq_wait { */ __u64 syncobj_timeline_points; /** - * @bo_handles_array: An array of GEM BO handles defined to fetch the fence - * wait information of every BO handles in the array. + * @bo_read_handles: The list of read BO handles submitted by the user queue + * job to get the va/value pairs. */ - __u64 bo_handles_array; + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user queue + * job to get the va/value pairs. + */ + __u64 bo_write_handles; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in * @syncobj_handles_array. */ __u32 num_syncobj_handles; /** - * @num_bo_handles: A count that represents the number of GEM BO handles in - * @bo_handles_array. + * @num_read_bo_handles: A count that represents the number of read BO handles in + * @bo_read_handles. + */ + __u32 num_read_bo_handles; + /** + * @num_write_bo_handles: A count that represents the number of write BO handles in + * @bo_write_handles. */ - __u32 num_bo_handles; + __u32 num_write_bo_handles; + __u32 pad; /** * @userq_fence_info: An array of fence information (va and value) pair of each * objects stored in @syncobj_handles_array and @bo_handles_array. -- cgit v1.2.3 From 38c67ec9aa4be7bc0ae55e7bebe95f615fa09a1e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 25 Sep 2024 18:10:41 +0200 Subject: drm/amdgpu: Add input fence to sync bo map/unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds input fences to VM_IOCTL for buffer object. The kernel will map/unmap the BO only when the fence is signaled. The UAPI for the same has been approved here: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 V2: Bug fix (Arvind) V3: Bug fix (Arvind) V4: Rename UAPI objects as per UAPI review (Marek) V5: Addressed review comemnts from Christian - function should return error. - Add 'TODO' comment - The input fence should be independent of the operation. V6: Addressed review comemnts from Christian - Release the memory allocated by memdup_user(). V7: Addressed review comemnts from Christian - Drop the debug print and add "return r;" for the error handling. V11: Rebase v12: Fix 32-bit holes issue in sturct drm_amdgpu_gem_va. v13: Fix deadlock issue. v14: Fix merge conflict. v15: Fix review comment by renaming syncobj handles. Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 02cf03e811d5..0910a6f8c5f2 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -884,6 +884,10 @@ struct drm_amdgpu_gem_va { * at vm_timeline_point. */ __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; }; #define AMDGPU_HW_IP_GFX 0 -- cgit v1.2.3 From 2e06b175fff5ba1415b74fed441c0d066b8c8dab Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 11 Nov 2024 12:34:30 +0100 Subject: drm/amdgpu: fix userqueue UAPI comments This patch fixes some of the pending UAPI review comments from the libDRM/UAPI review process. - It updates some outdated comments in the userqueue UAPI header highlighted during the libdrm UAPI review. - It removes the GDS BO support which was found unused. - It also removes the unused flags parameter from the UAPI. - It also adds a padding variables in userqueue in/out structures. (Pierre-Eric and Marek) - clarify comments on top of drm_amdgpu_userq_in - clarify comment for queue_id (in) - clarify comment for mqd - clarify comment for compute MQD size - clarify comment for queue_id (out) - remove GDB object from BO object list - remove the unused flags parameter Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 54 +++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0910a6f8c5f2..6158496cc1d0 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -325,35 +325,28 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; -/* user queue IOCTL */ +/* user queue IOCTL operations */ #define AMDGPU_USERQ_OP_CREATE 1 #define AMDGPU_USERQ_OP_FREE 2 -/* Flag to indicate secure buffer related workload, unused for now */ -#define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0) -/* Flag to indicate AQL workload, unused for now */ -#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1) - /* - * MQD (memory queue descriptor) is a set of parameters which allow - * the GPU to uniquely define and identify a usermode queue. This - * structure defines the MQD for GFX-V11 IP ver 0. + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: user is expected + * to set all fields, excep the parameter 'queue_id'. + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', eveything else is ignored. */ struct drm_amdgpu_userq_in { /** AMDGPU_USERQ_OP_* */ __u32 op; - /** Queue handle for USERQ_OP_FREE */ + /** Queue id passed for operation USERQ_OP_FREE */ __u32 queue_id; /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ __u32 ip_type; - /** - * @flags: flags to indicate special function for queue like secure - * buffer (TMZ). Unused for now. - */ - __u32 flags; /** * @doorbell_handle: the handle of doorbell GEM object - * associated to this client. + * associated with this userqueue client. */ __u32 doorbell_handle; /** @@ -362,7 +355,7 @@ struct drm_amdgpu_userq_in { * and doorbell_offset in the doorbell bo. */ __u32 doorbell_offset; - + __u32 _pad; /** * @queue_va: Virtual address of the GPU memory which holds the queue * object. The queue holds the workload packets. @@ -387,25 +380,31 @@ struct drm_amdgpu_userq_in { */ __u64 wptr_va; /** - * @mqd: Queue descriptor for USERQ_OP_CREATE + * @mqd: MQD (memory queue descriptor) is a set of parameters which allow + * the GPU to uniquely define and identify a usermode queue. + * * MQD data can be of different size for different GPU IP/engine and * their respective versions/revisions, so this points to a __u64 * - * which holds MQD of this usermode queue. + * which holds IP specific MQD of this usermode queue. */ __u64 mqd; /** * @size: size of MQD data in bytes, it must match the MQD structure * size of the respective engine/revision defined in UAPI for ex, for - * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11). + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). */ __u64 mqd_size; }; +/* The structure to carry output of userqueue ops */ struct drm_amdgpu_userq_out { - /** Queue handle */ + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, otherwise + * it should be ignored. + */ __u32 queue_id; - /** Flags */ - __u32 flags; + __u32 _pad; }; union drm_amdgpu_userq { @@ -420,11 +419,6 @@ struct drm_amdgpu_userq_mqd_gfx11 { * Use AMDGPU_INFO_IOCTL to find the exact size of the object. */ __u64 shadow_va; - /** - * @gds_va: Virtual address of the GPU memory to hold the GDS buffer. - * Use AMDGPU_INFO_IOCTL to find the exact size of the object. - */ - __u64 gds_va; /** * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. * Use AMDGPU_INFO_IOCTL to find the exact size of the object. @@ -446,8 +440,8 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 { struct drm_amdgpu_userq_mqd_compute_gfx11 { /** * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. - * This must be a from a separate GPU object, and must be at least 1 page - * sized. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. */ __u64 eop_va; }; -- cgit v1.2.3 From 2761bb9a31f1b863037547d73dc6aac1461ceab6 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 11 Nov 2024 12:43:07 +0530 Subject: drm/amdgpu: Modify userq signal/wait struct field names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify kernel UAPI userq signal/wait struct field names and description corresponding to the libdrm UAPI review comments. libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 102 +++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 66 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 6158496cc1d0..72dc16dbca7f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -453,27 +453,17 @@ struct drm_amdgpu_userq_signal { * to retrieve the WPTR. */ __u32 queue_id; + __u32 pad; /** - * @flags: flags to indicate special function for userq fence creation. - * Unused for now. - */ - __u32 flags; - /** - * @syncobj_handles_array: An array of syncobj handles used by the userq fence - * creation IOCTL to install the created dma_fence object which can be - * utilized by userspace to explicitly synchronize GPU commands. + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to be signaled. */ - __u64 syncobj_handles_array; + __u64 syncobj_handles; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in - * @syncobj_handles_array. + * @syncobj_handles. */ __u64 num_syncobj_handles; - /** - * @syncobj_point: A given point on the timeline to be signaled. - * Unused for now. - */ - __u64 syncobj_point; /** * @bo_read_handles: The list of BO handles that the submitted user queue job * is using for read only. This will update BO fences in the kernel. @@ -485,20 +475,15 @@ struct drm_amdgpu_userq_signal { */ __u64 bo_write_handles; /** - * @num_read_bo_handles: A count that represents the number of read BO handles in + * @num_bo_read_handles: A count that represents the number of read BO handles in * @bo_read_handles. */ - __u32 num_read_bo_handles; + __u32 num_bo_read_handles; /** - * @num_write_bo_handles: A count that represents the number of write BO handles in + * @num_bo_write_handles: A count that represents the number of write BO handles in * @bo_write_handles. */ - __u32 num_write_bo_handles; - /** - * @bo_flags: flags to indicate BOs synchronize for READ or WRITE - */ - __u32 bo_flags; - __u32 pad; + __u32 num_bo_write_handles; }; struct drm_amdgpu_userq_fence_info { @@ -517,38 +502,18 @@ struct drm_amdgpu_userq_fence_info { struct drm_amdgpu_userq_wait { /** - * @waitq_id: Queue handle used to retrieve the queue information to store - * the fence driver references in the wait user queue structure. - */ - __u32 waitq_id; - /** - * @flags: flags to specify special function for userq wait information. - * Unused for now. - */ - __u32 flags; - /** - * @bo_wait_flags: flags to define the BOs for READ or WRITE to store the - * matching fence wait info pair in @userq_fence_info. - */ - __u32 bo_wait_flags; - /** - * @num_points: A count that represents the number of timeline syncobj handles in - * syncobj_handles_array. - */ - __u32 num_points; - /** - * @syncobj_handles_array: An array of syncobj handles defined to get the - * fence wait information of every syncobj handles in the array. + * @syncobj_handles: The list of syncobj handles submitted by the user queue + * job to get the va/value pairs. */ - __u64 syncobj_handles_array; + __u64 syncobj_handles; /** - * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the - * fence wait information of every timeline syncobj handles in the array. + * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by + * the user queue job to get the va/value pairs at given @syncobj_timeline_points. */ - __u64 syncobj_timeline_handles; + __u64 syncobj_timeline_handles; /** - * @syncobj_timeline_points: An array of timeline syncobj points defined to get the - * fence wait points of every timeline syncobj handles in the syncobj_handles_array. + * @syncobj_timeline_points: The list of timeline syncobj points submitted by the + * user queue job for the corresponding @syncobj_timeline_handles. */ __u64 syncobj_timeline_points; /** @@ -561,32 +526,37 @@ struct drm_amdgpu_userq_wait { * job to get the va/value pairs. */ __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of timeline + * syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As input it defines + * the maximum number of fences that can be returned and as output it will specify + * how many fences were actually returned from the ioctl. + */ + __u16 num_fences; /** * @num_syncobj_handles: A count that represents the number of syncobj handles in - * @syncobj_handles_array. + * @syncobj_handles. */ __u32 num_syncobj_handles; /** - * @num_read_bo_handles: A count that represents the number of read BO handles in + * @num_bo_read_handles: A count that represents the number of read BO handles in * @bo_read_handles. */ - __u32 num_read_bo_handles; + __u32 num_bo_read_handles; /** - * @num_write_bo_handles: A count that represents the number of write BO handles in + * @num_bo_write_handles: A count that represents the number of write BO handles in * @bo_write_handles. */ - __u32 num_write_bo_handles; - __u32 pad; - /** - * @userq_fence_info: An array of fence information (va and value) pair of each - * objects stored in @syncobj_handles_array and @bo_handles_array. - */ - __u64 userq_fence_info; + __u32 num_bo_write_handles; /** - * @num_fences: A count that represents the number of actual fences installed in - * each syncobj and bo handles. + * @out_fences: The field is a return value from the ioctl containing the list of + * address/value pairs to wait for. */ - __u64 num_fences; + __u64 out_fences; }; /* vm ioctl */ -- cgit v1.2.3 From 90c448fef3120d79bd8031665213981c966dbaf4 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 30 Oct 2024 15:39:42 +0100 Subject: drm/amdgpu: add new AMDGPU_INFO subquery for userq objects This patch adds a new subquery (AMDGPU_INFO_UQ_FW_AREAS) in AMDGPU_INFO_IOCTL to get the size and alignment of shadow and csa objects from the FW setup. This information is required for the userqueue consumers. V2: Added Alex's suggestions and addressed review comments: - make this query IP specific (GFX/SDMA etc) - give a better title (AMDGPU_INFO_UQ_METADATA) - restructured the code as per sample code shared by Alex V3: Split the UAPI patch from shadow_size_fn modifications V4: Addressed review comments from UAPI review (Marek/Pierre-Eric) - Change the query name to AMDGPU_INFO_UQ_FW_AREAS - remove unused inpur parameter for AMDGPU_HW_IP* UAPI link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/400/ Cc: Alex Deucher Cc: Christian Koenig Cc: Arvind Yadav Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 72dc16dbca7f..5dbd9037afe7 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1193,6 +1193,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { #define AMDGPU_INFO_MAX_IBS 0x22 /* query last page fault info */ #define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff @@ -1469,6 +1471,27 @@ struct drm_amdgpu_info_hw_ip { __u32 ip_discovery_version; }; +/* GFX metadata BO sizes and alignment info (in bytes) */ +struct drm_amdgpu_info_uq_fw_areas_gfx { + /* shadow area size */ + __u32 shadow_size; + /* shadow area base virtual mem alignment */ + __u32 shadow_alignment; + /* context save area size */ + __u32 csa_size; + /* context save area base virtual mem alignment */ + __u32 csa_alignment; +}; + +/* IP specific fw related information used in the + * subquery AMDGPU_INFO_UQ_FW_AREAS + */ +struct drm_amdgpu_info_uq_fw_areas { + union { + struct drm_amdgpu_info_uq_fw_areas_gfx gfx; + }; +}; + struct drm_amdgpu_info_num_handles { /** Max handles as supported by firmware for UVD */ __u32 uvd_max_handles; @@ -1532,6 +1555,23 @@ struct drm_amdgpu_info_gpuvm_fault { __u32 vmhub; }; +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + /* * Supported GPU families */ -- cgit v1.2.3 From 1af688126361bc881b134b9d25738e22dd457f30 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 24 Mar 2025 16:26:00 -0400 Subject: drm/amdgpu: add UAPI to query if user queues are supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an INFO query to check if user queues are supported. v2: switch to a mask of IPs (Marek) v3: move to drm_amdgpu_info_device (Marek) Cc: marek.olsak@amd.com Cc: prike.liang@amd.com Cc: sunil.khatri@amd.com Cc: yogesh.mohanmarimuthu@amd.com Reviewed-by: Marek Olšák Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 5dbd9037afe7..ef97c0d78b8a 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1453,6 +1453,9 @@ struct drm_amdgpu_info_device { __u32 csa_size; /* context save area base virtual alignment for gfx11 */ __u32 csa_alignment; + /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ + __u32 userq_ip_mask; + __u32 pad; }; struct drm_amdgpu_info_hw_ip { -- cgit v1.2.3 From 4172b556fd5bdfdf9b2d1e42da39df6ce99ee989 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 7 Apr 2025 15:32:33 -0400 Subject: drm/amdkfd: add smi events for process start and end rocm-smi will be able to show the events for KFD process start/end, it is the implementation of this feature. Signed-off-by: Eric Huang Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1e59344c5673..04c7d283dc7d 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -536,6 +536,8 @@ enum kfd_smi_event { KFD_SMI_EVENT_QUEUE_EVICTION = 9, KFD_SMI_EVENT_QUEUE_RESTORE = 10, KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, + KFD_SMI_EVENT_PROCESS_START = 12, + KFD_SMI_EVENT_PROCESS_END = 13, /* * max event number, as a flag bit to get events from all processes, @@ -651,6 +653,9 @@ struct kfd_ioctl_smi_events_args { "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ (node), (unmap_trigger) +#define KFD_EVENT_FMT_PROCESS(pid, task_name)\ + "%x %s\n", (pid), (task_name) + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.2.3 From fced8e7d2ddeba7f41b19e065f8c02a9abf9ac00 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:54:27 -0500 Subject: drm/amdgpu: convert userq UAPI _pad to flags Reuse the _pad field for flags. Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index ef97c0d78b8a..1a451907184c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -355,7 +355,10 @@ struct drm_amdgpu_userq_in { * and doorbell_offset in the doorbell bo. */ __u32 doorbell_offset; - __u32 _pad; + /** + * @flags: flags used for queue parameters + */ + __u32 flags; /** * @queue_va: Virtual address of the GPU memory which holds the queue * object. The queue holds the workload packets. -- cgit v1.2.3 From 024cc8a71aac8194fc2883782d36cbad6a9fe36b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:55:36 -0500 Subject: drm/amdgpu/userq: add UAPI for setting queue priority Allow the user to set a queue priority levels: 0 - normal low - most apps (maps to MES AMD_PRIORITY_LEVEL_NORMAL) 1 - low - background jobs (maps to MES AMD_PRIORITY_LEVEL_LOW) 2 - normal high - apps that need relative high (maps to MES AMD_PRIORITY_LEVEL_MEDIUM) 3 - high (admin only - for compositors) (maps to MES AMD_PRIORITY_LEVEL_HIGH) Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1a451907184c..267ed4adcfb9 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -329,6 +329,15 @@ union drm_amdgpu_ctx { #define AMDGPU_USERQ_OP_CREATE 1 #define AMDGPU_USERQ_OP_FREE 2 +/* queue priority levels */ +/* low < normal low < normal high < high */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ + /* * This structure is a container to pass input configuration * info for all supported userqueue related operations. -- cgit v1.2.3 From 94a62b0f573f868f6f706d96c8c577c2e9b309e0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 17:12:58 -0500 Subject: drm/amdgpu/userq: add UAPI for setting up secure queues If the queues needs to access TMZ surfaces, it must be set up as secure. Reviewed-by: Sunil Khatri Reviewed-by: Jesse.Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 267ed4adcfb9..284ac25ab5c4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -337,6 +337,8 @@ union drm_amdgpu_ctx { #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ +/* for queues that need access to protected content */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2) /* * This structure is a container to pass input configuration -- cgit v1.2.3 From 4b27406380b0b9ada6b4893bc8f6766dd34fff36 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 11 Apr 2025 15:08:30 +0530 Subject: drm/amdgpu: Add queue id support to the user queue wait IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add queue id support to the user queue wait IOCTL drm_amdgpu_userq_wait structure. This is required to retrieve the wait user queue and maintain the fence driver references in it so that the user queue in the same context releases their reference to the fence drivers at some point before queue destruction. Otherwise, we would gather those references until we don't have any more space left and crash. v2: Modify the UAPI comment as per the mesa and libdrm UAPI comment. Libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/408 Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34493 Signed-off-by: Arunpravin Paneer Selvam Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 284ac25ab5c4..1fd96474e64c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -515,6 +515,12 @@ struct drm_amdgpu_userq_fence_info { }; struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the + * wait queue and maintain the fence driver references in it. + */ + __u32 waitq_id; + __u32 pad; /** * @syncobj_handles: The list of syncobj handles submitted by the user queue * job to get the va/value pairs. -- cgit v1.2.3