From 7eb61c2dffa635e4fb05f89736d8fcf39bb24d42 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 10 Oct 2023 12:17:41 +0200
Subject: drm/amdgpu: UAPI for user queue management
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch intorduces new UAPI/IOCTL for usermode graphics
queue. The userspace app will fill this structure and request
the graphics driver to add a graphics work queue for it. The
output of this UAPI is a queue id.

This UAPI maps the queue into GPU, so the graphics app can start
submitting work to the queue as soon as the call returns.

V2: Addressed review comments from Alex and Christian
    - Make the doorbell offset's comment clearer
    - Change the output parameter name to queue_id

V3: Integration with doorbell manager

V4:
    - Updated the UAPI doc (Pierre-Eric)
    - Created a Union for engine specific MQDs (Alex)
    - Added Christian's R-B
V5:
    - Add variables for GDS and CSA in MQD structure (Alex)
    - Make MQD data a ptr-size pair instead of union (Alex)

V9:
   - renamed struct drm_amdgpu_userq_mqd_gfx_v11 to struct
     drm_amdgpu_userq_mqd as its being used for SDMA and
     compute queues as well

V10:
    - keeping the drm_amdgpu_userq_mqd IP independent, moving the
      _gfx_v11 objects in a separate structure in other patch.
      (Alex)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 90 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 25d5c6e90a99..53081050cb3e 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -54,6 +54,7 @@ extern "C" {
 #define DRM_AMDGPU_VM			0x13
 #define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
 #define DRM_AMDGPU_SCHED		0x15
+#define DRM_AMDGPU_USERQ		0x16
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -71,6 +72,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
 #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
 #define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
+#define DRM_IOCTL_AMDGPU_USERQ		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
 
 /**
  * DOC: memory domains
@@ -319,6 +321,94 @@ union drm_amdgpu_ctx {
 	union drm_amdgpu_ctx_out out;
 };
 
+/* user queue IOCTL */
+#define AMDGPU_USERQ_OP_CREATE	1
+#define AMDGPU_USERQ_OP_FREE	2
+
+/* Flag to indicate secure buffer related workload, unused for now */
+#define AMDGPU_USERQ_MQD_FLAGS_SECURE	(1 << 0)
+/* Flag to indicate AQL workload, unused for now */
+#define AMDGPU_USERQ_MQD_FLAGS_AQL	(1 << 1)
+
+/*
+ * MQD (memory queue descriptor) is a set of parameters which allow
+ * the GPU to uniquely define and identify a usermode queue. This
+ * structure defines the MQD for GFX-V11 IP ver 0.
+ */
+struct drm_amdgpu_userq_in {
+	/** AMDGPU_USERQ_OP_* */
+	__u32	op;
+	/** Queue handle for USERQ_OP_FREE */
+	__u32	queue_id;
+	/** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
+	__u32   ip_type;
+	/**
+	 * @flags: flags to indicate special function for queue like secure
+	 * buffer (TMZ). Unused for now.
+	 */
+	__u32   flags;
+	/**
+	 * @doorbell_handle: the handle of doorbell GEM object
+	 * associated to this client.
+	 */
+	__u32   doorbell_handle;
+	/**
+	 * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
+	 * Kernel will generate absolute doorbell offset using doorbell_handle
+	 * and doorbell_offset in the doorbell bo.
+	 */
+	__u32   doorbell_offset;
+
+	/**
+	 * @queue_va: Virtual address of the GPU memory which holds the queue
+	 * object. The queue holds the workload packets.
+	 */
+	__u64   queue_va;
+	/**
+	 * @queue_size: Size of the queue in bytes, this needs to be 256-byte
+	 * aligned.
+	 */
+	__u64   queue_size;
+	/**
+	 * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR.
+	 * This object must be at least 8 byte in size and aligned to 8-byte offset.
+	 */
+	__u64   rptr_va;
+	/**
+	 * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR.
+	 * This object must be at least 8 byte in size and aligned to 8-byte offset.
+	 *
+	 * Queue, RPTR and WPTR can come from the same object, as long as the size
+	 * and alignment related requirements are met.
+	 */
+	__u64   wptr_va;
+	/**
+	 * @mqd: Queue descriptor for USERQ_OP_CREATE
+	 * MQD data can be of different size for different GPU IP/engine and
+	 * their respective versions/revisions, so this points to a __u64 *
+	 * which holds MQD of this usermode queue.
+	 */
+	__u64 mqd;
+	/**
+	 * @size: size of MQD data in bytes, it must match the MQD structure
+	 * size of the respective engine/revision defined in UAPI for ex, for
+	 * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11).
+	 */
+	__u64 mqd_size;
+};
+
+struct drm_amdgpu_userq_out {
+	/** Queue handle */
+	__u32	queue_id;
+	/** Flags */
+	__u32	flags;
+};
+
+union drm_amdgpu_userq {
+	struct drm_amdgpu_userq_in in;
+	struct drm_amdgpu_userq_out out;
+};
+
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID	1
 #define AMDGPU_VM_OP_UNRESERVE_VMID	2
-- 
cgit v1.2.3


From a1d201e16940775f0e2f0230960d69e40879ec6d Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Thu, 2 May 2024 12:37:30 +0200
Subject: drm/amdgpu: enable GFX-V11 userqueue support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch enables GFX-v11 IP support in the usermode queue base
code. It typically:
- adds a GFX_v11 specific MQD structure
- sets IP functions to create and destroy MQDs
- sets MQD objects coming from userspace

V10: introduced this spearate patch for GFX V11 enabling (Alex).
V11: Addressed review comments:
     - update the comments in GFX mqd structure informing user about using
       the INFO IOCTL for object sizes (Alex)
     - rename struct drm_amdgpu_userq_mqd_gfx_v11 to
       drm_amdgpu_userq_mqd_gfx11 (Marek)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 53081050cb3e..4e07e15d5076 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -409,6 +409,25 @@ union drm_amdgpu_userq {
 	struct drm_amdgpu_userq_out out;
 };
 
+/* GFX V11 IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_gfx11 {
+	/**
+	 * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
+	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+	 */
+	__u64   shadow_va;
+	/**
+	 * @gds_va: Virtual address of the GPU memory to hold the GDS buffer.
+	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+	 */
+	__u64   gds_va;
+	/**
+	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
+	 */
+	__u64   csa_va;
+};
+
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID	1
 #define AMDGPU_VM_OP_UNRESERVE_VMID	2
-- 
cgit v1.2.3


From 543b6145377458b5ec0d1440606c31db62867bf4 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <Arvind.Yadav@amd.com>
Date: Tue, 27 Aug 2024 14:52:07 +0530
Subject: drm/amdgpu: enable SDMA usermode queues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch does necessary modifications to enable the SDMA
usermode queues using the existing userqueue infrastructure.

V9:  introduced this patch in the series
V10: use header file instead of extern (Alex)
V11: rename drm_amdgpu_userq_mqd_sdma_gfx_v11 to
     drm_amdgpu_userq_mqd_sdma_gfx11 (Marek)

Cc: Christian König <Christian.Koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4e07e15d5076..6ae988574084 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -428,6 +428,16 @@ struct drm_amdgpu_userq_mqd_gfx11 {
 	__u64   csa_va;
 };
 
+/* GFX V11 SDMA IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_sdma_gfx11 {
+	/**
+	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
+	 * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL
+	 * to get the size.
+	 */
+	__u64   csa_va;
+};
+
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID	1
 #define AMDGPU_VM_OP_UNRESERVE_VMID	2
-- 
cgit v1.2.3


From 2c695d7c072067cd53fb52e52aa0b48277120314 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Thu, 9 May 2024 14:31:15 +0200
Subject: drm/amdgpu: enable compute/gfx usermode queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch does the necessary changes required to
enable compute workload support using the existing
usermode queues infrastructure.

V9:  Patch introduced
V10: Add custom IP specific mqd strcuture for compute (Alex)
V11: Rename drm_amdgpu_userq_mqd_compute_gfx_v11 to
     drm_amdgpu_userq_mqd_compute_gfx11 (Marek)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 6ae988574084..59f0818e8dcd 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -438,6 +438,16 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 {
 	__u64   csa_va;
 };
 
+/* GFX V11 Compute IP specific MQD parameters */
+struct drm_amdgpu_userq_mqd_compute_gfx11 {
+	/**
+	 * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
+	 * This must be a from a separate GPU object, and must be at least 1 page
+	 * sized.
+	 */
+	__u64   eop_va;
+};
+
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID	1
 #define AMDGPU_VM_OP_UNRESERVE_VMID	2
-- 
cgit v1.2.3


From 6b0c7c367317a663a58f72f79e73ad787aac873d Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Wed, 30 Oct 2024 10:09:50 +0530
Subject: drm/amdgpu: UAPI headers for userqueue Secure semaphore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add UAPI header support for userqueue Secure semaphore

v2: Worked on review comments from Christian for the following
    modifications

    - Add bo handles, bo flags and padding fields.
    - Include value/va in a combined array.

v3: Worked on review comments from Christian

    - Add num_fences field to obtain the number of objects required
      to allocate memory for userq_fence_info.
    - Replace obj_handle name with syncobj_handle.
    - Replace point name with syncobj_point.
    - Replace count_handles name with num_syncobj_handles.
    - Fix structure padding related issues.

v4: Worked on review comments from Christian
    - Modify the bo flags description.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 115 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 59f0818e8dcd..4e9414c0f924 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -55,6 +55,8 @@ extern "C" {
 #define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
 #define DRM_AMDGPU_SCHED		0x15
 #define DRM_AMDGPU_USERQ		0x16
+#define DRM_AMDGPU_USERQ_SIGNAL		0x17
+#define DRM_AMDGPU_USERQ_WAIT		0x18
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -73,6 +75,8 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
 #define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
 #define DRM_IOCTL_AMDGPU_USERQ		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
+#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
+#define DRM_IOCTL_AMDGPU_USERQ_WAIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
 
 /**
  * DOC: memory domains
@@ -448,6 +452,117 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 {
 	__u64   eop_va;
 };
 
+/* dma_resv usage flag */
+#define AMDGPU_USERQ_BO_WRITE	1
+
+/* userq signal/wait ioctl */
+struct drm_amdgpu_userq_signal {
+	/**
+	 * @queue_id: Queue handle used by the userq fence creation function
+	 * to retrieve the WPTR.
+	 */
+	__u32	queue_id;
+	/**
+	 * @flags: flags to indicate special function for userq fence creation.
+	 * Unused for now.
+	 */
+	__u32	flags;
+	/**
+	 * @syncobj_handles_array: An array of syncobj handles used by the userq fence
+	 * creation IOCTL to install the created dma_fence object which can be
+	 * utilized by userspace to explicitly synchronize GPU commands.
+	 */
+	__u64	syncobj_handles_array;
+	/**
+	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
+	 * @syncobj_handles_array.
+	 */
+	__u64	num_syncobj_handles;
+	/**
+	 * @syncobj_point: A given point on the timeline to be signaled.
+	 * Unused for now.
+	 */
+	__u64	syncobj_point;
+	/**
+	 * @bo_handles_array: An array of GEM BO handles used by the userq fence creation
+	 * IOCTL to install the created dma_fence object which can be utilized by
+	 * userspace to synchronize the BO usage between user processes.
+	 */
+	__u64	bo_handles_array;
+	/**
+	 * @num_bo_handles: A count that represents the number of GEM BO handles in
+	 * @bo_handles_array.
+	 */
+	__u32	num_bo_handles;
+	/**
+	 * @bo_flags: flags to indicate BOs synchronize for READ or WRITE
+	 */
+	__u32	bo_flags;
+};
+
+struct drm_amdgpu_userq_fence_info {
+	/**
+	 * @va: A gpu address allocated for each queue which stores the
+	 * read pointer (RPTR) value.
+	 */
+	__u64	va;
+	/**
+	 * @value: A 64 bit value represents the write pointer (WPTR) of the
+	 * queue commands which compared with the RPTR value to signal the
+	 * fences.
+	 */
+	__u64	value;
+};
+
+struct drm_amdgpu_userq_wait {
+	/**
+	 * @waitq_id: Queue handle used to retrieve the queue information to store
+	 * the fence driver references in the wait user queue structure.
+	 */
+	__u32	waitq_id;
+	/**
+	 * @flags: flags to specify special function for userq wait information.
+	 * Unused for now.
+	 */
+	__u32	flags;
+	/**
+	 * @bo_wait_flags: flags to define the BOs for READ or WRITE to store the
+	 * matching fence wait info pair in @userq_fence_info.
+	 */
+	__u32	bo_wait_flags;
+	__u32	pad;
+	/**
+	 * @syncobj_handles_array: An array of syncobj handles defined to get the
+	 * fence wait information of every syncobj handles in the array.
+	 */
+	__u64	syncobj_handles_array;
+	/**
+	 * @bo_handles_array: An array of GEM BO handles defined to fetch the fence
+	 * wait information of every BO handles in the array.
+	 */
+	__u64	bo_handles_array;
+	/**
+	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
+	 * @syncobj_handles_array.
+	 */
+	__u32	num_syncobj_handles;
+	/**
+	 * @num_bo_handles: A count that represents the number of GEM BO handles in
+	 * @bo_handles_array.
+	 */
+	__u32	num_bo_handles;
+	/**
+	 * @userq_fence_info: An array of fence information (va and value) pair of each
+	 * objects stored in @syncobj_handles_array and @bo_handles_array.
+	 */
+	__u64	userq_fence_info;
+	/**
+	 * @num_fences: A count that represents the number of actual fences installed in
+	 * each syncobj and bo handles.
+	 */
+	__u64	num_fences;
+};
+
 /* vm ioctl */
 #define AMDGPU_VM_OP_RESERVE_VMID	1
 #define AMDGPU_VM_OP_UNRESERVE_VMID	2
-- 
cgit v1.2.3


From 15e30a6e479282fef4365bd586159911c8cf140d Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Wed, 30 Oct 2024 10:33:28 +0530
Subject: drm/amdgpu: Add wait IOCTL timeline syncobj support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add user fence wait IOCTL timeline syncobj support.

v2:(Christian)
  - handle dma_fence_wait() return value.
  - shorten the variable name syncobj_timeline_points a bit.
  - move num_points up to avoid padding issues.

v3:(Christian)
  - Handle timeline drm_syncobj_find_fence() call error
    handling
  - Use dma_fence_unwrap_for_each() in timeline fence as
    there could be more than one fence.

v4:(Christian)
  - Drop the first num_fences since fence is always included in
    the dma_fence_unwrap_for_each() iteration, when fence != f
    then fence is most likely just a container.

v5: Added Alex RB to merge the kernel UAPI changes since he has
    already approved the amdgpu_drm.h changes.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4e9414c0f924..1a21259cb8c4 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -530,12 +530,26 @@ struct drm_amdgpu_userq_wait {
 	 * matching fence wait info pair in @userq_fence_info.
 	 */
 	__u32	bo_wait_flags;
-	__u32	pad;
+	/**
+	 * @num_points: A count that represents the number of timeline syncobj handles in
+	 * syncobj_handles_array.
+	 */
+	__u32	num_points;
 	/**
 	 * @syncobj_handles_array: An array of syncobj handles defined to get the
 	 * fence wait information of every syncobj handles in the array.
 	 */
 	__u64	syncobj_handles_array;
+	/**
+	 * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the
+	 * fence wait information of every timeline syncobj handles in the array.
+	 */
+	__u64   syncobj_timeline_handles;
+	/**
+	 * @syncobj_timeline_points: An array of timeline syncobj points defined to get the
+	 * fence wait points of every timeline syncobj handles in the syncobj_handles_array.
+	 */
+	__u64	syncobj_timeline_points;
 	/**
 	 * @bo_handles_array: An array of GEM BO handles defined to fetch the fence
 	 * wait information of every BO handles in the array.
-- 
cgit v1.2.3


From 70773bef4e091ff6d2a91e3dfb4f29013eb81f1f Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav@amd.com>
Date: Wed, 25 Sep 2024 18:09:49 +0200
Subject: drm/amdgpu: update userqueue BOs and PDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch updates the VM_IOCTL to allow userspace to synchronize
the mapping/unmapping of a BO in the page table.

The major changes are:
- it adds a drm_timeline object as an input parameter to the VM IOCTL.
- this object is used by the kernel to sync the update of the BO in
  the page table during the mapping of the object.
- the kernel also synchronizes the tlb flush of the page table entry of
  this object during the unmapping (Added in this series:
  https://patchwork.freedesktop.org/series/131276/ and
  https://patchwork.freedesktop.org/patch/584182/)
- the userspace can wait on this timeline, and then the BO is ready to
  be consumed by the GPU.

The UAPI for the same has been approved here:
https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392

V2:
 - remove the eviction fence coupling

V3:
 - added the drm timeline support instead of input/output fence
   (Christian)

V4:
 - made timeline 64-bit (Christian)
 - bug fix (Arvind)

V5: GLCTS bug fix (Arvind)
V6: Rename syncobj_handle -> timeline_syncobj_out
    Rename point -> timeline_point_in (Marek)
V7: Addressed review comments from Christian:
    - do not send last_update fence in case of vm_clear_freed, instead
      return the fence from gen_va_update_vm
    - move the functions to update bo_mapping  to amdgpu_gem.c
    - do not use amdgpu_userq_update_vm anymore in userq_create()
V8: Addressed review comments from Christian:
    - Split amdgpu_gem_update_bo_mapping function.
    - amdgpu_gem_va_update_vm should return stub for error.
V9: Addressed review comments from Christian:
    - Rename the function amdgpu_gem_update_timeline_node.
    - amdgpu_gem_update_timeline_node should be void function.
    - when timeline_point is zero don't allocate a chain and
      call drm_syncobj_replace_fence() instead of
      drm_syncobj_add_point().
V11: rebase
V12: Fix 32-bit holes issue in sturct drm_amdgpu_gem_va.
V13: Fix the review comment by renaming timeline syncobj (Marek)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 1a21259cb8c4..ca82935ff93a 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -857,6 +857,15 @@ struct drm_amdgpu_gem_va {
 	__u64 offset_in_bo;
 	/** Specify mapping size. Must be correctly aligned. */
 	__u64 map_size;
+	/**
+	 * vm_timeline_point is a sequence number used to add new timeline point.
+	 */
+	__u64 vm_timeline_point;
+	/**
+	 * The vm page table update fence is installed in given vm_timeline_syncobj_out
+	 * at vm_timeline_point.
+	 */
+	__u32 vm_timeline_syncobj_out;
 };
 
 #define AMDGPU_HW_IP_GFX          0
-- 
cgit v1.2.3


From cb4a73f46f253b5f7a30b1e0488c8ef2832e8747 Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Wed, 30 Oct 2024 10:59:04 +0530
Subject: drm/amdgpu: Add separate array of read and write for BO handles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop AMDGPU_USERQ_BO_WRITE as this should not be a global option
of the IOCTL, It should be option per buffer. Hence adding separate
array for read and write BO handles.

v2(Marek):
  - Internal kernel details shouldn't be here. This file should only
    document the observed behavior, not the implementation .

v3:
  - Fix DAL CI clang issue.

v4:
  - Added Alex RB to merge the kernel UAPI changes since he has
    already approved the amdgpu_drm.h changes.

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Suggested-by: Marek Olšák <marek.olsak@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 50 +++++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index ca82935ff93a..02cf03e811d5 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -452,9 +452,6 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 {
 	__u64   eop_va;
 };
 
-/* dma_resv usage flag */
-#define AMDGPU_USERQ_BO_WRITE	1
-
 /* userq signal/wait ioctl */
 struct drm_amdgpu_userq_signal {
 	/**
@@ -484,20 +481,30 @@ struct drm_amdgpu_userq_signal {
 	 */
 	__u64	syncobj_point;
 	/**
-	 * @bo_handles_array: An array of GEM BO handles used by the userq fence creation
-	 * IOCTL to install the created dma_fence object which can be utilized by
-	 * userspace to synchronize the BO usage between user processes.
+	 * @bo_read_handles: The list of BO handles that the submitted user queue job
+	 * is using for read only. This will update BO fences in the kernel.
+	 */
+	__u64	bo_read_handles;
+	/**
+	 * @bo_write_handles: The list of BO handles that the submitted user queue job
+	 * is using for write only. This will update BO fences in the kernel.
+	 */
+	__u64	bo_write_handles;
+	/**
+	 * @num_read_bo_handles: A count that represents the number of read BO handles in
+	 * @bo_read_handles.
 	 */
-	__u64	bo_handles_array;
+	__u32	num_read_bo_handles;
 	/**
-	 * @num_bo_handles: A count that represents the number of GEM BO handles in
-	 * @bo_handles_array.
+	 * @num_write_bo_handles: A count that represents the number of write BO handles in
+	 * @bo_write_handles.
 	 */
-	__u32	num_bo_handles;
+	__u32	num_write_bo_handles;
 	/**
 	 * @bo_flags: flags to indicate BOs synchronize for READ or WRITE
 	 */
 	__u32	bo_flags;
+	__u32	pad;
 };
 
 struct drm_amdgpu_userq_fence_info {
@@ -551,20 +558,31 @@ struct drm_amdgpu_userq_wait {
 	 */
 	__u64	syncobj_timeline_points;
 	/**
-	 * @bo_handles_array: An array of GEM BO handles defined to fetch the fence
-	 * wait information of every BO handles in the array.
+	 * @bo_read_handles: The list of read BO handles submitted by the user queue
+	 * job to get the va/value pairs.
 	 */
-	__u64	bo_handles_array;
+	__u64	bo_read_handles;
+	/**
+	 * @bo_write_handles: The list of write BO handles submitted by the user queue
+	 * job to get the va/value pairs.
+	 */
+	__u64	bo_write_handles;
 	/**
 	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
 	 * @syncobj_handles_array.
 	 */
 	__u32	num_syncobj_handles;
 	/**
-	 * @num_bo_handles: A count that represents the number of GEM BO handles in
-	 * @bo_handles_array.
+	 * @num_read_bo_handles: A count that represents the number of read BO handles in
+	 * @bo_read_handles.
+	 */
+	__u32	num_read_bo_handles;
+	/**
+	 * @num_write_bo_handles: A count that represents the number of write BO handles in
+	 * @bo_write_handles.
 	 */
-	__u32	num_bo_handles;
+	__u32	num_write_bo_handles;
+	__u32	pad;
 	/**
 	 * @userq_fence_info: An array of fence information (va and value) pair of each
 	 * objects stored in @syncobj_handles_array and @bo_handles_array.
-- 
cgit v1.2.3


From 38c67ec9aa4be7bc0ae55e7bebe95f615fa09a1e Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav@amd.com>
Date: Wed, 25 Sep 2024 18:10:41 +0200
Subject: drm/amdgpu: Add input fence to sync bo map/unmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds input fences to VM_IOCTL for buffer object.
The kernel will map/unmap the BO only when the fence is signaled.
The UAPI for the same has been approved here:
https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392

V2: Bug fix (Arvind)
V3: Bug fix (Arvind)
V4: Rename UAPI objects as per UAPI review (Marek)
V5: Addressed review comemnts from Christian
     - function should return error.
     - Add 'TODO' comment
     - The input fence should be independent of the operation.
V6: Addressed review comemnts from Christian
    - Release the memory allocated by memdup_user().
V7: Addressed review comemnts from Christian
    - Drop the debug print and add "return r;" for the error handling.

V11: Rebase
v12: Fix 32-bit holes issue in sturct drm_amdgpu_gem_va.
v13: Fix deadlock issue.
v14: Fix merge conflict.
v15: Fix review comment by renaming syncobj handles.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 02cf03e811d5..0910a6f8c5f2 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -884,6 +884,10 @@ struct drm_amdgpu_gem_va {
 	 * at vm_timeline_point.
 	 */
 	__u32 vm_timeline_syncobj_out;
+	/** the number of syncobj handles in @input_fence_syncobj_handles */
+	__u32 num_syncobj_handles;
+	/** Array of sync object handle to wait for given input fences */
+	__u64 input_fence_syncobj_handles;
 };
 
 #define AMDGPU_HW_IP_GFX          0
-- 
cgit v1.2.3


From 2e06b175fff5ba1415b74fed441c0d066b8c8dab Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Mon, 11 Nov 2024 12:34:30 +0100
Subject: drm/amdgpu: fix userqueue UAPI comments

This patch fixes some of the pending UAPI review comments
from the libDRM/UAPI review process.

- It updates some outdated comments in the userqueue UAPI header
  highlighted during the libdrm UAPI review.
- It removes the GDS BO support which was found unused.
- It also removes the unused flags parameter from the UAPI.
- It also adds a padding variables in userqueue in/out structures.

(Pierre-Eric and Marek)
  - clarify comments on top of drm_amdgpu_userq_in
  - clarify comment for queue_id (in)
  - clarify comment for mqd
  - clarify comment for compute MQD size
  - clarify comment for queue_id (out)
  - remove GDB object from BO object list
  - remove the unused flags parameter

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 54 +++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 0910a6f8c5f2..6158496cc1d0 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -325,35 +325,28 @@ union drm_amdgpu_ctx {
 	union drm_amdgpu_ctx_out out;
 };
 
-/* user queue IOCTL */
+/* user queue IOCTL operations */
 #define AMDGPU_USERQ_OP_CREATE	1
 #define AMDGPU_USERQ_OP_FREE	2
 
-/* Flag to indicate secure buffer related workload, unused for now */
-#define AMDGPU_USERQ_MQD_FLAGS_SECURE	(1 << 0)
-/* Flag to indicate AQL workload, unused for now */
-#define AMDGPU_USERQ_MQD_FLAGS_AQL	(1 << 1)
-
 /*
- * MQD (memory queue descriptor) is a set of parameters which allow
- * the GPU to uniquely define and identify a usermode queue. This
- * structure defines the MQD for GFX-V11 IP ver 0.
+ * This structure is a container to pass input configuration
+ * info for all supported userqueue related operations.
+ * For operation AMDGPU_USERQ_OP_CREATE: user is expected
+ *  to set all fields, excep the parameter 'queue_id'.
+ * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected
+ *  to be set is 'queue_id', eveything else is ignored.
  */
 struct drm_amdgpu_userq_in {
 	/** AMDGPU_USERQ_OP_* */
 	__u32	op;
-	/** Queue handle for USERQ_OP_FREE */
+	/** Queue id passed for operation USERQ_OP_FREE */
 	__u32	queue_id;
 	/** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
 	__u32   ip_type;
-	/**
-	 * @flags: flags to indicate special function for queue like secure
-	 * buffer (TMZ). Unused for now.
-	 */
-	__u32   flags;
 	/**
 	 * @doorbell_handle: the handle of doorbell GEM object
-	 * associated to this client.
+	 * associated with this userqueue client.
 	 */
 	__u32   doorbell_handle;
 	/**
@@ -362,7 +355,7 @@ struct drm_amdgpu_userq_in {
 	 * and doorbell_offset in the doorbell bo.
 	 */
 	__u32   doorbell_offset;
-
+	__u32 _pad;
 	/**
 	 * @queue_va: Virtual address of the GPU memory which holds the queue
 	 * object. The queue holds the workload packets.
@@ -387,25 +380,31 @@ struct drm_amdgpu_userq_in {
 	 */
 	__u64   wptr_va;
 	/**
-	 * @mqd: Queue descriptor for USERQ_OP_CREATE
+	 * @mqd: MQD (memory queue descriptor) is a set of parameters which allow
+	 * the GPU to uniquely define and identify a usermode queue.
+	 *
 	 * MQD data can be of different size for different GPU IP/engine and
 	 * their respective versions/revisions, so this points to a __u64 *
-	 * which holds MQD of this usermode queue.
+	 * which holds IP specific MQD of this usermode queue.
 	 */
 	__u64 mqd;
 	/**
 	 * @size: size of MQD data in bytes, it must match the MQD structure
 	 * size of the respective engine/revision defined in UAPI for ex, for
-	 * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11).
+	 * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11).
 	 */
 	__u64 mqd_size;
 };
 
+/* The structure to carry output of userqueue ops */
 struct drm_amdgpu_userq_out {
-	/** Queue handle */
+	/**
+	 * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique
+	 * queue ID to represent the newly created userqueue in the system, otherwise
+	 * it should be ignored.
+	 */
 	__u32	queue_id;
-	/** Flags */
-	__u32	flags;
+	__u32 _pad;
 };
 
 union drm_amdgpu_userq {
@@ -420,11 +419,6 @@ struct drm_amdgpu_userq_mqd_gfx11 {
 	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
 	 */
 	__u64   shadow_va;
-	/**
-	 * @gds_va: Virtual address of the GPU memory to hold the GDS buffer.
-	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
-	 */
-	__u64   gds_va;
 	/**
 	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
 	 * Use AMDGPU_INFO_IOCTL to find the exact size of the object.
@@ -446,8 +440,8 @@ struct drm_amdgpu_userq_mqd_sdma_gfx11 {
 struct drm_amdgpu_userq_mqd_compute_gfx11 {
 	/**
 	 * @eop_va: Virtual address of the GPU memory to hold the EOP buffer.
-	 * This must be a from a separate GPU object, and must be at least 1 page
-	 * sized.
+	 * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL
+	 * to get the size.
 	 */
 	__u64   eop_va;
 };
-- 
cgit v1.2.3


From 2761bb9a31f1b863037547d73dc6aac1461ceab6 Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Mon, 11 Nov 2024 12:43:07 +0530
Subject: drm/amdgpu: Modify userq signal/wait struct field names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify kernel UAPI userq signal/wait struct field names and
description corresponding to the libdrm UAPI review comments.

libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 102 +++++++++++++++---------------------------
 1 file changed, 36 insertions(+), 66 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 6158496cc1d0..72dc16dbca7f 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -453,27 +453,17 @@ struct drm_amdgpu_userq_signal {
 	 * to retrieve the WPTR.
 	 */
 	__u32	queue_id;
+	__u32	pad;
 	/**
-	 * @flags: flags to indicate special function for userq fence creation.
-	 * Unused for now.
-	 */
-	__u32	flags;
-	/**
-	 * @syncobj_handles_array: An array of syncobj handles used by the userq fence
-	 * creation IOCTL to install the created dma_fence object which can be
-	 * utilized by userspace to explicitly synchronize GPU commands.
+	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
+	 * job to be signaled.
 	 */
-	__u64	syncobj_handles_array;
+	__u64	syncobj_handles;
 	/**
 	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
-	 * @syncobj_handles_array.
+	 * @syncobj_handles.
 	 */
 	__u64	num_syncobj_handles;
-	/**
-	 * @syncobj_point: A given point on the timeline to be signaled.
-	 * Unused for now.
-	 */
-	__u64	syncobj_point;
 	/**
 	 * @bo_read_handles: The list of BO handles that the submitted user queue job
 	 * is using for read only. This will update BO fences in the kernel.
@@ -485,20 +475,15 @@ struct drm_amdgpu_userq_signal {
 	 */
 	__u64	bo_write_handles;
 	/**
-	 * @num_read_bo_handles: A count that represents the number of read BO handles in
+	 * @num_bo_read_handles: A count that represents the number of read BO handles in
 	 * @bo_read_handles.
 	 */
-	__u32	num_read_bo_handles;
+	__u32	num_bo_read_handles;
 	/**
-	 * @num_write_bo_handles: A count that represents the number of write BO handles in
+	 * @num_bo_write_handles: A count that represents the number of write BO handles in
 	 * @bo_write_handles.
 	 */
-	__u32	num_write_bo_handles;
-	/**
-	 * @bo_flags: flags to indicate BOs synchronize for READ or WRITE
-	 */
-	__u32	bo_flags;
-	__u32	pad;
+	__u32	num_bo_write_handles;
 };
 
 struct drm_amdgpu_userq_fence_info {
@@ -517,38 +502,18 @@ struct drm_amdgpu_userq_fence_info {
 
 struct drm_amdgpu_userq_wait {
 	/**
-	 * @waitq_id: Queue handle used to retrieve the queue information to store
-	 * the fence driver references in the wait user queue structure.
-	 */
-	__u32	waitq_id;
-	/**
-	 * @flags: flags to specify special function for userq wait information.
-	 * Unused for now.
-	 */
-	__u32	flags;
-	/**
-	 * @bo_wait_flags: flags to define the BOs for READ or WRITE to store the
-	 * matching fence wait info pair in @userq_fence_info.
-	 */
-	__u32	bo_wait_flags;
-	/**
-	 * @num_points: A count that represents the number of timeline syncobj handles in
-	 * syncobj_handles_array.
-	 */
-	__u32	num_points;
-	/**
-	 * @syncobj_handles_array: An array of syncobj handles defined to get the
-	 * fence wait information of every syncobj handles in the array.
+	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
+	 * job to get the va/value pairs.
 	 */
-	__u64	syncobj_handles_array;
+	__u64	syncobj_handles;
 	/**
-	 * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the
-	 * fence wait information of every timeline syncobj handles in the array.
+	 * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by
+	 * the user queue job to get the va/value pairs at given @syncobj_timeline_points.
 	 */
-	__u64   syncobj_timeline_handles;
+	__u64	syncobj_timeline_handles;
 	/**
-	 * @syncobj_timeline_points: An array of timeline syncobj points defined to get the
-	 * fence wait points of every timeline syncobj handles in the syncobj_handles_array.
+	 * @syncobj_timeline_points: The list of timeline syncobj points submitted by the
+	 * user queue job for the corresponding @syncobj_timeline_handles.
 	 */
 	__u64	syncobj_timeline_points;
 	/**
@@ -561,32 +526,37 @@ struct drm_amdgpu_userq_wait {
 	 * job to get the va/value pairs.
 	 */
 	__u64	bo_write_handles;
+	/**
+	 * @num_syncobj_timeline_handles: A count that represents the number of timeline
+	 * syncobj handles in @syncobj_timeline_handles.
+	 */
+	__u16	num_syncobj_timeline_handles;
+	/**
+	 * @num_fences: This field can be used both as input and output. As input it defines
+	 * the maximum number of fences that can be returned and as output it will specify
+	 * how many fences were actually returned from the ioctl.
+	 */
+	__u16	num_fences;
 	/**
 	 * @num_syncobj_handles: A count that represents the number of syncobj handles in
-	 * @syncobj_handles_array.
+	 * @syncobj_handles.
 	 */
 	__u32	num_syncobj_handles;
 	/**
-	 * @num_read_bo_handles: A count that represents the number of read BO handles in
+	 * @num_bo_read_handles: A count that represents the number of read BO handles in
 	 * @bo_read_handles.
 	 */
-	__u32	num_read_bo_handles;
+	__u32	num_bo_read_handles;
 	/**
-	 * @num_write_bo_handles: A count that represents the number of write BO handles in
+	 * @num_bo_write_handles: A count that represents the number of write BO handles in
 	 * @bo_write_handles.
 	 */
-	__u32	num_write_bo_handles;
-	__u32	pad;
-	/**
-	 * @userq_fence_info: An array of fence information (va and value) pair of each
-	 * objects stored in @syncobj_handles_array and @bo_handles_array.
-	 */
-	__u64	userq_fence_info;
+	__u32	num_bo_write_handles;
 	/**
-	 * @num_fences: A count that represents the number of actual fences installed in
-	 * each syncobj and bo handles.
+	 * @out_fences: The field is a return value from the ioctl containing the list of
+	 * address/value pairs to wait for.
 	 */
-	__u64	num_fences;
+	__u64	out_fences;
 };
 
 /* vm ioctl */
-- 
cgit v1.2.3


From 90c448fef3120d79bd8031665213981c966dbaf4 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Wed, 30 Oct 2024 15:39:42 +0100
Subject: drm/amdgpu: add new AMDGPU_INFO subquery for userq objects

This patch adds a new subquery (AMDGPU_INFO_UQ_FW_AREAS) in
AMDGPU_INFO_IOCTL to get the size and alignment of shadow
and csa objects from the FW setup. This information is
required for the userqueue consumers.

V2: Added Alex's suggestions and addressed review comments:
- make this query IP specific (GFX/SDMA etc)
- give a better title (AMDGPU_INFO_UQ_METADATA)
- restructured the code as per sample code shared by Alex

V3: Split the UAPI patch from shadow_size_fn modifications
V4: Addressed review comments from UAPI review (Marek/Pierre-Eric)
    - Change the query name to AMDGPU_INFO_UQ_FW_AREAS
    - remove unused inpur parameter for AMDGPU_HW_IP*

UAPI link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/400/

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Arvind Yadav <arvind.yadav@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 72dc16dbca7f..5dbd9037afe7 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1193,6 +1193,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
 #define AMDGPU_INFO_MAX_IBS			0x22
 /* query last page fault info */
 #define AMDGPU_INFO_GPUVM_FAULT			0x23
+/* query FW object size and alignment */
+#define AMDGPU_INFO_UQ_FW_AREAS			0x24
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
@@ -1469,6 +1471,27 @@ struct drm_amdgpu_info_hw_ip {
 	__u32  ip_discovery_version;
 };
 
+/* GFX metadata BO sizes and alignment info (in bytes) */
+struct drm_amdgpu_info_uq_fw_areas_gfx {
+	/* shadow area size */
+	__u32 shadow_size;
+	/* shadow area base virtual mem alignment */
+	__u32 shadow_alignment;
+	/* context save area size */
+	__u32 csa_size;
+	/* context save area base virtual mem alignment */
+	__u32 csa_alignment;
+};
+
+/* IP specific fw related information used in the
+ * subquery AMDGPU_INFO_UQ_FW_AREAS
+ */
+struct drm_amdgpu_info_uq_fw_areas {
+	union {
+		struct drm_amdgpu_info_uq_fw_areas_gfx gfx;
+	};
+};
+
 struct drm_amdgpu_info_num_handles {
 	/** Max handles as supported by firmware for UVD */
 	__u32  uvd_max_handles;
@@ -1532,6 +1555,23 @@ struct drm_amdgpu_info_gpuvm_fault {
 	__u32 vmhub;
 };
 
+struct drm_amdgpu_info_uq_metadata_gfx {
+	/* shadow area size for gfx11 */
+	__u32 shadow_size;
+	/* shadow area base virtual alignment for gfx11 */
+	__u32 shadow_alignment;
+	/* context save area size for gfx11 */
+	__u32 csa_size;
+	/* context save area base virtual alignment for gfx11 */
+	__u32 csa_alignment;
+};
+
+struct drm_amdgpu_info_uq_metadata {
+	union {
+		struct drm_amdgpu_info_uq_metadata_gfx gfx;
+	};
+};
+
 /*
  * Supported GPU families
  */
-- 
cgit v1.2.3


From 1af688126361bc881b134b9d25738e22dd457f30 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 24 Mar 2025 16:26:00 -0400
Subject: drm/amdgpu: add UAPI to query if user queues are supported
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an INFO query to check if user queues are supported.

v2: switch to a mask of IPs (Marek)
v3: move to drm_amdgpu_info_device (Marek)

Cc: marek.olsak@amd.com
Cc: prike.liang@amd.com
Cc: sunil.khatri@amd.com
Cc: yogesh.mohanmarimuthu@amd.com
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 5dbd9037afe7..ef97c0d78b8a 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1453,6 +1453,9 @@ struct drm_amdgpu_info_device {
 	__u32 csa_size;
 	/* context save area base virtual alignment for gfx11 */
 	__u32 csa_alignment;
+	/* Userq IP mask (1 << AMDGPU_HW_IP_*) */
+	__u32 userq_ip_mask;
+	__u32 pad;
 };
 
 struct drm_amdgpu_info_hw_ip {
-- 
cgit v1.2.3


From 4172b556fd5bdfdf9b2d1e42da39df6ce99ee989 Mon Sep 17 00:00:00 2001
From: Eric Huang <jinhuieric.huang@amd.com>
Date: Mon, 7 Apr 2025 15:32:33 -0400
Subject: drm/amdkfd: add smi events for process start and end

rocm-smi will be able to show the events for KFD process
start/end, it is the implementation of this feature.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Kent Russell <kent.russell@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 1e59344c5673..04c7d283dc7d 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -536,6 +536,8 @@ enum kfd_smi_event {
 	KFD_SMI_EVENT_QUEUE_EVICTION = 9,
 	KFD_SMI_EVENT_QUEUE_RESTORE = 10,
 	KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
+	KFD_SMI_EVENT_PROCESS_START = 12,
+	KFD_SMI_EVENT_PROCESS_END = 13,
 
 	/*
 	 * max event number, as a flag bit to get events from all processes,
@@ -651,6 +653,9 @@ struct kfd_ioctl_smi_events_args {
 		"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
 		(node), (unmap_trigger)
 
+#define KFD_EVENT_FMT_PROCESS(pid, task_name)\
+		"%x %s\n", (pid), (task_name)
+
 /**************************************************************************************************
  * CRIU IOCTLs (Checkpoint Restore In Userspace)
  *
-- 
cgit v1.2.3


From fced8e7d2ddeba7f41b19e065f8c02a9abf9ac00 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 26 Feb 2025 16:54:27 -0500
Subject: drm/amdgpu: convert userq UAPI _pad to flags

Reuse the _pad field for flags.

Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Jesse.Zhang <Jesse.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index ef97c0d78b8a..1a451907184c 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -355,7 +355,10 @@ struct drm_amdgpu_userq_in {
 	 * and doorbell_offset in the doorbell bo.
 	 */
 	__u32   doorbell_offset;
-	__u32 _pad;
+	/**
+	 * @flags: flags used for queue parameters
+	 */
+	__u32 flags;
 	/**
 	 * @queue_va: Virtual address of the GPU memory which holds the queue
 	 * object. The queue holds the workload packets.
-- 
cgit v1.2.3


From 024cc8a71aac8194fc2883782d36cbad6a9fe36b Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 26 Feb 2025 16:55:36 -0500
Subject: drm/amdgpu/userq: add UAPI for setting queue priority

Allow the user to set a queue priority levels:
0 - normal low - most apps (maps to MES AMD_PRIORITY_LEVEL_NORMAL)
1 - low - background jobs (maps to MES AMD_PRIORITY_LEVEL_LOW)
2 - normal high - apps that need relative high (maps to MES AMD_PRIORITY_LEVEL_MEDIUM)
3 - high (admin only - for compositors) (maps to MES AMD_PRIORITY_LEVEL_HIGH)

Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Jesse.Zhang <Jesse.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 1a451907184c..267ed4adcfb9 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -329,6 +329,15 @@ union drm_amdgpu_ctx {
 #define AMDGPU_USERQ_OP_CREATE	1
 #define AMDGPU_USERQ_OP_FREE	2
 
+/* queue priority levels */
+/* low < normal low < normal high < high */
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK  0x3
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
+
 /*
  * This structure is a container to pass input configuration
  * info for all supported userqueue related operations.
-- 
cgit v1.2.3


From 94a62b0f573f868f6f706d96c8c577c2e9b309e0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 26 Feb 2025 17:12:58 -0500
Subject: drm/amdgpu/userq: add UAPI for setting up secure queues

If the queues needs to access TMZ surfaces, it must
be set up as secure.

Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Jesse.Zhang <Jesse.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 267ed4adcfb9..284ac25ab5c4 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -337,6 +337,8 @@ union drm_amdgpu_ctx {
 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1
 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2
 #define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */
+/* for queues that need access to protected content */
+#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE  (1 << 2)
 
 /*
  * This structure is a container to pass input configuration
-- 
cgit v1.2.3


From 4b27406380b0b9ada6b4893bc8f6766dd34fff36 Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Fri, 11 Apr 2025 15:08:30 +0530
Subject: drm/amdgpu: Add queue id support to the user queue wait IOCTL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add queue id support to the user queue wait IOCTL
drm_amdgpu_userq_wait structure.

This is required to retrieve the wait user queue and maintain
the fence driver references in it so that the user queue in
the same context releases their reference to the fence drivers
at some point before queue destruction.

Otherwise, we would gather those references until we
don't have any more space left and crash.

v2: Modify the UAPI comment as per the mesa and libdrm UAPI comment.

Libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/408
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34493

Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 284ac25ab5c4..1fd96474e64c 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -515,6 +515,12 @@ struct drm_amdgpu_userq_fence_info {
 };
 
 struct drm_amdgpu_userq_wait {
+	/**
+	 * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the
+	 * wait queue and maintain the fence driver references in it.
+	 */
+	__u32	waitq_id;
+	__u32	pad;
 	/**
 	 * @syncobj_handles: The list of syncobj handles submitted by the user queue
 	 * job to get the va/value pairs.
-- 
cgit v1.2.3