From 2271e0a20ef795838527815e057f5af206b69c87 Mon Sep 17 00:00:00 2001 From: Robert Mader Date: Fri, 9 May 2025 15:35:35 +0200 Subject: drm: drm_fourcc: add 10/12/16bit software decoder YCbCr formats This adds FOURCCs for 3-plane 10/12/16bit YCbCr formats used by software decoders like ffmpeg, dav1d and libvpx. The intended use-case is buffer sharing between decoders and GPUs by allocating buffers with e.g. udmabuf or dma-heaps, avoiding unnecessary copies and format conversions in various scenarios. Unlike formats typically used by hardware decoders the 10/12bit formats use a LSB alignment. In order to allow fast implementations in GL and Vulkan the padding must contain only zeros, so the float representation can be calculated by multiplying with 2^6=64 or 2^4=16 respectively. MRs or branches for Mesa, Vulkan, Gstreamer, Weston and Mutter can be found at: - https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34303 - https://github.com/rmader/Vulkan-Docs/commits/ycbcr-16bit-lsb-formats/ - https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8540 - https://gitlab.freedesktop.org/wayland/weston/-/merge_requests/1753 - https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/4348 The naming scheme follows the 'P' and 'Q' formats. The 'S' stands for 'software' and was selected in order to make remembering easy. The 'Sx16' formats could as well be 'Qx16'. We stick with 'S' as 16bit software decoders are likely much more common than hardware ones for the foreseeable future. 
Note that these formats already have Vulkan equivalents: - VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM - VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM - VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM Signed-off-by: Robert Mader Reviewed-by: Daniel Stone Link: https://lore.kernel.org/r/20250509133535.60330-1-robert.mader@collabora.com Signed-off-by: Daniel Stone --- include/uapi/drm/drm_fourcc.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 81202a50dc9e..6483f76a2165 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -377,6 +377,42 @@ extern "C" { */ #define DRM_FORMAT_Q401 fourcc_code('Q', '4', '0', '1') +/* + * 3 plane YCbCr LSB aligned + * In order to use these formats in a similar fashion to MSB aligned ones + * implementation can multiply the values by 2^6=64. For that reason the padding + * must only contain zeros. + * index 0 = Y plane, [15:0] z:Y [6:10] little endian + * index 1 = Cb plane, [15:0] z:Cb [6:10] little endian + * index 2 = Cr plane, [15:0] z:Cr [6:10] little endian + */ +#define DRM_FORMAT_S010 fourcc_code('S', '0', '1', '0') /* 2x2 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ +#define DRM_FORMAT_S210 fourcc_code('S', '2', '1', '0') /* 2x1 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ +#define DRM_FORMAT_S410 fourcc_code('S', '4', '1', '0') /* non-subsampled Cb (1) and Cr (2) planes 10 bits per channel */ + +/* + * 3 plane YCbCr LSB aligned + * In order to use these formats in a similar fashion to MSB aligned ones + * implementation can multiply the values by 2^4=16. For that reason the padding + * must only contain zeros. 
+ * index 0 = Y plane, [15:0] z:Y [4:12] little endian + * index 1 = Cb plane, [15:0] z:Cb [4:12] little endian + * index 2 = Cr plane, [15:0] z:Cr [4:12] little endian + */ +#define DRM_FORMAT_S012 fourcc_code('S', '0', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ +#define DRM_FORMAT_S212 fourcc_code('S', '2', '1', '2') /* 2x1 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ +#define DRM_FORMAT_S412 fourcc_code('S', '4', '1', '2') /* non-subsampled Cb (1) and Cr (2) planes 12 bits per channel */ + +/* + * 3 plane YCbCr + * index 0 = Y plane, [15:0] Y little endian + * index 1 = Cb plane, [15:0] Cb little endian + * index 2 = Cr plane, [15:0] Cr little endian + */ +#define DRM_FORMAT_S016 fourcc_code('S', '0', '1', '6') /* 2x2 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ +#define DRM_FORMAT_S216 fourcc_code('S', '2', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ +#define DRM_FORMAT_S416 fourcc_code('S', '4', '1', '6') /* non-subsampled Cb (1) and Cr (2) planes 16 bits per channel */ + /* * 3 plane YCbCr * index 0: Y plane, [7:0] Y -- cgit v1.2.3 From 2f684bbbcb27048e6b16732b440dbadc0e342363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Larumbe?= Date: Tue, 20 May 2025 18:44:00 +0100 Subject: drm/panfrost: Add driver IOCTL for setting BO labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow UM to label a BO for which it possesses a DRM handle. 
Signed-off-by: Adrián Larumbe Reviewed-by: Steven Price Reviewed-by: Boris Brezillon Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20250520174634.353267-4-adrian.larumbe@collabora.com --- include/uapi/drm/panfrost_drm.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 568724be6628..ed67510395bd 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -21,6 +21,7 @@ extern "C" { #define DRM_PANFROST_PERFCNT_ENABLE 0x06 #define DRM_PANFROST_PERFCNT_DUMP 0x07 #define DRM_PANFROST_MADVISE 0x08 +#define DRM_PANFROST_SET_LABEL_BO 0x09 #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -29,6 +30,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_PARAM, struct drm_panfrost_get_param) #define DRM_IOCTL_PANFROST_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset) #define DRM_IOCTL_PANFROST_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise) +#define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -227,6 +229,25 @@ struct drm_panfrost_madvise { __u32 retained; /* out, whether backing store still exists */ }; +/** + * struct drm_panfrost_set_label_bo - ioctl argument for labelling Panfrost BOs. + */ +struct drm_panfrost_set_label_bo { + /** @handle: Handle of the buffer object to label. */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** + * @label: User pointer to a NUL-terminated string + * + * Length cannot be greater than 4096. 
+ * NULL is permitted and means clear the label. + */ + __u64 label; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 -- cgit v1.2.3 From 21784ca96025b62d95b670b7639ad70ddafa69b8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 22 May 2025 15:54:04 -0700 Subject: drm/xe/pxp: Clarify PXP queue creation behavior if PXP is not ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The expected flow of operations when using PXP is to query the PXP status and wait for it to transition to "ready" before attempting to create an exec_queue. This flow is followed by the Mesa driver, but there is no guarantee that an incorrectly coded (or malicious) app will not attempt to create the queue first without querying the status. Therefore, we need to clarify what the expected behavior of the queue creation ioctl is in this scenario. Currently, the ioctl always fails with an -EBUSY code no matter the error, but for consistency it is better to distinguish between "failed to init" (-EIO) and "not ready" (-EBUSY), the same way the query ioctl does. Note that, while this is a change in the return code of an ioctl, the behavior of the ioctl in this particular corner case was not clearly spec'd, so no one should have been relying on it (and we know that Mesa, which is the only known userspace for this, didn't). 
v2: Minor rework of the doc (Rodrigo) Fixes: 72d479601d67 ("drm/xe/pxp/uapi: Add userspace and LRC support for PXP-using queues") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: José Roberto de Souza Reviewed-by: José Roberto de Souza Reviewed-by: John Harrison Acked-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250522225401.3953243-7-daniele.ceraolospurio@intel.com --- include/uapi/drm/xe_drm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 9c08738c3b91..6a702ba7817c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1210,6 +1210,11 @@ struct drm_xe_vm_bind { * there is no need to explicitly set that. When a queue of type * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session * (%XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if isn't already running. + * The user is expected to query the PXP status via the query ioctl (see + * %DRM_XE_DEVICE_QUERY_PXP_STATUS) and to wait for PXP to be ready before + * attempting to create a queue with this property. When a queue is created + * before PXP is ready, the ioctl will return -EBUSY if init is still in + * progress or -EIO if init failed. * Given that going into a power-saving state kills PXP HWDRM sessions, * runtime PM will be blocked while queues of this type are alive. * All PXP queues will be killed if a PXP invalidation event occurs. 
-- cgit v1.2.3 From 95cbab48782bf62e4093837dc15ac6133902c12f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 6 Jun 2025 10:09:31 +0200 Subject: drm/panthor: Add missing explicit padding in drm_panthor_gpu_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_panthor_gpu_info::shader_present is currently automatically offset by 4 bytes to meet Arm's 32-bit/64-bit field alignment rules, but those constraints don't stand on 32-bit x86 and cause a mismatch when running an x86 binary in a user emulated environment like FEX. It's also generally agreed that uAPIs should explicitly pad their struct fields, which we originally intended to do, but a mistake slipped through during the submission process, leading drm_panthor_gpu_info::shader_present to be misaligned. This uAPI change doesn't break any of the existing users of panthor which are either arm32 or arm64 where the 64-bit alignment of u64 fields is already enforced at the compiler level. Changes in v2: - Rename the garbage field into pad0 and adjust the comment accordingly - Add Liviu's A-b Changes in v3: - Add R-bs Fixes: 0f25e493a246 ("drm/panthor: Add uAPI") Acked-by: Liviu Dudau Reviewed-by: Adrián Larumbe Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20250606080932.4140010-2-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panthor_drm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index ad9a70afea6c..3a76c4f2882b 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -296,6 +296,9 @@ struct drm_panthor_gpu_info { /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ __u32 as_present; + /** @pad0: MBZ. */ + __u32 pad0; + /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. 
*/ __u64 shader_present; -- cgit v1.2.3 From 94ac529a9932654c0b8cbff29745c8417978a7d0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 6 Jun 2025 10:09:32 +0200 Subject: drm/panthor: Fix the user MMIO offset logic for emulators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we pick the MMIO offset based on the size of the pgoff_t type seen by the process that manipulates the FD, such that a 32-bit process can always map the user MMIO ranges. But this approach doesn't work well for emulators like FEX, where the emulator is a 64-bit binary which might be executing 32-bit code. In that case, the kernel thinks it's the 64-bit process and assumes DRM_PANTHOR_USER_MMIO_OFFSET_64BIT is in use, but the UMD library expects DRM_PANTHOR_USER_MMIO_OFFSET_32BIT, because it can't mmap() anything above the pgoff_t size. In order to solve that, we need a way to explicitly set the user MMIO offset from the UMD, such that the kernel doesn't have to guess it from the TIF_32BIT flag set on user thread. We keep the old behavior if DRM_PANTHOR_SET_USER_MMIO_OFFSET is never called. 
Changes in v2: - Drop the lock/immutable fields and allow SET_USER_MMIO_OFFSET requests to race with mmap() requests - Don't do the is_user_mmio_offset test twice in panthor_mmap() - Improve the uAPI docs Changes in v3: - Bump to version 1.5 instead of 1.4 after rebasing - Add R-bs - Fix/rephrase comment as suggested by Liviu Reviewed-by: Adrián Larumbe Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://lore.kernel.org/r/20250606080932.4140010-3-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- include/uapi/drm/panthor_drm.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 3a76c4f2882b..e1f43deb7eca 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -130,6 +130,20 @@ enum drm_panthor_ioctl_id { /** @DRM_PANTHOR_BO_SET_LABEL: Label a BO. */ DRM_PANTHOR_BO_SET_LABEL, + + /** + * @DRM_PANTHOR_SET_USER_MMIO_OFFSET: Set the offset to use as the user MMIO offset. + * + * The default behavior is to pick the MMIO offset based on the size of the pgoff_t + * type seen by the process that manipulates the FD, such that a 32-bit process can + * always map the user MMIO ranges. But this approach doesn't work well for emulators + * like FEX, where the emulator is a 64-bit binary which might be executing 32-bit + * code. In that case, the kernel thinks it's the 64-bit process and assumes + * DRM_PANTHOR_USER_MMIO_OFFSET_64BIT is in use, but the UMD library expects + * DRM_PANTHOR_USER_MMIO_OFFSET_32BIT, because it can't mmap() anything above the + * pgoff_t size. 
+ */ + DRM_PANTHOR_SET_USER_MMIO_OFFSET, }; /** @@ -1001,6 +1015,28 @@ struct drm_panthor_bo_set_label { __u64 label; }; +/** + * struct drm_panthor_set_user_mmio_offset - Arguments passed to + * DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET + * + * This ioctl is only really useful if you want to support userspace + * CPU emulation environments where the size of an unsigned long differs + * between the host and the guest architectures. + */ +struct drm_panthor_set_user_mmio_offset { + /** + * @offset: User MMIO offset to use. + * + * Must be either DRM_PANTHOR_USER_MMIO_OFFSET_32BIT or + * DRM_PANTHOR_USER_MMIO_OFFSET_64BIT. + * + * Use DRM_PANTHOR_USER_MMIO_OFFSET (which selects OFFSET_32BIT or + * OFFSET_64BIT based on the size of an unsigned long) unless you + * have a very good reason to overrule this decision. + */ + __u64 offset; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1045,6 +1081,8 @@ enum { DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy), DRM_IOCTL_PANTHOR_BO_SET_LABEL = DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label), + DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET = + DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), }; #if defined(__cplusplus) -- cgit v1.2.3 From 5e861a695a39263123cdc086934b7336dbe6946d Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Thu, 5 Jun 2025 18:20:01 +0200 Subject: accel/ivpu: Add turbo flag to the DRM_IVPU_CMDQ_CREATE ioctl Introduce a new parameter to the DRM_IVPU_CMDQ_CREATE ioctl, enabling turbo mode for jobs submitted via the command queue. Turbo mode allows jobs to run at higher frequencies, potentially improving performance for demanding workloads. Also adds the IVPU_TEST_MODE_TURBO_DISABLE flag to allow test mode to explicitly disable turbo mode requested by the application. The IVPU_TEST_MODE_TURBO mode has been renamed to IVPU_TEST_MODE_TURBO_ENABLE for clarity and consistency. 
Signed-off-by: Andrzej Kacprowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250605162001.1237789-1-maciej.falkowski@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 2f24103f4533..160ee1411d4a 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -445,6 +445,9 @@ struct drm_ivpu_metric_streamer_get_data { __u64 data_size; }; +/* Command queue flags */ +#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001 + /** * struct drm_ivpu_cmdq_create - Create command queue for job submission */ @@ -462,6 +465,17 @@ struct drm_ivpu_cmdq_create { * %DRM_IVPU_JOB_PRIORITY_REALTIME */ __u32 priority; + /** + * @flags: + * + * Supported flags: + * + * %DRM_IVPU_CMDQ_FLAG_TURBO + * + * Enable low-latency mode for the command queue. The NPU will maximize performance + * when executing jobs from such queue at the cost of increased power usage. + */ + __u32 flags; }; /** -- cgit v1.2.3 From e04dac12cec853347908432b663a3f78e26d3b8d Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 6 Jun 2025 12:26:13 -0700 Subject: drm/xe/oa/uapi: Expose media OA units On Xe2+ platforms, media engines are attached to "SCMI" OA media (OAM) units. One or more SCMI OAM units might be present on a platform. In addition there is another OAM unit for global events, called OAM-SAG. Performance metrics for media workloads can be obtained from these OAM units, similar to OAG. Expose these OAM units for userspace to use. OAM-SAG is exposed as an OA unit without any attached engines. 
Bspec: 70819, 67103, 63844, 72572, 74476, 61284 v2: Fix xe_gt_WARN_ON in __hwe_oam_unit for < 12.7 platforms v3: Return XE_OA_UNIT_INVALID for < 12.7 to indicate no OAM units v4: Move xe_oa_print_oa_units() to separate patch v5: Introduce DRM_XE_OA_UNIT_TYPE_OAM_SAG v6: Introduce DRM_XE_OA_CAPS_OAM Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://lore.kernel.org/r/20250606192618.4133817-2-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 6a702ba7817c..8e8bbdec8c5c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1617,6 +1617,9 @@ enum drm_xe_oa_unit_type { /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ DRM_XE_OA_UNIT_TYPE_OAM, + + /** @DRM_XE_OA_UNIT_TYPE_OAM_SAG: OAM_SAG OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM_SAG, }; /** @@ -1638,6 +1641,7 @@ struct drm_xe_oa_unit { #define DRM_XE_OA_CAPS_SYNCS (1 << 1) #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) #define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) +#define DRM_XE_OA_CAPS_OAM (1 << 4) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; -- cgit v1.2.3 From 771f002ef1d6f6c2b9bddf779abd31da6b9ccd25 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Sun, 8 Jun 2025 23:01:33 +0000 Subject: drm/xe/uapi: Correct sync type definition in comments Commit 37d078e51b4c ("drm/xe/uapi: Split xe_sync types from flags") renamed some DRM_XE_SYNC_* defines but later commits kept using the old names. Correct them with the new definition. 
v2: correct fixes tag and update commit message to explain why (Lucas) Fixes: 9329f0667215 ("drm/xe/uapi: Use LR abbrev for long-running vms") Fixes: 4b437893a826 ("drm/xe/uapi: More uAPI documentation additions and cosmetic updates") Reviewed-by: Lucas De Marchi Cc: Rodrigo Vivi Cc: Francois Dugast Cc: Zongyao Bai Signed-off-by: Shuicheng Lin Link: https://lore.kernel.org/r/20250608230133.1250849-1-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 8e8bbdec8c5c..e2426413488f 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -925,9 +925,9 @@ struct drm_xe_gem_mmap_offset { * - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts * exec submissions to its exec_queues that don't have an upper time * limit on the job execution time. But exec submissions to these - * don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ, - * DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF, - * used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL. + * don't allow any of the sync types DRM_XE_SYNC_TYPE_SYNCOBJ, + * DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ, used as out-syncobjs, that is, + * together with sync flag DRM_XE_SYNC_FLAG_SIGNAL. * LR VMs can be created in recoverable page-fault mode using * DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it. * If that flag is omitted, the UMD can not rely on the slightly @@ -1394,7 +1394,7 @@ struct drm_xe_sync { /** * @timeline_value: Input for the timeline sync object. Needs to be - * different than 0 when used with %DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ. + * different than 0 when used with %DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ. 
*/ __u64 timeline_value; -- cgit v1.2.3 From e04c3521df073b688b9e9e2213cd3c588e3b6e68 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 25 Jun 2025 10:37:10 -0700 Subject: drm/fourcc: Add missing half-float formats Not something that is likely to be scanned out, but GPUs usually support half-float formats with 1, 2, or possibly 3 components, and it is useful to be able to import/export them with a valid fourcc, and/or use gbm to create them. These correspond to PIPE_FORMAT_{R16,R16G16,R16G16B16}_FLOAT in mesa. Signed-off-by: Rob Clark Acked-by: Simona Vetter Acked-by: Daniel Stone Link: https://lore.kernel.org/r/20250625173712.116446-2-robin.clark@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/uapi/drm/drm_fourcc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 6483f76a2165..89a82d02b7e8 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -218,7 +218,7 @@ extern "C" { #define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */ /* - * Floating point 64bpp RGB + * Half-Floating point - 16b/component * IEEE 754-2008 binary16 half-precision float * [15:0] sign:exponent:mantissa 1:5:10 */ @@ -228,6 +228,10 @@ extern "C" { #define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */ #define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ +#define DRM_FORMAT_R16F fourcc_code('R', ' ', ' ', 'H') /* [15:0] R 16 little endian */ +#define DRM_FORMAT_GR1616F fourcc_code('G', 'R', ' ', 'H') /* [31:0] G:R 16:16 little endian */ +#define DRM_FORMAT_BGR161616F fourcc_code('B', 'G', 'R', 'H') /* [47:0] B:G:R 16:16:16 little endian */ + /* * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits * of unused padding per component: -- cgit v1.2.3 
From 3529cb5ab16b4f1f8bbc31dc39a1076a94bd1e38 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 25 Jun 2025 10:37:11 -0700 Subject: drm/fourcc: Add 32b float formats Add 1, 2, 3, and 4 component 32b float formats, so that buffers with these formats can be imported/exported with fourcc+modifier, and/or created by gbm. These correspond to PIPE_FORMAT_{R32,R32G32,R32G32B32,R32G32B32A32}_FLOAT in mesa. v2: Fix comment describing float32 layout [Sima] Signed-off-by: Rob Clark Acked-by: Simona Vetter Acked-by: Daniel Stone Link: https://lore.kernel.org/r/20250625173712.116446-3-robin.clark@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- include/uapi/drm/drm_fourcc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 89a82d02b7e8..0d375c5bfc9d 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -232,6 +232,16 @@ extern "C" { #define DRM_FORMAT_GR1616F fourcc_code('G', 'R', ' ', 'H') /* [31:0] G:R 16:16 little endian */ #define DRM_FORMAT_BGR161616F fourcc_code('B', 'G', 'R', 'H') /* [47:0] B:G:R 16:16:16 little endian */ +/* + * Floating point - 32b/component + * IEEE 754-2008 binary32 float + * [31:0] sign:exponent:mantissa 1:8:23 + */ +#define DRM_FORMAT_R32F fourcc_code('R', ' ', ' ', 'F') /* [31:0] R 32 little endian */ +#define DRM_FORMAT_GR3232F fourcc_code('G', 'R', ' ', 'F') /* [63:0] R:G 32:32 little endian */ +#define DRM_FORMAT_BGR323232F fourcc_code('B', 'G', 'R', 'F') /* [95:0] R:G:B 32:32:32 little endian */ +#define DRM_FORMAT_ABGR32323232F fourcc_code('A', 'B', '8', 'F') /* [127:0] R:G:B:A 32:32:32:32 little endian */ + /* * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits * of unused padding per component: -- cgit v1.2.3 From 1aa93cfb1288a141c64e923dbaaa277616f0f7d5 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 26 Feb 2024 14:25:43 +0100 Subject: drm/fourcc: Add RGB161616 and 
BGR161616 formats Add FourCC definitions for the 48-bit RGB/BGR formats to the DRM/KMS uapi. The format will be used by the Raspberry Pi PiSP Back End, supported by a V4L2 driver in kernel space and by libcamera in userspace, which uses the DRM FourCC identifiers. Signed-off-by: Jacopo Mondi Reviewed-by: Rob Clark Reviewed-by: Simon Ser Reviewed-by: Naushir Patuck Link: https://lore.kernel.org/r/20240226132544.82817-1-jacopo.mondi@ideasonboard.com Signed-off-by: Javier Martinez Canillas --- include/uapi/drm/drm_fourcc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 0d375c5bfc9d..ea91aa8afde9 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -210,6 +210,10 @@ extern "C" { #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* 48 bpp RGB */ +#define DRM_FORMAT_RGB161616 fourcc_code('R', 'G', '4', '8') /* [47:0] R:G:B 16:16:16 little endian */ +#define DRM_FORMAT_BGR161616 fourcc_code('B', 'G', '4', '8') /* [47:0] B:G:R 16:16:16 little endian */ + /* 64 bpp RGB */ #define DRM_FORMAT_XRGB16161616 fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */ #define DRM_FORMAT_XBGR16161616 fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */ -- cgit v1.2.3 From dbbde63c9e9d472743a88f975baac412ba93f29d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:01 -0700 Subject: drm/msm: Add PRR support PRR (Partially Resident Region) is a bypass address which makes GPU writes go to /dev/null and reads return zero. This is used to implement vulkan sparse residency. 
To support PRR/NULL mappings, we allocate a page to reserve a physical address which we know will not be used as part of a GEM object, and configure the SMMU to use this address for PRR/NULL mappings. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661486/ --- include/uapi/drm/msm_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2342cb90857e..5bc5e4526ccf 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -91,6 +91,8 @@ struct drm_msm_timespec { #define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ #define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ #define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */ +/* PRR (Partially Resident Region) is required for sparse residency: */ +#define MSM_PARAM_HAS_PRR 0x15 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From feb8ef4636a457a1fd916a3ae575f552935e69b9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:05 -0700 Subject: drm/msm: Add opt-in for VM_BIND Add a SET_PARAM for userspace to request to manage the VM itself, instead of getting a kernel managed VM. In order to transition to a userspace managed VM, this param must be set before any mappings are created. 
Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661494/ --- include/uapi/drm/msm_drm.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 5bc5e4526ccf..b974f5a24dbc 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -93,6 +93,30 @@ struct drm_msm_timespec { #define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */ /* PRR (Partially Resident Region) is required for sparse residency: */ #define MSM_PARAM_HAS_PRR 0x15 /* RO */ +/* MSM_PARAM_EN_VM_BIND is set to 1 to enable VM_BIND ops. + * + * With VM_BIND enabled, userspace is required to allocate iova and use the + * VM_BIND ops for map/unmap ioctls. MSM_INFO_SET_IOVA and MSM_INFO_GET_IOVA + * will be rejected. (The latter does not have a sensible meaning when a BO + * can have multiple and/or partial mappings.) + * + * With VM_BIND enabled, userspace does not include a submit_bo table in the + * SUBMIT ioctl (this will be rejected), the resident set is determined by + * the VM_BIND ops. + * + * Enabling VM_BIND will fail on devices which do not have per-process pgtables. + * And it is not allowed to disable VM_BIND once it has been enabled. + * + * Enabling VM_BIND should be done (attempted) prior to allocating any BOs or + * submitqueues of type MSM_SUBMITQUEUE_VM_BIND. + * + * Relatedly, when VM_BIND mode is enabled, the kernel will not try to recover + * from GPU faults or failed async VM_BIND ops, in particular because it is + * difficult to communicate to userspace which op failed so that userspace + * could rewind and try again. When the VM is marked unusable, the SUBMIT + * ioctl will throw -EPIPE. + */ +#define MSM_PARAM_EN_VM_BIND 0x16 /* WO, once */ /* For backwards compat. 
The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From b58e12a66e47eaf95b31bbefbc260e5a0b3e638c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:07 -0700 Subject: drm/msm: Add _NO_SHARE flag Buffers that are not shared between contexts can share a single resv object. This way drm_gpuvm will not track them as external objects, and submit-time validating overhead will be O(1) for all N non-shared BOs, instead of O(n). Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661497/ --- include/uapi/drm/msm_drm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index b974f5a24dbc..1bccc347945c 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -140,6 +140,19 @@ struct drm_msm_param { #define MSM_BO_SCANOUT 0x00000001 /* scanout capable */ #define MSM_BO_GPU_READONLY 0x00000002 +/* Private buffers do not need to be explicitly listed in the SUBMIT + * ioctl, unless referenced by a drm_msm_gem_submit_cmd. Private + * buffers may NOT be imported/exported or used for scanout (or any + * other situation where buffers can be indefinitely pinned, but + * cases other than scanout are all kernel owned BOs which are not + * visible to userspace). + * + * In exchange for those constraints, all private BOs associated with + * a single context (drm_file) share a single dma_resv, and if there + * has been no eviction since the last submit, there is no per-BO + * bookkeeping to do, significantly cutting the SUBMIT overhead. 
+ */ +#define MSM_BO_NO_SHARE 0x00000004 #define MSM_BO_CACHE_MASK 0x000f0000 /* cache modes */ #define MSM_BO_CACHED 0x00010000 @@ -149,6 +162,7 @@ struct drm_msm_param { #define MSM_BO_FLAGS (MSM_BO_SCANOUT | \ MSM_BO_GPU_READONLY | \ + MSM_BO_NO_SHARE | \ MSM_BO_CACHE_MASK) struct drm_msm_gem_new { -- cgit v1.2.3 From e1341f91450525b94474b75d5e77587d1d84e52c Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:12 -0700 Subject: drm/msm: Extract out syncobj helpers We'll be re-using these for the VM_BIND ioctl. Also, rename a few things in the uapi header to reflect that syncobj use is not specific to the submit ioctl. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661512/ --- include/uapi/drm/msm_drm.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 1bccc347945c..2c2fc4b284d0 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -220,6 +220,17 @@ struct drm_msm_gem_cpu_fini { * Cmdstream Submission: */ +#define MSM_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */ +#define MSM_SYNCOBJ_FLAGS ( \ + MSM_SYNCOBJ_RESET | \ + 0) + +struct drm_msm_syncobj { + __u32 handle; /* in, syncobj handle. */ + __u32 flags; /* in, from MSM_SYNCOBJ_FLAGS */ + __u64 point; /* in, timepoint for timeline syncobjs. */ +}; + /* The value written into the cmdstream is logically: * * ((relocbuf->gpuaddr + reloc_offset) << shift) | or @@ -309,17 +320,6 @@ struct drm_msm_gem_submit_bo { MSM_SUBMIT_FENCE_SN_IN | \ 0) -#define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */ -#define MSM_SUBMIT_SYNCOBJ_FLAGS ( \ - MSM_SUBMIT_SYNCOBJ_RESET | \ - 0) - -struct drm_msm_gem_submit_syncobj { - __u32 handle; /* in, syncobj handle.
*/ - __u32 flags; /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */ - __u64 point; /* in, timepoint for timeline syncobjs. */ -}; - /* Each cmdstream submit consists of a table of buffers involved, and * one or more cmdstream buffers. This allows for conditional execution * (context-restore), and IB buffers needed for per tile/bin draw cmds. @@ -333,8 +333,8 @@ struct drm_msm_gem_submit { __u64 cmds; /* in, ptr to array of submit_cmd's */ __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ __u32 queueid; /* in, submitqueue id */ - __u64 in_syncobjs; /* in, ptr to array of drm_msm_gem_submit_syncobj */ - __u64 out_syncobjs; /* in, ptr to array of drm_msm_gem_submit_syncobj */ + __u64 in_syncobjs; /* in, ptr to array of drm_msm_syncobj */ + __u64 out_syncobjs; /* in, ptr to array of drm_msm_syncobj */ __u32 nr_in_syncobjs; /* in, number of entries in in_syncobj */ __u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */ __u32 syncobj_stride; /* in, stride of syncobj arrays. */ -- cgit v1.2.3 From 92395af63a9958615edfa9d4ef1ea72c92a00410 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:14 -0700 Subject: drm/msm: Add VM_BIND submitqueue This submitqueue type isn't tied to a hw ringbuffer, but instead executes on the CPU for performing async VM_BIND ops. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661517/ --- include/uapi/drm/msm_drm.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 2c2fc4b284d0..6d6cd1219926 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -385,12 +385,19 @@ struct drm_msm_gem_madvise { /* * Draw queues allow the user to set specific submission parameter. Command * submissions specify a specific submitqueue to use. 
ID 0 is reserved for - * backwards compatibility as a "default" submitqueue + * backwards compatibility as a "default" submitqueue. + * + * Because VM_BIND async updates happen on the CPU, they must run on a + * virtual queue created with the flag MSM_SUBMITQUEUE_VM_BIND. If we had + * a way to do pgtable updates on the GPU, we could drop this restriction. */ #define MSM_SUBMITQUEUE_ALLOW_PREEMPT 0x00000001 +#define MSM_SUBMITQUEUE_VM_BIND 0x00000002 /* virtual queue for VM_BIND ops */ + #define MSM_SUBMITQUEUE_FLAGS ( \ MSM_SUBMITQUEUE_ALLOW_PREEMPT | \ + MSM_SUBMITQUEUE_VM_BIND | \ 0) /* -- cgit v1.2.3 From 2e6a8a1fe2b262a6dfd0a65041fcd830ee1e7143 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:18 -0700 Subject: drm/msm: Add VM_BIND ioctl Add a VM_BIND ioctl for binding/unbinding buffers into a VM. This is only supported if userspace has opted in to MSM_PARAM_EN_VM_BIND. Signed-off-by: Rob Clark Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661524/ --- include/uapi/drm/msm_drm.h | 74 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 6d6cd1219926..5c67294edc95 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -272,7 +272,10 @@ struct drm_msm_gem_submit_cmd { __u32 size; /* in, cmdstream size */ __u32 pad; __u32 nr_relocs; /* in, number of submit_reloc's */ - __u64 relocs; /* in, ptr to array of submit_reloc's */ + union { + __u64 relocs; /* in, ptr to array of submit_reloc's */ + __u64 iova; /* cmdstream address (for VM_BIND contexts) */ + }; }; /* Each buffer referenced elsewhere in the cmdstream submit (ie. the @@ -339,7 +342,74 @@ struct drm_msm_gem_submit { __u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. 
*/ __u32 syncobj_stride; /* in, stride of syncobj arrays. */ __u32 pad; /*in, reserved for future use, always 0. */ +}; + +#define MSM_VM_BIND_OP_UNMAP 0 +#define MSM_VM_BIND_OP_MAP 1 +#define MSM_VM_BIND_OP_MAP_NULL 2 + +#define MSM_VM_BIND_OP_DUMP 1 +#define MSM_VM_BIND_OP_FLAGS ( \ + MSM_VM_BIND_OP_DUMP | \ + 0) +/** + * struct drm_msm_vm_bind_op - bind/unbind op to run + */ +struct drm_msm_vm_bind_op { + /** @op: one of MSM_VM_BIND_OP_x */ + __u32 op; + /** @handle: GEM object handle, MBZ for UNMAP or MAP_NULL */ + __u32 handle; + /** @obj_offset: Offset into GEM object, MBZ for UNMAP or MAP_NULL */ + __u64 obj_offset; + /** @iova: Address to operate on */ + __u64 iova; + /** @range: Number of bytes to map/unmap */ + __u64 range; + /** @flags: Bitmask of MSM_VM_BIND_OP_FLAG_x */ + __u32 flags; + /** @pad: MBZ */ + __u32 pad; +}; + +#define MSM_VM_BIND_FENCE_FD_IN 0x00000001 +#define MSM_VM_BIND_FENCE_FD_OUT 0x00000002 +#define MSM_VM_BIND_FLAGS ( \ + MSM_VM_BIND_FENCE_FD_IN | \ + MSM_VM_BIND_FENCE_FD_OUT | \ + 0) + +/** + * struct drm_msm_vm_bind - Input of &DRM_IOCTL_MSM_VM_BIND + */ +struct drm_msm_vm_bind { + /** @flags: in, bitmask of MSM_VM_BIND_x */ + __u32 flags; + /** @nr_ops: the number of bind ops in this ioctl */ + __u32 nr_ops; + /** @fence_fd: in/out fence fd (see MSM_VM_BIND_FENCE_FD_IN/OUT) */ + __s32 fence_fd; + /** @queue_id: in, submitqueue id */ + __u32 queue_id; + /** @in_syncobjs: in, ptr to array of drm_msm_syncobj */ + __u64 in_syncobjs; + /** @out_syncobjs: in, ptr to array of drm_msm_syncobj */ + __u64 out_syncobjs; + /** @nr_in_syncobjs: in, number of entries in in_syncobj */ + __u32 nr_in_syncobjs; + /** @nr_out_syncobjs: in, number of entries in out_syncobj */ + __u32 nr_out_syncobjs; + /** @syncobj_stride: in, stride of syncobj arrays */ + __u32 syncobj_stride; + /** @op_stride: sizeof each struct drm_msm_vm_bind_op in @ops */ + __u32 op_stride; + union { + /** @op: used if nr_ops == 1 */ + struct drm_msm_vm_bind_op op;
+ /** @ops: userptr to array of drm_msm_vm_bind_op if nr_ops > 1 */ + __u64 ops; + }; }; #define MSM_WAIT_FENCE_BOOST 0x00000001 @@ -435,6 +505,7 @@ struct drm_msm_submitqueue_query { #define DRM_MSM_SUBMITQUEUE_NEW 0x0A #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C +#define DRM_MSM_VM_BIND 0x0D #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) #define DRM_IOCTL_MSM_SET_PARAM DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SET_PARAM, struct drm_msm_param) @@ -448,6 +519,7 @@ struct drm_msm_submitqueue_query { #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue) #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query) +#define DRM_IOCTL_MSM_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_VM_BIND, struct drm_msm_vm_bind) #if defined(__cplusplus) } -- cgit v1.2.3 From 78d0a27ae0e2e70b22895f4b388cc0ab88e3c6ca Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 25 Jun 2025 15:29:45 +0800 Subject: drm/amdgpu: Add user queue instance count in HW IP info This change exposes the number of available user queue instances for each hardware IP type (GFX, COMPUTE, SDMA) through the drm_amdgpu_info_hw_ip interface. Key changes: 1. Added userq_num_instance field to drm_amdgpu_info_hw_ip structure 2. Implemented counting of available HQD slots using: - mes.gfx_hqd_mask for GFX queues - mes.compute_hqd_mask for COMPUTE queues - mes.sdma_hqd_mask for SDMA queues 3.
Only counts available instances when user queues are enabled (!disable_uq) v2: using the adev->mes.gfx_hqd_mask[]/compute_hqd_mask[]/sdma_hqd_mask[] masks to determine the number of queue slots available for each engine type (Alex) v3: rename userq_num_instance to userq_num_hqds (Alex) Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 45c4fa13499c..66c4a03ac9f9 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1493,6 +1493,8 @@ struct drm_amdgpu_info_hw_ip { __u32 available_rings; /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ __u32 ip_discovery_version; + /* Userq available hqds */ + __u32 userq_num_hqds; }; /* GFX metadata BO sizes and alignment info (in bytes) */ -- cgit v1.2.3 From 9ffab039bcb0bbfade0e659552d2fb912347a871 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 4 Jul 2025 15:17:43 +0800 Subject: drm/amdgpu: Replace HQD terminology with slots naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The term "HQD" is CP-specific and doesn't accurately describe the queue resources for other IP blocks like SDMA, VCN, or VPE. This change: 1. Renames `num_hqds` to `num_slots` in amdgpu_kms.c to better reflect the generic nature of the resource counting 2. Updates the UAPI struct member from `userq_num_hqds` to `userq_num_slots` 3. 
Maintains the same functionality while using more appropriate terminology Signed-off-by: Jesse Zhang Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 66c4a03ac9f9..bdedbaccf776 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1493,8 +1493,8 @@ struct drm_amdgpu_info_hw_ip { __u32 available_rings; /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ __u32 ip_discovery_version; - /* Userq available hqds */ - __u32 userq_num_hqds; + /* Userq available slots */ + __u32 userq_num_slots; }; /* GFX metadata BO sizes and alignment info (in bytes) */ -- cgit v1.2.3