summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2026-01-15 14:49:33 +1000
committerDave Airlie <airlied@redhat.com>2026-01-15 14:49:33 +1000
commit83dc0ba2755296b5e5882e044c80973b7c3fce9e (patch)
tree53774323449c41eb1b17e101be83197c6b5cbb82 /drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
parenta87fef0880c4f52769b5a3c2fc1b2d73aaa04eb3 (diff)
parent38a0f4cf8c6147fd10baa206ab349f8ff724e391 (diff)
Merge tag 'amd-drm-next-6.20-2026-01-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.20-2026-01-09: amdgpu: - GPUVM updates - Initial support for larger GPU address spaces - Initial SMUIO 15.x support - Documentation updates - Initial PSP 15.x support - Initial IH 7.1 support - Initial IH 6.1.1 support - SMU 13.0.12 updates - RAS updates - Initial MMHUB 3.4 support - Initial MMHUB 4.2 support - Initial GC 12.1 support - Initial GC 11.5.4 support - HDMI fixes - Panel replay improvements - DML updates - DC FP fixes - Initial SDMA 6.1.4 support - Initial SDMA 7.1 support - Userq updates - DC HPD refactor - SwSMU cleanups and refactoring - TTM memory ops parallelization - DCN 3.5 fixes - DP audio fixes - Clang fixes - Misc spelling fixes and cleanups - Initial SDMA 7.11.4 support - Convert legacy DRM logging helpers to new drm logging helpers - Initial JPEG 5.3 support - Add support for changing UMA size via the driver - DC analog fixes - GC 9 gfx queue reset support - Initial SMU 15.x support amdkfd: - Reserved SDMA rework - Refactor SPM - Initial GC 12.1 support - Initial GC 11.5.4 support - Initial SDMA 7.1 support - Initial SDMA 6.1.4 support - Increase the kfd process hash table - Per context support - Topology fixes radeon: - Convert legacy DRM logging helpers to new drm logging helpers - Use devm for i2c adapters - Variable sized array fix - Misc cleanups UAPI: - KFD context support. Proposed userspace: https://github.com/ROCm/rocm-systems/pull/1705 https://github.com/ROCm/rocm-systems/pull/1701 - Add userq metadata queries for more queue types. Proposed userspace: https://gitlab.freedesktop.org/yogeshmohan/mesa/-/commits/userq_query From: Alex Deucher <alexander.deucher@amd.com> Link: https://patch.msgid.link/20260109154713.3242957-1-alexander.deucher@amd.com Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c112
1 files changed, 76 insertions, 36 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 9c182ce501af..dffa0f7276b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -31,6 +31,7 @@
#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
#define AMDGPU_ONE_DOORBELL_SIZE 8
+#define AMDGPU_MES_RESERVED_QUEUES 2
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
{
@@ -91,6 +92,9 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
int amdgpu_mes_init(struct amdgpu_device *adev)
{
int i, r, num_pipes;
+ u32 total_vmid_mask, reserved_vmid_mask;
+ u32 queue_mask, reserved_queue_mask;
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
adev->mes.adev = adev;
@@ -101,12 +105,18 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
spin_lock_init(&adev->mes.queue_id_lock);
mutex_init(&adev->mes.mutex_hidden);
- for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++)
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++)
spin_lock_init(&adev->mes.ring_lock[i]);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+ total_vmid_mask = (u32)((1UL << 16) - 1);
+ reserved_vmid_mask = (u32)((1UL << adev->vm_manager.first_kfd_vmid) - 1);
+
adev->mes.vmid_mask_mmhub = 0xFF00;
- adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00;
+ adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask;
+
+ queue_mask = (u32)(1UL << adev->gfx.mec.num_queue_per_pipe) - 1;
+ reserved_queue_mask = (u32)(1UL << AMDGPU_MES_RESERVED_QUEUES) - 1;
num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
@@ -142,7 +152,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
if (i >= num_pipes)
break;
- adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC;
+ adev->mes.compute_hqd_mask[i] =
+ adev->gfx.disable_kq ? 0xF : (queue_mask & ~reserved_queue_mask);
}
num_pipes = adev->sdma.num_instances;
@@ -156,7 +167,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.sdma_hqd_mask[i] = 0xfc;
}
- for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
if (r) {
dev_err(adev->dev,
@@ -192,16 +203,18 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
goto error_doorbell;
if (adev->mes.hung_queue_db_array_size) {
- r = amdgpu_bo_create_kernel(adev,
- adev->mes.hung_queue_db_array_size * sizeof(u32),
- PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.hung_queue_db_array_gpu_obj,
- &adev->mes.hung_queue_db_array_gpu_addr,
- &adev->mes.hung_queue_db_array_cpu_addr);
- if (r) {
- dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
- goto error_doorbell;
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
+ r = amdgpu_bo_create_kernel(adev,
+ adev->mes.hung_queue_db_array_size * sizeof(u32),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.hung_queue_db_array_gpu_obj[i],
+ &adev->mes.hung_queue_db_array_gpu_addr[i],
+ &adev->mes.hung_queue_db_array_cpu_addr[i]);
+ if (r) {
+ dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
+ goto error_doorbell;
+ }
}
}
@@ -210,12 +223,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
error_doorbell:
amdgpu_mes_doorbell_free(adev);
error:
- for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
if (adev->mes.sch_ctx_ptr[i])
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
if (adev->mes.query_status_fence_ptr[i])
amdgpu_device_wb_free(adev,
adev->mes.query_status_fence_offs[i]);
+ if (adev->mes.hung_queue_db_array_gpu_obj[i])
+ amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
+ &adev->mes.hung_queue_db_array_gpu_addr[i],
+ &adev->mes.hung_queue_db_array_cpu_addr[i]);
}
idr_destroy(&adev->mes.pasid_idr);
@@ -229,16 +246,17 @@ error:
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
int i;
-
- amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj,
- &adev->mes.hung_queue_db_array_gpu_addr,
- &adev->mes.hung_queue_db_array_cpu_addr);
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
&adev->mes.event_log_cpu_addr);
- for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
+ amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
+ &adev->mes.hung_queue_db_array_gpu_addr[i],
+ &adev->mes.hung_queue_db_array_cpu_addr[i]);
+
if (adev->mes.sch_ctx_ptr[i])
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
if (adev->mes.query_status_fence_ptr[i])
@@ -304,13 +322,14 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
}
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
+ struct amdgpu_ring *ring, uint32_t xcc_id)
{
struct mes_map_legacy_queue_input queue_input;
int r;
memset(&queue_input, 0, sizeof(queue_input));
+ queue_input.xcc_id = xcc_id;
queue_input.queue_type = ring->funcs->type;
queue_input.doorbell_offset = ring->doorbell_index;
queue_input.pipe_id = ring->pipe;
@@ -330,11 +349,12 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
enum amdgpu_unmap_queues_action action,
- u64 gpu_addr, u64 seq)
+ u64 gpu_addr, u64 seq, uint32_t xcc_id)
{
struct mes_unmap_legacy_queue_input queue_input;
int r;
+ queue_input.xcc_id = xcc_id;
queue_input.action = action;
queue_input.queue_type = ring->funcs->type;
queue_input.doorbell_offset = ring->doorbell_index;
@@ -355,13 +375,15 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
unsigned int vmid,
- bool use_mmio)
+ bool use_mmio,
+ uint32_t xcc_id)
{
struct mes_reset_queue_input queue_input;
int r;
memset(&queue_input, 0, sizeof(queue_input));
+ queue_input.xcc_id = xcc_id;
queue_input.queue_type = ring->funcs->type;
queue_input.doorbell_offset = ring->doorbell_index;
queue_input.me_id = ring->me;
@@ -393,11 +415,11 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
int queue_type,
bool detect_only,
unsigned int *hung_db_num,
- u32 *hung_db_array)
-
+ u32 *hung_db_array,
+ uint32_t xcc_id)
{
struct mes_detect_and_reset_queue_input input;
- u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr;
+ u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
int r, i;
if (!hung_db_num || !hung_db_array)
@@ -409,7 +431,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
return -EINVAL;
/* Clear the doorbell array before detection */
- memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET,
+ memset(adev->mes.hung_queue_db_array_cpu_addr[xcc_id], AMDGPU_MES_INVALID_DB_OFFSET,
adev->mes.hung_queue_db_array_size * sizeof(u32));
input.queue_type = queue_type;
input.detect_only = detect_only;
@@ -436,7 +458,8 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
return r;
}
-uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t xcc_id)
{
struct mes_misc_op_input op_input;
int r, val = 0;
@@ -450,6 +473,7 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
}
read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_READ_REG;
op_input.read_reg.reg_offset = reg;
op_input.read_reg.buffer_addr = read_val_gpu_addr;
@@ -473,12 +497,13 @@ error:
return val;
}
-int amdgpu_mes_wreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t val)
+int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t val, uint32_t xcc_id)
{
struct mes_misc_op_input op_input;
int r;
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_WRITE_REG;
op_input.write_reg.reg_offset = reg;
op_input.write_reg.reg_value = val;
@@ -501,11 +526,13 @@ error:
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
- uint32_t ref, uint32_t mask)
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_id)
{
struct mes_misc_op_input op_input;
int r;
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
op_input.wrm_reg.reg0 = reg0;
op_input.wrm_reg.reg1 = reg1;
@@ -530,14 +557,23 @@ error:
int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
{
- uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
+ uint32_t hdp_flush_req_offset, hdp_flush_done_offset;
+ struct amdgpu_ring *mes_ring;
+ uint32_t ref_and_mask = 0, reg_mem_engine = 0;
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "mes hdp flush is not supported.\n");
+ return -EINVAL;
+ }
+
+ mes_ring = &adev->mes.ring[0];
hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
- ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
+
+ adev->gfx.funcs->get_hdp_flush_mask(mes_ring, &ref_and_mask, &reg_mem_engine);
return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
- ref_and_mask, ref_and_mask);
+ ref_and_mask, ref_and_mask, 0);
}
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
@@ -545,7 +581,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint32_t spi_gdbg_per_vmid_cntl,
const uint32_t *tcp_watch_cntl,
uint32_t flags,
- bool trap_en)
+ bool trap_en,
+ uint32_t xcc_id)
{
struct mes_misc_op_input op_input = {0};
int r;
@@ -556,6 +593,7 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
return -EINVAL;
}
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
op_input.set_shader_debugger.process_context_addr = process_context_addr;
op_input.set_shader_debugger.flags.u32all = flags;
@@ -584,7 +622,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
}
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
- uint64_t process_context_addr)
+ uint64_t process_context_addr,
+ uint32_t xcc_id)
{
struct mes_misc_op_input op_input = {0};
int r;
@@ -595,6 +634,7 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
return -EINVAL;
}
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
op_input.set_shader_debugger.process_context_addr = process_context_addr;
op_input.set_shader_debugger.flags.process_ctx_flush = true;