summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
author Dave Airlie <airlied@redhat.com> 2026-01-15 14:49:33 +1000
committer Dave Airlie <airlied@redhat.com> 2026-01-15 14:49:33 +1000
commit 83dc0ba2755296b5e5882e044c80973b7c3fce9e (patch)
tree 53774323449c41eb1b17e101be83197c6b5cbb82 /drivers/gpu/drm/amd/amdgpu
parent a87fef0880c4f52769b5a3c2fc1b2d73aaa04eb3 (diff)
parent 38a0f4cf8c6147fd10baa206ab349f8ff724e391 (diff)
Merge tag 'amd-drm-next-6.20-2026-01-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.20-2026-01-09:

amdgpu:
- GPUVM updates
- Initial support for larger GPU address spaces
- Initial SMUIO 15.x support
- Documentation updates
- Initial PSP 15.x support
- Initial IH 7.1 support
- Initial IH 6.1.1 support
- SMU 13.0.12 updates
- RAS updates
- Initial MMHUB 3.4 support
- Initial MMHUB 4.2 support
- Initial GC 12.1 support
- Initial GC 11.5.4 support
- HDMI fixes
- Panel replay improvements
- DML updates
- DC FP fixes
- Initial SDMA 6.1.4 support
- Initial SDMA 7.1 support
- Userq updates
- DC HPD refactor
- SwSMU cleanups and refactoring
- TTM memory ops parallelization
- DCN 3.5 fixes
- DP audio fixes
- Clang fixes
- Misc spelling fixes and cleanups
- Initial SDMA 7.11.4 support
- Convert legacy DRM logging helpers to new drm logging helpers
- Initial JPEG 5.3 support
- Add support for changing UMA size via the driver
- DC analog fixes
- GC 9 gfx queue reset support
- Initial SMU 15.x support

amdkfd:
- Reserved SDMA rework
- Refactor SPM
- Initial GC 12.1 support
- Initial GC 11.5.4 support
- Initial SDMA 7.1 support
- Initial SDMA 6.1.4 support
- Increase the kfd process hash table
- Per context support
- Topology fixes

radeon:
- Convert legacy DRM logging helpers to new drm logging helpers
- Use devm for i2c adapters
- Variable sized array fix
- Misc cleanups

UAPI:
- KFD context support. Proposed userspace:
  https://github.com/ROCm/rocm-systems/pull/1705
  https://github.com/ROCm/rocm-systems/pull/1701
- Add userq metadata queries for more queue types. Proposed userspace:
  https://gitlab.freedesktop.org/yogeshmohan/mesa/-/commits/userq_query

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260109154713.3242957-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h176
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c387
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c526
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c148
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c315
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c298
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c159
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_encoders.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c4067
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_1.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h475
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c208
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c893
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c637
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_1.c166
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_1.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c714
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_1.c1968
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c916
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v15_0.c202
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v15_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.c342
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c1817
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_1.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_1_0_pkt_open.h5673
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.c50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.c213
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc_v1_0.c862
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc_v1_0.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c5
168 files changed, 23124 insertions, 1538 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index c88760fb52ea..8e22882b66aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -86,7 +86,7 @@ amdgpu-y += \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \
- cyan_skillfish_reg_init.o
+ cyan_skillfish_reg_init.o soc_v1_0.o
# add DF block
amdgpu-y += \
@@ -104,7 +104,8 @@ amdgpu-y += \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
- gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o
+ gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o \
+ mmhub_v4_2_0.o gfxhub_v12_1.o gmc_v12_1.o
# add UMC block
amdgpu-y += \
@@ -134,7 +135,9 @@ amdgpu-y += \
psp_v12_0.o \
psp_v13_0.o \
psp_v13_0_4.o \
- psp_v14_0.o
+ psp_v14_0.o \
+ psp_v15_0.o \
+ psp_v15_0_8.o
# add DCE block
amdgpu-y += \
@@ -156,7 +159,9 @@ amdgpu-y += \
gfx_v11_0_3.o \
imu_v11_0_3.o \
gfx_v12_0.o \
- imu_v12_0.o
+ gfx_v12_1.o \
+ imu_v12_0.o \
+ imu_v12_1.o
# add async DMA block
amdgpu-y += \
@@ -169,13 +174,15 @@ amdgpu-y += \
sdma_v5_0.o \
sdma_v5_2.o \
sdma_v6_0.o \
- sdma_v7_0.o
+ sdma_v7_0.o \
+ sdma_v7_1.o
# add MES block
amdgpu-y += \
amdgpu_mes.o \
mes_v11_0.o \
mes_v12_0.o \
+ mes_v12_1.o
# add GFX userqueue support
amdgpu-y += mes_userqueue.o
@@ -215,7 +222,8 @@ amdgpu-y += \
jpeg_v4_0_3.o \
jpeg_v4_0_5.o \
jpeg_v5_0_0.o \
- jpeg_v5_0_1.o
+ jpeg_v5_0_1.o \
+ jpeg_v5_3_0.o
# add VPE block
amdgpu-y += \
@@ -244,7 +252,9 @@ amdgpu-y += \
smuio_v13_0.o \
smuio_v13_0_3.o \
smuio_v13_0_6.o \
- smuio_v14_0_2.o
+ smuio_v14_0_2.o \
+ smuio_v15_0_0.o \
+ smuio_v15_0_8.o
# add reset block
amdgpu-y += \
@@ -275,7 +285,8 @@ amdgpu-y += \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o \
amdgpu_amdkfd_gfx_v11.o \
- amdgpu_amdkfd_gfx_v12.o
+ amdgpu_amdkfd_gfx_v12.o \
+ amdgpu_amdkfd_gfx_v12_1.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9f9774f58ce1..11a36c132905 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -34,12 +34,6 @@
#define pr_fmt(fmt) "amdgpu: " fmt
-#ifdef dev_fmt
-#undef dev_fmt
-#endif
-
-#define dev_fmt(fmt) "amdgpu: " fmt
-
#include "amdgpu_ctx.h"
#include <linux/atomic.h>
@@ -116,6 +110,7 @@
#include "amdgpu_reg_state.h"
#include "amdgpu_userq.h"
#include "amdgpu_eviction_fence.h"
+#include "amdgpu_ip.h"
#if defined(CONFIG_DRM_AMD_ISP)
#include "amdgpu_isp.h"
#endif
@@ -362,59 +357,6 @@ enum amdgpu_kiq_irq {
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
-int amdgpu_device_ip_set_clockgating_state(void *dev,
- enum amd_ip_block_type block_type,
- enum amd_clockgating_state state);
-int amdgpu_device_ip_set_powergating_state(void *dev,
- enum amd_ip_block_type block_type,
- enum amd_powergating_state state);
-void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
- u64 *flags);
-int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type);
-bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type);
-bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type);
-int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
-
-int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
-
-#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM
-
-struct amdgpu_ip_block_status {
- bool valid;
- bool sw;
- bool hw;
- bool late_initialized;
- bool hang;
-};
-
-struct amdgpu_ip_block_version {
- const enum amd_ip_block_type type;
- const u32 major;
- const u32 minor;
- const u32 rev;
- const struct amd_ip_funcs *funcs;
-};
-
-struct amdgpu_ip_block {
- struct amdgpu_ip_block_status status;
- const struct amdgpu_ip_block_version *version;
- struct amdgpu_device *adev;
-};
-
-int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
- enum amd_ip_block_type type,
- u32 major, u32 minor);
-
-struct amdgpu_ip_block *
-amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
- enum amd_ip_block_type type);
-
-int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
- const struct amdgpu_ip_block_version *ip_block_version);
-
/*
* BIOS.
*/
@@ -757,71 +699,6 @@ struct amdgpu_mmio_remap {
struct amdgpu_bo *bo;
};
-/* Define the HW IP blocks will be used in driver , add more if necessary */
-enum amd_hw_ip_block_type {
- GC_HWIP = 1,
- HDP_HWIP,
- SDMA0_HWIP,
- SDMA1_HWIP,
- SDMA2_HWIP,
- SDMA3_HWIP,
- SDMA4_HWIP,
- SDMA5_HWIP,
- SDMA6_HWIP,
- SDMA7_HWIP,
- LSDMA_HWIP,
- MMHUB_HWIP,
- ATHUB_HWIP,
- NBIO_HWIP,
- MP0_HWIP,
- MP1_HWIP,
- UVD_HWIP,
- VCN_HWIP = UVD_HWIP,
- JPEG_HWIP = VCN_HWIP,
- VCN1_HWIP,
- VCE_HWIP,
- VPE_HWIP,
- DF_HWIP,
- DCE_HWIP,
- OSSSYS_HWIP,
- SMUIO_HWIP,
- PWR_HWIP,
- NBIF_HWIP,
- THM_HWIP,
- CLK_HWIP,
- UMC_HWIP,
- RSMU_HWIP,
- XGMI_HWIP,
- DCI_HWIP,
- PCIE_HWIP,
- ISP_HWIP,
- MAX_HWIP
-};
-
-#define HWIP_MAX_INSTANCE 44
-
-#define HW_ID_MAX 300
-#define IP_VERSION_FULL(mj, mn, rv, var, srev) \
- (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev))
-#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0)
-#define IP_VERSION_MAJ(ver) ((ver) >> 24)
-#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF)
-#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF)
-#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF)
-#define IP_VERSION_SUBREV(ver) ((ver) & 0xF)
-#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8)
-
-struct amdgpu_ip_map_info {
- /* Map of logical to actual dev instances/mask */
- uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
- int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
- enum amd_hw_ip_block_type block,
- int8_t inst);
- uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
- enum amd_hw_ip_block_type block,
- uint32_t mask);
-};
-
enum amdgpu_uid_type {
AMDGPU_UID_TYPE_XCD,
AMDGPU_UID_TYPE_AID,
@@ -836,6 +713,38 @@ struct amdgpu_uid {
struct amdgpu_device *adev;
};
+#define MAX_UMA_OPTION_NAME 28
+#define MAX_UMA_OPTION_ENTRIES 19
+
+#define AMDGPU_UMA_FLAG_AUTO BIT(1)
+#define AMDGPU_UMA_FLAG_CUSTOM BIT(0)
+
+/**
+ * struct amdgpu_uma_carveout_option - single UMA carveout option
+ * @name: Name of the carveout option (buffer of MAX_UMA_OPTION_NAME bytes)
+ * @memory_carved_mb: Amount of memory carved in MB
+ * @flags: ATCS flags supported by this option (presumably AMDGPU_UMA_FLAG_*; TODO confirm)
+ */
+struct amdgpu_uma_carveout_option {
+ char name[MAX_UMA_OPTION_NAME];
+ uint32_t memory_carved_mb;
+ uint8_t flags;
+};
+
+/**
+ * struct amdgpu_uma_carveout_info - table of available UMA carveout options
+ * @num_entries: Number of available options (entries[] holds at most MAX_UMA_OPTION_ENTRIES)
+ * @uma_option_index: The index of the option currently applied
+ * @update_lock: Lock to serialize changes to the option
+ * @entries: The array of carveout options
+ */
+struct amdgpu_uma_carveout_info {
+ uint8_t num_entries;
+ uint8_t uma_option_index;
+ struct mutex update_lock;
+ struct amdgpu_uma_carveout_option entries[MAX_UMA_OPTION_ENTRIES];
+};
+
struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;
@@ -891,6 +800,7 @@ struct amdgpu_mqd_prop {
uint64_t eop_gpu_addr;
uint32_t hqd_pipe_priority;
uint32_t hqd_queue_priority;
+ uint32_t mqd_stride_size;
bool allow_tunneling;
bool hqd_active;
uint64_t shadow_addr;
@@ -1319,6 +1229,8 @@ struct amdgpu_device {
struct work_struct userq_reset_work;
struct amdgpu_uid *uid_info;
+ struct amdgpu_uma_carveout_info uma_info;
+
/* KFD
* Must be last --ends in a flexible-array member.
*/
@@ -1545,8 +1457,13 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
-#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev))
-#define amdgpu_asic_pre_asic_init(adev) (adev)->asic_funcs->pre_asic_init((adev))
+#define amdgpu_asic_supports_baco(adev) \
+ ((adev)->asic_funcs->supports_baco ? (adev)->asic_funcs->supports_baco((adev)) : 0)
+#define amdgpu_asic_pre_asic_init(adev) \
+ do { /* do/while(0): "macro(x);" stays one statement, safe before an else; no-op if the hook is NULL */ \
+ if ((adev)->asic_funcs && (adev)->asic_funcs->pre_asic_init) \
+ (adev)->asic_funcs->pre_asic_init((adev)); \
+ } while (0)
#define amdgpu_asic_update_umd_stable_pstate(adev, enter) \
((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0)
#define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c))
@@ -1686,12 +1603,14 @@ int amdgpu_acpi_init(struct amdgpu_device *adev);
void amdgpu_acpi_fini(struct amdgpu_device *adev);
bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev);
bool amdgpu_acpi_is_power_shift_control_supported(void);
+bool amdgpu_acpi_is_set_uma_allocation_size_supported(void);
int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
u8 perf_req, bool advertise);
int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state);
int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
enum amdgpu_ss ss_state);
+int amdgpu_acpi_set_uma_allocation_size(struct amdgpu_device *adev, u8 index, u8 type);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
u64 *tmr_size);
@@ -1720,6 +1639,7 @@ static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { re
static inline void amdgpu_acpi_detect(void) { }
static inline void amdgpu_acpi_release(void) { }
static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
+static inline bool amdgpu_acpi_is_set_uma_allocation_size_supported(void) { return false; }
static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state) { return 0; }
static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
@@ -1727,6 +1647,10 @@ static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
{
return 0;
}
+static inline int amdgpu_acpi_set_uma_allocation_size(struct amdgpu_device *adev, u8 index, u8 type)
+{
+ return -EINVAL;
+}
static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { }
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index d31460a9e958..c126d1bf2bc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -116,7 +116,9 @@ struct amdgpu_atcs_functions {
bool pcie_perf_req;
bool pcie_dev_rdy;
bool pcie_bus_width;
+ bool get_uma_size;
bool power_shift_control;
+ bool set_uma_allocation_size;
};
struct amdgpu_atcs {
@@ -241,7 +243,8 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
* (all asics).
* returns 0 on success, error on failure.
*/
-static int amdgpu_atif_verify_interface(struct amdgpu_atif *atif)
+static noinline_for_stack
+int amdgpu_atif_verify_interface(struct amdgpu_atif *atif)
{
union acpi_object *info;
struct atif_verify_interface output;
@@ -286,7 +289,8 @@ out:
* where n is specified in the result if a notifier is used.
* Returns 0 on success, error on failure.
*/
-static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif)
+static noinline_for_stack
+int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif)
{
union acpi_object *info;
struct amdgpu_atif_notification_cfg *n = &atif->notification_cfg;
@@ -354,7 +358,8 @@ out:
*
* Returns 0 on success, error on failure.
*/
-static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
+static noinline_for_stack
+int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
{
union acpi_object *info;
struct atif_qbtc_output characteristics;
@@ -587,7 +592,9 @@ static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mas
f->pcie_perf_req = mask & ATCS_PCIE_PERFORMANCE_REQUEST_SUPPORTED;
f->pcie_dev_rdy = mask & ATCS_PCIE_DEVICE_READY_NOTIFICATION_SUPPORTED;
f->pcie_bus_width = mask & ATCS_SET_PCIE_BUS_WIDTH_SUPPORTED;
+ f->get_uma_size = mask & ACPI_ATCS_GET_UMA_SIZE_SUPPORTED;
f->power_shift_control = mask & ATCS_SET_POWER_SHIFT_CONTROL_SUPPORTED;
+ f->set_uma_allocation_size = mask & ACPI_ATCS_SET_UMA_ALLOCATION_SIZE_SUPPORTED;
}
/**
@@ -600,7 +607,8 @@ static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mas
* (all asics).
* returns 0 on success, error on failure.
*/
-static int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs)
+static noinline_for_stack
+int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs)
{
union acpi_object *info;
struct atcs_verify_interface output;
@@ -664,6 +672,11 @@ bool amdgpu_acpi_is_power_shift_control_supported(void)
return amdgpu_acpi_priv.atcs.functions.power_shift_control;
}
+bool amdgpu_acpi_is_set_uma_allocation_size_supported(void)
+{
+ return amdgpu_acpi_priv.atcs.functions.set_uma_allocation_size; /* cached flag; no ACPI call is made here */
+}
+
/**
* amdgpu_acpi_pcie_notify_device_ready
*
@@ -740,7 +753,8 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
size = *(u16 *) info->buffer.pointer;
if (size < 3) {
- DRM_INFO("ATCS buffer is too small: %zu\n", size);
+ drm_info(adev_to_drm(adev),
+ "ATCS buffer is too small: %zu\n", size);
kfree(info);
return -EINVAL;
}
@@ -799,7 +813,7 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, &params);
if (!info) {
- DRM_ERROR("ATCS PSC update failed\n");
+ drm_err(adev_to_drm(adev), "ATCS PSC call failed\n");
return -EIO;
}
@@ -905,6 +919,44 @@ static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
#endif
/**
+ * amdgpu_acpi_set_uma_allocation_size - Set Unified Memory Architecture allocation size via ACPI
+ * @adev: Pointer to the amdgpu_device structure (used here only for error logging)
+ * @index: Index specifying the UMA allocation, sent as uma_size_index
+ * @type: Type of UMA allocation, sent as uma_size_type
+ *
+ * Issues the ATCS_FUNCTION_SET_UMA_ALLOCATION_SIZE call with @index and @type.
+ * The reply is only checked for presence and then freed; its contents are not parsed.
+ * Return: 0 on success, -EINVAL when the function is reported unsupported, -EIO if the call returns NULL.
+ */
+int amdgpu_acpi_set_uma_allocation_size(struct amdgpu_device *adev, u8 index, u8 type)
+{
+ struct atcs_set_uma_allocation_size_input atcs_input;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct acpi_buffer params;
+ union acpi_object *info;
+
+ if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
+ return -EINVAL;
+
+ atcs_input.size = sizeof(struct atcs_set_uma_allocation_size_input);
+ atcs_input.uma_size_index = index;
+ atcs_input.uma_size_type = type;
+
+ params.length = sizeof(struct atcs_set_uma_allocation_size_input);
+ params.pointer = &atcs_input;
+
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_SET_UMA_ALLOCATION_SIZE, &params);
+ if (!info) {
+ drm_err(adev_to_drm(adev), "ATCS UMA allocation size update failed\n");
+ return -EIO;
+ }
+
+ kfree(info); /* reply buffer is released without being inspected */
+
+ return 0;
+}
+
+/**
* amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
* acpi device handle
*
@@ -1089,7 +1141,8 @@ out:
return ret;
}
-static int amdgpu_acpi_enumerate_xcc(void)
+static noinline_for_stack
+int amdgpu_acpi_enumerate_xcc(void)
{
struct amdgpu_acpi_dev_info *dev_info = NULL;
struct amdgpu_acpi_xcc_info *xcc_info;
@@ -1108,17 +1161,15 @@ static int amdgpu_acpi_enumerate_xcc(void)
* one is not found, no need to check the rest.
*/
if (!acpi_dev) {
- DRM_DEBUG_DRIVER("No matching acpi device found for %s",
+ DRM_DEBUG_DRIVER("No matching acpi device found for %s\n",
hid);
break;
}
xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info),
GFP_KERNEL);
- if (!xcc_info) {
- DRM_ERROR("Failed to allocate memory for xcc info\n");
+ if (!xcc_info)
return -ENOMEM;
- }
INIT_LIST_HEAD(&xcc_info->list);
xcc_info->handle = acpi_device_handle(acpi_dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index a2879d2b7c8e..67a01c4f3885 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -683,7 +683,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
if (ret) {
- DRM_ERROR("amdgpu: failed to schedule IB.\n");
+ drm_err(adev_to_drm(adev), "failed to schedule IB.\n");
goto err_ib_sched;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8bdfcde2029b..da4575676335 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -37,7 +37,7 @@
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
#include "amdgpu_xcp.h"
-
+#include "kfd_topology.h"
extern uint64_t amdgpu_amdkfd_total_mem_size;
enum TLB_FLUSH_TYPE {
@@ -98,6 +98,7 @@ struct amdgpu_amdkfd_fence {
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
struct svm_range_bo *svm_bo;
+ uint16_t context_id;
};
struct amdgpu_kfd_dev {
@@ -148,6 +149,8 @@ struct amdkfd_process_info {
/* MMU-notifier related fields */
struct mutex notifier_lock;
uint32_t evicted_bos;
+ /* kfd context id */
+ u16 context_id;
struct delayed_work restore_userptr_work;
struct pid *pid;
bool block_mmu_notifications;
@@ -188,7 +191,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm,
- struct svm_range_bo *svm_bo);
+ struct svm_range_bo *svm_bo,
+ u16 context_id);
int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
#if defined(CONFIG_DEBUG_FS)
@@ -407,7 +411,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
- struct dma_fence *fence);
+ u16 context_id, struct dma_fence *fence);
#if IS_ENABLED(CONFIG_HSA_AMD)
int kgd2kfd_init(void);
void kgd2kfd_exit(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 1ef758ac5076..193ed8becab8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -62,7 +62,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm,
- struct svm_range_bo *svm_bo)
+ struct svm_range_bo *svm_bo,
+ u16 context_id)
{
struct amdgpu_amdkfd_fence *fence;
@@ -76,6 +77,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
get_task_comm(fence->timeline_name, current);
spin_lock_init(&fence->lock);
fence->svm_bo = svm_bo;
+ fence->context_id = context_id;
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
context, atomic_inc_return(&fence_seq));
@@ -126,8 +128,12 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
if (dma_fence_is_signaled(f))
return true;
+ /* If fence->svm_bo is NULL, this fence was created through
+ * init_kfd_vm() or amdgpu_amdkfd_gpuvm_restore_process_bos().
+ * Therefore, this fence is amdgpu_amdkfd_fence->eviction_fence.
+ */
if (!fence->svm_bo) {
- if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+ if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, fence->context_id, f))
return true;
} else {
if (!svm_range_schedule_evict_svm_bo(fence))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c
new file mode 100644
index 000000000000..965c7e688535
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12_1.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_12_1_0_offset.h"
+#include "gc/gc_12_1_0_sh_mask.h"
+#include "soc_v1_0.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Take adev->srbm_mutex and then select the given ME/pipe/queue/VMID on
+ * instance @inst through amdgpu_gfx_select_me_pipe_q().
+ *
+ * Returns with the mutex still held; unlock_srbm() is the matching release.
+ */
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid, uint32_t inst)
+{
+ mutex_lock(&adev->srbm_mutex);
+ amdgpu_gfx_select_me_pipe_q(adev, mec, pipe, queue, vmid, inst);
+}
+
+/*
+ * Counterpart of lock_srbm(): reset the ME/pipe/queue/VMID selection on
+ * instance @inst to all zeroes, then drop adev->srbm_mutex.
+ */
+static void unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
+{
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, inst);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Select the queue identified by a flat @pipe_id and @queue_id on instance
+ * @inst, leaving adev->srbm_mutex held (see lock_srbm()).
+ *
+ * The flat pipe index is split using adev->gfx.mec.num_pipe_per_mec: the
+ * quotient plus one is passed as the MEC number, the remainder as the pipe.
+ * VMID 0 is always selected.
+ */
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+			  uint32_t queue_id, uint32_t inst)
+{
+	const uint32_t pipes_per_mec = adev->gfx.mec.num_pipe_per_mec;
+	const uint32_t mec_sel = pipe_id / pipes_per_mec + 1;
+	const uint32_t pipe_sel = pipe_id % pipes_per_mec;
+
+	lock_srbm(adev, mec_sel, pipe_sel, queue_id, 0, inst);
+}
+
+/*
+ * Undo acquire_queue(): thin wrapper around unlock_srbm() for instance @inst.
+ */
+static void release_queue(struct amdgpu_device *adev, uint32_t inst)
+{
+ unlock_srbm(adev, inst);
+}
+
+/*
+ * Program regCPC_INT_CNTL for one compute pipe on instance @inst.
+ *
+ * The flat @pipe_id is split into MEC number (quotient + 1) and pipe
+ * (remainder) using adev->gfx.mec.num_pipe_per_mec. With that pipe selected
+ * (queue 0, VMID 0) under the SRBM lock, the register is written with the
+ * TIME_STAMP and OPCODE_ERROR interrupt enable masks.
+ *
+ * Always returns 0.
+ */
+static int init_interrupts_v12_1(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t inst)
+{
+	const uint32_t pipes_per_mec = adev->gfx.mec.num_pipe_per_mec;
+	const uint32_t mec_sel = pipe_id / pipes_per_mec + 1;
+	const uint32_t pipe_sel = pipe_id % pipes_per_mec;
+
+	lock_srbm(adev, mec_sel, pipe_sel, 0, 0, inst);
+	WREG32_SOC15(GC, GET_INST(GC, inst), regCPC_INT_CNTL,
+		     CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+		     CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+	unlock_srbm(adev, inst);
+
+	return 0;
+}
+
+/*
+ * Compute the register offset of one SDMA queue's register block.
+ *
+ * @engine_id is translated to a device instance with GET_INST(SDMA0, ...).
+ * The instance's position inside its XCC (dev_inst % num_inst_per_xcc)
+ * chooses between the SDMA0 and SDMA1 register sets, and
+ * dev_inst / num_inst_per_xcc is the instance argument given to
+ * SOC15_REG_OFFSET(). @queue_id then advances by the distance between the
+ * QUEUE0 and QUEUE1 RB_CNTL registers.
+ *
+ * The result has regSDMA0_SDMA_QUEUE0_RB_CNTL subtracted out, so callers add
+ * it to a regSDMA0_SDMA_QUEUE0_* register number (hqd_sdma_dump_v12_1()
+ * does this).
+ *
+ * Any position other than 0 or 1 inside the XCC hits BUG().
+ */
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+					unsigned int engine_id,
+					unsigned int queue_id)
+{
+	uint32_t sdma_engine_reg_base = 0;
+	uint32_t sdma_rlc_reg_offset;
+	uint32_t dev_inst = GET_INST(SDMA0, engine_id);
+
+	switch (dev_inst % adev->sdma.num_inst_per_xcc) {
+	case 0:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0,
+				dev_inst / adev->sdma.num_inst_per_xcc,
+				regSDMA0_SDMA_QUEUE0_RB_CNTL) - regSDMA0_SDMA_QUEUE0_RB_CNTL;
+		break;
+	case 1:
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1,
+				dev_inst / adev->sdma.num_inst_per_xcc,
+				regSDMA1_SDMA_QUEUE0_RB_CNTL) - regSDMA0_SDMA_QUEUE0_RB_CNTL;
+		break;
+	default:
+		BUG();
+	}
+
+	sdma_rlc_reg_offset = sdma_engine_reg_base
+		+ queue_id * (regSDMA0_SDMA_QUEUE1_RB_CNTL - regSDMA0_SDMA_QUEUE0_RB_CNTL);
+
+	/* engine_id and queue_id are unsigned int, so they are printed with %u. */
+	pr_debug("RLC register offset for SDMA%u RLC%u: 0x%x\n", engine_id,
+		 queue_id, sdma_rlc_reg_offset);
+
+	return sdma_rlc_reg_offset;
+}
+
+/*
+ * Dump the HQD registers of one compute queue.
+ *
+ * Allocates *@dump as an array of HQD_N_REGS {byte offset, value} pairs,
+ * selects the queue with acquire_queue(), and reads every register from
+ * regCP_MQD_BASE_ADDR through regCP_HQD_PQ_WPTR_HI on instance @inst. The
+ * first element of each pair is the register number shifted left by two.
+ *
+ * *@n_regs receives the number of pairs written. A one-time warning is
+ * raised if the register range does not hold exactly HQD_N_REGS registers.
+ * The buffer is not freed here (presumably the caller owns it; verify
+ * against the caller).
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ *
+ * HQD_N_REGS and DUMP_REG stay defined after this function;
+ * hqd_sdma_dump_v12_1() reuses DUMP_REG.
+ */
+static int hqd_dump_v12_1(struct amdgpu_device *adev,
+			  uint32_t pipe_id, uint32_t queue_id,
+			  uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+	uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	/* kmalloc_array() checks the count * size multiplication for overflow. */
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(adev, pipe_id, queue_id, inst);
+
+	for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR);
+	     reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI); reg++)
+		DUMP_REG(reg);
+
+	release_queue(adev, inst);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+/*
+ * Dump the registers of one SDMA queue.
+ *
+ * Covers regSDMA0_SDMA_QUEUE0_RB_CNTL through
+ * regSDMA0_SDMA_QUEUE0_CONTEXT_STATUS, each offset by
+ * get_sdma_rlc_reg_offset() for @engine_id / @queue_id. *@dump is allocated
+ * here as an array of {byte offset, value} pairs and *@n_regs receives the
+ * number of pairs written. No queue selection or locking is done in this
+ * function.
+ *
+ * HQD_N_REGS is redefined to the size of that register range; the DUMP_REG
+ * macro defined in hqd_dump_v12_1() picks up the new value at expansion.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int hqd_sdma_dump_v12_1(struct amdgpu_device *adev,
+			       uint32_t engine_id, uint32_t queue_id,
+			       uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+			engine_id, queue_id);
+	uint32_t i = 0, reg;
+
+	const uint32_t first_reg = regSDMA0_SDMA_QUEUE0_RB_CNTL;
+	const uint32_t last_reg = regSDMA0_SDMA_QUEUE0_CONTEXT_STATUS;
+#undef HQD_N_REGS
+#define HQD_N_REGS (last_reg - first_reg + 1)
+
+	/* kmalloc_array() checks the count * size multiplication for overflow. */
+	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = first_reg;
+	     reg <= last_reg; reg++)
+		DUMP_REG(sdma_rlc_reg_offset + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+/*
+ * Issue an SQ command to the waves addressed by @gfx_index_val.
+ *
+ * Under adev->grbm_idx_mutex, on instance @inst:
+ *   1. write @gfx_index_val to regGRBM_GFX_INDEX,
+ *   2. write @sq_cmd to regSQ_CMD,
+ *   3. write regGRBM_GFX_INDEX again with only the INSTANCE, SA and SE
+ *      broadcast-write fields set to 1.
+ *
+ * Always returns 0.
+ */
+static int wave_control_execute_v12_1(struct amdgpu_device *adev,
+				      uint32_t gfx_index_val,
+				      uint32_t sq_cmd, uint32_t inst)
+{
+	uint32_t broadcast = 0;
+
+	/* The broadcast value depends on no hardware state, so build it first. */
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  INSTANCE_BROADCAST_WRITES, 1);
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  SA_BROADCAST_WRITES, 1);
+	broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+				  SE_BROADCAST_WRITES, 1);
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regGRBM_GFX_INDEX), gfx_index_val);
+	WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regSQ_CMD), sq_cmd);
+	WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regGRBM_GFX_INDEX), broadcast);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return 0;
+}
+
+/*
+ * Build the SPI_GDBG_PER_VMID_CNTL value for enabling the debug trap:
+ * TRAP_EN = 1, EXCP_EN = 0, EXCP_REPLACE = 0.
+ *
+ * No register is written here; the value is only returned.
+ * @restore_dbg_registers and @vmid are not used.
+ */
+static uint32_t kgd_gfx_v12_1_enable_debug_trap(struct amdgpu_device *adev,
+						 bool restore_dbg_registers,
+						 uint32_t vmid)
+{
+	uint32_t cntl = 0;
+
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+	return cntl;
+}
+
+/*
+ * Build the SPI_GDBG_PER_VMID_CNTL value for disabling the debug trap:
+ * TRAP_EN = 1, EXCP_EN = 0, EXCP_REPLACE = 0.
+ *
+ * No register is written here; the value is only returned. @vmid is not
+ * used.
+ *
+ * NOTE(review): @keep_trap_enabled is never read, so TRAP_EN stays set
+ * whatever the caller asks for. Confirm this is intended for this IP.
+ */
+static uint32_t kgd_gfx_v12_1_disable_debug_trap(struct amdgpu_device *adev,
+						  bool keep_trap_enabled,
+						  uint32_t vmid)
+{
+	uint32_t cntl = 0;
+
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+	return cntl;
+}
+
+/*
+ * Validate a trap override request.
+ *
+ * *@trap_mask_supported is first narrowed to the KFD_DBG_TRAP_MASK_* bits
+ * listed below. This happens even when the request is then rejected.
+ *
+ * Returns 0 when @trap_override is KFD_DBG_TRAP_OVERRIDE_OR or
+ * KFD_DBG_TRAP_OVERRIDE_REPLACE, and -EPERM for any other value.
+ */
+static int kgd_gfx_v12_1_validate_trap_override_request(struct amdgpu_device *adev,
+							 uint32_t trap_override,
+							 uint32_t *trap_mask_supported)
+{
+	*trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+				KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+				KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+				KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+				KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+				KFD_DBG_TRAP_MASK_FP_INEXACT |
+				KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+				KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+				KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+				KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+				KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+	if (trap_override == KFD_DBG_TRAP_OVERRIDE_OR)
+		return 0;
+	if (trap_override == KFD_DBG_TRAP_OVERRIDE_REPLACE)
+		return 0;
+
+	return -EPERM;
+}
+
+/*
+ * Convert a KFD_DBG_TRAP_MASK_* bitmask into SPI_GDBG_PER_VMID_CNTL fields.
+ *
+ * The exception bits listed below are placed unchanged into EXCP_EN, and
+ * the wave-start / wave-end bits become the single-bit TRAP_ON_START and
+ * TRAP_ON_END fields. All other fields of the result are zero.
+ */
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+	const uint32_t excp_bits = KFD_DBG_TRAP_MASK_FP_INVALID |
+				   KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+				   KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+				   KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+				   KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+				   KFD_DBG_TRAP_MASK_FP_INEXACT |
+				   KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+				   KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+				   KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+	uint32_t excp_en = mask & excp_bits;
+	uint32_t on_start = !!(mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START);
+	uint32_t on_end = !!(mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END);
+	uint32_t hw;
+
+	hw = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+	hw = REG_SET_FIELD(hw, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, on_start);
+	hw = REG_SET_FIELD(hw, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, on_end);
+
+	return hw;
+}
+
+/*
+ * Inverse of trap_mask_map_sw_to_hw(): rebuild a KFD_DBG_TRAP_MASK_*
+ * bitmask from a SPI_GDBG_PER_VMID_CNTL value. The EXCP_EN field is taken
+ * as-is, and the wave-start / wave-end bits are added when TRAP_ON_START /
+ * TRAP_ON_END are non-zero.
+ */
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+	uint32_t sw_mask = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+	const bool on_start = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START);
+	const bool on_end = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END);
+
+	if (on_start)
+		sw_mask |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+	if (on_end)
+		sw_mask |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+	return sw_mask;
+}
+
+/*
+ * Build a SPI_GDBG_PER_VMID_CNTL value for a wave launch trap override.
+ *
+ * *@trap_mask_prev receives the software mask decoded from
+ * @kfd_dbg_trap_cntl_prev. Bits selected by @trap_mask_request take their
+ * value from @trap_mask_bits; all other bits keep their previous value. The
+ * merged mask is converted to hardware fields, then TRAP_EN is set to 1 and
+ * EXCP_REPLACE to @trap_override.
+ *
+ * No register is written here; @vmid is not used.
+ */
+static uint32_t kgd_gfx_v12_1_set_wave_launch_trap_override(struct amdgpu_device *adev,
+					uint32_t vmid,
+					uint32_t trap_override,
+					uint32_t trap_mask_bits,
+					uint32_t trap_mask_request,
+					uint32_t *trap_mask_prev,
+					uint32_t kfd_dbg_trap_cntl_prev)
+{
+	uint32_t requested, retained, cntl;
+
+	*trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+	requested = trap_mask_bits & trap_mask_request;
+	retained = *trap_mask_prev & ~trap_mask_request;
+
+	cntl = trap_mask_map_sw_to_hw(requested | retained);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+	cntl = REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+	return cntl;
+}
+
+/*
+ * Build a SPI_GDBG_PER_VMID_CNTL value for a wave launch mode.
+ *
+ * Mode 4 is treated as the stall mode and yields STALL_VMID = 1. Any other
+ * value is placed into LAUNCH_MODE. Only one of the two fields is set in
+ * the returned value.
+ *
+ * No register is written here; @vmid is not used.
+ */
+static uint32_t kgd_gfx_v12_1_set_wave_launch_mode(struct amdgpu_device *adev,
+						    uint8_t wave_launch_mode,
+						    uint32_t vmid)
+{
+	uint32_t cntl = 0;
+
+	if (wave_launch_mode == 4)
+		return REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, STALL_VMID, 1);
+
+	return REG_SET_FIELD(cntl, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE,
+			     wave_launch_mode);
+}
+
+/* Register distance between consecutive TCP_WATCHn register groups. */
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+
+/*
+ * Program the address registers of watch point @watch_id on instance @inst
+ * and return the matching TCP_WATCH0_CNTL value.
+ *
+ * ADDR_H receives the upper 32 bits of @watch_address masked to 16 bits and
+ * ADDR_L the lower 32 bits, written in that order. The returned control
+ * word carries MODE = @watch_mode, MASK = @watch_address_mask >> 7 and
+ * VALID = 1. It is not written to hardware here.
+ *
+ * @debug_vmid is not used.
+ */
+static uint32_t kgd_gfx_v12_1_set_address_watch(struct amdgpu_device *adev,
+					uint64_t watch_address,
+					uint32_t watch_address_mask,
+					uint32_t watch_id,
+					uint32_t watch_mode,
+					uint32_t debug_vmid,
+					uint32_t inst)
+{
+	const uint32_t addr_hi = upper_32_bits(watch_address) & 0xffff;
+	const uint32_t addr_lo = lower_32_bits(watch_address);
+	uint32_t cntl = 0;
+
+	cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, MODE, watch_mode);
+	cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, MASK, watch_address_mask >> 7);
+	cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, VALID, 1);
+
+	WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_H) +
+		    (watch_id * TCP_WATCH_STRIDE)),
+		   addr_hi, inst);
+
+	WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_L) +
+		    (watch_id * TCP_WATCH_STRIDE)),
+		   addr_lo, inst);
+
+	return cntl;
+}
+
+/*
+ * Stub: ignores @adev and @watch_id, touches no register and returns 0.
+ */
+static uint32_t kgd_gfx_v12_1_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+/*
+ * Stub: ignores @adev, @engine and @queue and returns 0.
+ */
+static uint32_t kgd_gfx_v12_1_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+/*
+ * kfd2kgd callback table wiring up the helpers defined in this file.
+ * The object is non-static, so it has external linkage.
+ * get_atc_vmid_pasid_mapping_info is explicitly left NULL.
+ */
+const struct kfd2kgd_calls gfx_v12_1_kfd2kgd = {
+ .init_interrupts = init_interrupts_v12_1,
+ .hqd_dump = hqd_dump_v12_1,
+ .hqd_sdma_dump = hqd_sdma_dump_v12_1,
+ .wave_control_execute = wave_control_execute_v12_1,
+ .get_atc_vmid_pasid_mapping_info = NULL,
+ .enable_debug_trap = kgd_gfx_v12_1_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v12_1_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v12_1_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v12_1_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v12_1_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v12_1_set_address_watch,
+ .clear_address_watch = kgd_gfx_v12_1_clear_address_watch,
+ .hqd_sdma_get_doorbell = kgd_gfx_v12_1_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b1c24c8fa686..88fc430b9425 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1397,8 +1397,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
struct dma_fence **ef)
{
struct amdkfd_process_info *info = NULL;
+ struct kfd_process *process = NULL;
int ret;
+ process = container_of(process_info, struct kfd_process, kgd_process_info);
if (!*process_info) {
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
@@ -1414,7 +1416,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
current->mm,
- NULL);
+ NULL, process->context_id);
if (!info->eviction_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
@@ -1425,6 +1427,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
INIT_DELAYED_WORK(&info->restore_userptr_work,
amdgpu_amdkfd_restore_userptr_worker);
+ info->context_id = process->context_id;
+
*process_info = info;
}
@@ -1987,7 +1991,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
drm_gem_object_put(&mem->bo->tbo.base);
/*
- * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(),
+ * For kgd_mem allocated in import_obj_create() via
+ * amdgpu_amdkfd_gpuvm_import_dmabuf_fd(),
* explicitly free it here.
*/
if (!use_release_notifier)
@@ -3066,7 +3071,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
process_info->eviction_fence->mm,
- NULL);
+ NULL, process_info->context_id);
if (!new_fence) {
pr_err("Failed to create eviction fence\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 636385c80f64..7f4751e5caaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -296,6 +296,83 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
return vram_type;
}
+/*
+ * Fill @uma_info from the v2.3 layout of the integrated system info table.
+ *
+ * The number of options comes from UMACarveoutIndexMax and the current
+ * selection from UMACarveoutIndex. For every option the size is stored in
+ * MB: a memoryCarvedGb of zero is reported as 512 MB, any other value is
+ * the GB count shifted left by 10. Flags and the option name are copied
+ * across.
+ *
+ * Returns 0 on success, -ENODEV when the table lists no options, and
+ * -EINVAL when it lists more than MAX_UMA_OPTION_ENTRIES. @uma_info is left
+ * untouched in both error cases.
+ */
+static int amdgpu_atomfirmware_get_uma_carveout_info_v2_3(struct amdgpu_device *adev,
+							   union igp_info *igp_info,
+							   struct amdgpu_uma_carveout_info *uma_info)
+{
+	struct uma_carveout_option *opts;
+	uint8_t nr_uma_options;
+	int i;
+
+	nr_uma_options = igp_info->v23.UMACarveoutIndexMax;
+
+	if (!nr_uma_options)
+		return -ENODEV;
+
+	if (nr_uma_options > MAX_UMA_OPTION_ENTRIES) {
+		/* Terminate the message with a newline like other log lines. */
+		drm_dbg(adev_to_drm(adev),
+			"Number of UMA options exceeds max table size. Options will not be parsed\n");
+		return -EINVAL;
+	}
+
+	uma_info->num_entries = nr_uma_options;
+	uma_info->uma_option_index = igp_info->v23.UMACarveoutIndex;
+
+	opts = igp_info->v23.UMASizeControlOption;
+
+	for (i = 0; i < nr_uma_options; i++) {
+		if (!opts[i].memoryCarvedGb)
+			uma_info->entries[i].memory_carved_mb = 512;
+		else
+			uma_info->entries[i].memory_carved_mb = (uint32_t)opts[i].memoryCarvedGb << 10;
+
+		uma_info->entries[i].flags = opts[i].uma_carveout_option_flags.all8;
+		strscpy(uma_info->entries[i].name, opts[i].optionName, MAX_UMA_OPTION_NAME);
+	}
+
+	return 0;
+}
+
+/*
+ * Read the UMA carveout options from the integratedsysteminfo data table.
+ *
+ * Only APUs are considered. The table header is parsed to find the data
+ * offset and the format/content revision; revision 2.3 is the only one
+ * handled here and is decoded by
+ * amdgpu_atomfirmware_get_uma_carveout_info_v2_3().
+ *
+ * Returns 0 on success, -ENODEV for non-APU devices and unhandled table
+ * revisions, -EINVAL when the table header cannot be parsed, or the error
+ * reported by the revision-specific parser.
+ */
+int amdgpu_atomfirmware_get_uma_carveout_info(struct amdgpu_device *adev,
+					      struct amdgpu_uma_carveout_info *uma_info)
+{
+	struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	union igp_info *igp_info;
+	u16 data_offset, size;
+	u8 frev, crev;
+	int index;
+
+	if (!(adev->flags & AMD_IS_APU))
+		return -ENODEV;
+
+	index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+					    integratedsysteminfo);
+
+	if (!amdgpu_atom_parse_data_header(mode_info->atom_context,
+					   index, &size,
+					   &frev, &crev, &data_offset)) {
+		return -EINVAL;
+	}
+
+	igp_info = (union igp_info *)
+		(mode_info->atom_context->bios + data_offset);
+
+	switch (frev) {
+	case 2:
+		switch (crev) {
+		case 3:
+			/* No break needed after the return. */
+			return amdgpu_atomfirmware_get_uma_carveout_info_v2_3(adev, igp_info, uma_info);
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+	return -ENODEV;
+}
+
int
amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 649b5530d8ae..67c8d105729b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -32,6 +32,8 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type, int *vram_vendor);
+int amdgpu_atomfirmware_get_uma_carveout_info(struct amdgpu_device *adev,
+ struct amdgpu_uma_carveout_info *uma_info);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 199693369c7c..1cbba9803d31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -37,9 +37,9 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
stime = ktime_get();
for (i = 0; i < n; i++) {
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
- false, false, 0);
+ r = amdgpu_copy_buffer(adev, &adev->mman.default_entity,
+ saddr, daddr, size, NULL, &fence,
+ false, 0);
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
@@ -66,7 +66,7 @@ static void amdgpu_benchmark_log_results(struct amdgpu_device *adev,
throughput = div64_s64(throughput, time_ms);
- dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
+ dev_info(adev->dev, " %s %u bo moves of %u kB from"
" %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
kind, n, size >> 10, sdomain, ddomain, time_ms,
throughput * 8, throughput);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 004a6a9d6b9f..09c8942c22d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -349,7 +349,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
strscpy(fw_name, "amdgpu/vega20_smc.bin");
break;
default:
- DRM_ERROR("SMC firmware not supported\n");
+ drm_err(adev_to_drm(adev), "SMC firmware not supported\n");
return -EINVAL;
}
@@ -357,7 +357,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
AMDGPU_UCODE_REQUIRED,
"%s", fw_name);
if (err) {
- DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
+ drm_err(adev_to_drm(adev),
+ "Failed to load firmware \"%s\"\n", fw_name);
amdgpu_ucode_release(&adev->pm.fw);
return err;
}
@@ -402,7 +403,7 @@ struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
kmalloc(sizeof(*cgs_device), GFP_KERNEL);
if (!cgs_device) {
- DRM_ERROR("Couldn't allocate CGS device structure\n");
+ drm_err(adev_to_drm(adev), "Couldn't allocate CGS device structure\n");
return NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 9f96d568acf2..d3e312bda4ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -877,8 +877,9 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
amdgpu_connector_get_edid(connector);
if (!amdgpu_connector->edid) {
- DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
- connector->name);
+ drm_err(connector->dev,
+ "%s: probed a monitor but no|invalid EDID\n",
+ connector->name);
ret = connector_status_connected;
} else {
amdgpu_connector->use_digital =
@@ -1056,7 +1057,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
amdgpu_connector_get_edid(connector);
if (!amdgpu_connector->edid) {
- DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
+ drm_err(adev_to_drm(adev), "%s: probed a monitor but no|invalid EDID\n",
connector->name);
ret = connector_status_connected;
broken_edid = true; /* defer use_digital to later */
@@ -1667,7 +1668,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (router->ddc_valid || router->cd_valid) {
amdgpu_connector->router_bus = amdgpu_i2c_lookup(adev, &router->i2c_info);
if (!amdgpu_connector->router_bus)
- DRM_ERROR("Failed to assign router i2c bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "Failed to assign router i2c bus! Check dmesg for i2c errors.\n");
}
if (is_dp_bridge) {
@@ -1681,7 +1683,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
has_aux = true;
ddc = &amdgpu_connector->ddc_bus->adapter;
} else {
- DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
}
switch (connector_type) {
@@ -1775,7 +1778,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
- DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
else
ddc = &amdgpu_connector->ddc_bus->adapter;
}
@@ -1800,7 +1804,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
- DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
else
ddc = &amdgpu_connector->ddc_bus->adapter;
}
@@ -1830,7 +1835,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
- DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
else
ddc = &amdgpu_connector->ddc_bus->adapter;
}
@@ -1886,7 +1892,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
- DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
else
ddc = &amdgpu_connector->ddc_bus->adapter;
}
@@ -1937,7 +1944,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
has_aux = true;
ddc = &amdgpu_connector->ddc_bus->adapter;
} else {
- DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
}
drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
@@ -1985,7 +1993,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
has_aux = true;
ddc = &amdgpu_connector->ddc_bus->adapter;
} else {
- DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "eDP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
}
drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
@@ -2008,7 +2017,8 @@ amdgpu_connector_add(struct amdgpu_device *adev,
if (i2c_bus->valid) {
amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
if (!amdgpu_connector->ddc_bus)
- DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ drm_err(adev_to_drm(adev),
+ "LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
else
ddc = &amdgpu_connector->ddc_bus->adapter;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
index 425a3e564360..c72c345334d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -23,6 +23,7 @@
*/
#include <linux/list.h>
#include "amdgpu.h"
+#include "amdgpu_ras_mgr.h"
static const guid_t MCE = CPER_NOTIFY_MCE;
static const guid_t CMC = CPER_NOTIFY_CMC;
@@ -559,7 +560,10 @@ int amdgpu_cper_init(struct amdgpu_device *adev)
{
int r;
- if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_cper_en(adev))
+ return 0;
+ else if (!amdgpu_sriov_vf(adev) && !amdgpu_uniras_enabled(adev) &&
+ !amdgpu_aca_is_enabled(adev))
return 0;
r = amdgpu_cper_ring_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ecdfe6cb36cc..d591dce0f3b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1021,6 +1021,7 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
struct amdgpu_job *job)
{
struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ struct amdgpu_device *adev = ring->adev;
unsigned int i;
int r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 02138aa55793..11d70fe4f75a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -76,7 +76,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
r = drm_exec_lock_obj(&exec, &bo->tbo.base);
drm_exec_retry_on_contention(&exec);
if (unlikely(r)) {
- DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ drm_err(adev_to_drm(adev),
+ "failed to reserve CSA,PD BOs: err=%d\n", r);
goto error;
}
}
@@ -92,7 +93,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
AMDGPU_PTE_EXECUTABLE);
if (r) {
- DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
+ drm_err(adev_to_drm(adev),
+ "failed to do bo_map on static CSA, err=%d\n", r);
amdgpu_vm_bo_del(adev, *bo_va);
goto error;
}
@@ -116,14 +118,16 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
r = drm_exec_lock_obj(&exec, &bo->tbo.base);
drm_exec_retry_on_contention(&exec);
if (unlikely(r)) {
- DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ drm_err(adev_to_drm(adev),
+ "failed to reserve CSA,PD BOs: err=%d\n", r);
goto error;
}
}
r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
if (r) {
- DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
+ drm_err(adev_to_drm(adev),
+ "failed to do bo_unmap on static CSA, err=%d\n", r);
goto error;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index afedea02188d..5c344665b43c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -438,18 +438,21 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
struct drm_sched_entity *ctx_entity;
if (hw_ip >= AMDGPU_HW_IP_NUM) {
- DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+ drm_err(adev_to_drm(ctx->mgr->adev),
+ "unknown HW IP type: %d\n", hw_ip);
return -EINVAL;
}
/* Right now all IPs have only one instance - multiple rings. */
if (instance != 0) {
- DRM_DEBUG("invalid ip instance: %d\n", instance);
+ drm_dbg(adev_to_drm(ctx->mgr->adev),
+ "invalid ip instance: %d\n", instance);
return -EINVAL;
}
if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
- DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+ drm_dbg(adev_to_drm(ctx->mgr->adev),
+ "invalid ring: %d %d\n", hw_ip, ring);
return -EINVAL;
}
@@ -874,7 +877,8 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
r = dma_fence_wait(other, true);
if (r < 0 && r != -ERESTARTSYS)
- DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+ drm_err(adev_to_drm(ctx->mgr->adev),
+ "AMDGPU: Error waiting for fence in ctx %p\n", ctx);
dma_fence_put(other);
return r;
@@ -929,7 +933,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
idr_for_each_entry(idp, ctx, id) {
if (kref_read(&ctx->refcount) != 1) {
- DRM_ERROR("ctx %p is still alive\n", ctx);
+ drm_err(adev_to_drm(mgr->adev), "ctx %p is still alive\n", ctx);
continue;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 62d43b8cbe58..aeb90708f229 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1921,7 +1921,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
/* preempt the IB */
r = amdgpu_ring_preempt_ib(ring);
if (r) {
- DRM_WARN("failed to preempt ring %d\n", ring->idx);
+ drm_warn(adev_to_drm(adev), "failed to preempt ring %d\n", ring->idx);
goto failure;
}
@@ -1929,7 +1929,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
if (atomic_read(&ring->fence_drv.last_seq) !=
ring->fence_drv.sync_seq) {
- DRM_INFO("ring %d was preempted\n", ring->idx);
+ drm_info(adev_to_drm(adev), "ring %d was preempted\n", ring->idx);
amdgpu_ib_preempt_mark_partial_job(ring);
@@ -2016,14 +2016,16 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
&fops_ib_preempt);
if (IS_ERR(ent)) {
- DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
+ drm_err(adev_to_drm(adev),
+ "unable to create amdgpu_preempt_ib debugsfs file\n");
return PTR_ERR(ent);
}
ent = debugfs_create_file("amdgpu_force_sclk", 0200, root, adev,
&fops_sclk_set);
if (IS_ERR(ent)) {
- DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n");
+ drm_err(adev_to_drm(adev),
+ "unable to create amdgpu_set_sclk debugsfs file\n");
return PTR_ERR(ent);
}
@@ -2036,7 +2038,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
r = amdgpu_debugfs_regs_init(adev);
if (r)
- DRM_ERROR("registering register debugfs failed (%d).\n", r);
+ drm_err(adev_to_drm(adev), "registering register debugfs failed (%d).\n", r);
amdgpu_debugfs_firmware_init(adev);
amdgpu_ta_if_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 4e2fe6674db8..ca71c2948227 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -333,11 +333,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
struct drm_sched_job *s_job;
coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
-
- if (!coredump) {
- DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
+ if (!coredump)
return;
- }
coredump->skip_vram_check = skip_vram_check;
coredump->reset_vram_lost = vram_lost;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 12201b8e99b3..347996f6ffaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -36,6 +36,7 @@
#include <linux/pci.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>
+#include <linux/nospec.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_client_event.h>
@@ -313,42 +314,6 @@ void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
}
-int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
-{
- int r;
-
- if (ip_block->version->funcs->suspend) {
- r = ip_block->version->funcs->suspend(ip_block);
- if (r) {
- dev_err(ip_block->adev->dev,
- "suspend of IP block <%s> failed %d\n",
- ip_block->version->funcs->name, r);
- return r;
- }
- }
-
- ip_block->status.hw = false;
- return 0;
-}
-
-int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
-{
- int r;
-
- if (ip_block->version->funcs->resume) {
- r = ip_block->version->funcs->resume(ip_block);
- if (r) {
- dev_err(ip_block->adev->dev,
- "resume of IP block <%s> failed %d\n",
- ip_block->version->funcs->name, r);
- return r;
- }
- }
-
- ip_block->status.hw = true;
- return 0;
-}
-
/**
* DOC: board_info
*
@@ -417,6 +382,175 @@ static const struct attribute_group amdgpu_board_attrs_group = {
.is_visible = amdgpu_board_attrs_is_visible
};
+/**
+ * DOC: uma/carveout_options
+ *
+ * This is a read-only file that lists all available UMA allocation
+ * options and their corresponding indices. Example output::
+ *
+ * $ cat uma/carveout_options
+ * 0: Minimum (512 MB)
+ * 1: (1 GB)
+ * 2: (2 GB)
+ * 3: (4 GB)
+ * 4: (6 GB)
+ * 5: (8 GB)
+ * 6: (12 GB)
+ * 7: Medium (16 GB)
+ * 8: (24 GB)
+ * 9: High (32 GB)
+ */
+static ssize_t carveout_options_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
+ uint32_t memory_carved;
+ ssize_t size = 0;
+
+ if (!uma_info || !uma_info->num_entries)
+ return -ENODEV;
+
+ for (int i = 0; i < uma_info->num_entries; i++) {
+ memory_carved = uma_info->entries[i].memory_carved_mb;
+ if (memory_carved >= SZ_1G/SZ_1M) {
+ size += sysfs_emit_at(buf, size, "%d: %s (%u GB)\n",
+ i,
+ uma_info->entries[i].name,
+ memory_carved >> 10);
+ } else {
+ size += sysfs_emit_at(buf, size, "%d: %s (%u MB)\n",
+ i,
+ uma_info->entries[i].name,
+ memory_carved);
+ }
+ }
+
+ return size;
+}
+static DEVICE_ATTR_RO(carveout_options);
+
+/**
+ * DOC: uma/carveout
+ *
+ * This file is both readable and writable. When read, it shows the
+ * index of the current setting. Writing a valid index to this file
+ * allows users to change the UMA carveout size to the selected option
+ * on the next boot.
+ *
+ * The available options and their corresponding indices can be read
+ * from the uma/carveout_options file.
+ */
+static ssize_t carveout_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%u\n", adev->uma_info.uma_option_index);
+}
+
+static ssize_t carveout_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
+ struct amdgpu_uma_carveout_option *opt;
+ unsigned long val;
+ uint8_t flags;
+ int r;
+
+ r = kstrtoul(buf, 10, &val);
+ if (r)
+ return r;
+
+ if (val >= uma_info->num_entries)
+ return -EINVAL;
+
+ val = array_index_nospec(val, uma_info->num_entries);
+ opt = &uma_info->entries[val];
+
+ if (!(opt->flags & AMDGPU_UMA_FLAG_AUTO) &&
+ !(opt->flags & AMDGPU_UMA_FLAG_CUSTOM)) {
+ drm_err_once(ddev, "Option %lu not supported due to lack of Custom/Auto flag", val);
+ return -EINVAL;
+ }
+
+ flags = opt->flags;
+ flags &= ~((flags & AMDGPU_UMA_FLAG_AUTO) >> 1);
+
+ guard(mutex)(&uma_info->update_lock);
+
+ r = amdgpu_acpi_set_uma_allocation_size(adev, val, flags);
+ if (r)
+ return r;
+
+ uma_info->uma_option_index = val;
+
+ return count;
+}
+static DEVICE_ATTR_RW(carveout);
+
+static struct attribute *amdgpu_uma_attrs[] = {
+ &dev_attr_carveout.attr,
+ &dev_attr_carveout_options.attr,
+ NULL
+};
+
+const struct attribute_group amdgpu_uma_attr_group = {
+ .name = "uma",
+ .attrs = amdgpu_uma_attrs
+};
+
+static void amdgpu_uma_sysfs_init(struct amdgpu_device *adev)
+{
+ int rc;
+
+ if (!(adev->flags & AMD_IS_APU))
+ return;
+
+ if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
+ return;
+
+ rc = amdgpu_atomfirmware_get_uma_carveout_info(adev, &adev->uma_info);
+ if (rc) {
+ drm_dbg(adev_to_drm(adev),
+ "Failed to parse UMA carveout info from VBIOS: %d\n", rc);
+ goto out_info;
+ }
+
+ mutex_init(&adev->uma_info.update_lock);
+
+ rc = devm_device_add_group(adev->dev, &amdgpu_uma_attr_group);
+ if (rc) {
+ drm_dbg(adev_to_drm(adev), "Failed to add UMA carveout sysfs interfaces %d\n", rc);
+ goto out_attr;
+ }
+
+ return;
+
+out_attr:
+ mutex_destroy(&adev->uma_info.update_lock);
+out_info:
+ return;
+}
+
+static void amdgpu_uma_sysfs_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_uma_carveout_info *uma_info = &adev->uma_info;
+
+ if (!amdgpu_acpi_is_set_uma_allocation_size_supported())
+ return;
+
+ mutex_destroy(&uma_info->update_lock);
+ uma_info->num_entries = 0;
+}
+
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
@@ -2264,293 +2398,6 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
};
/**
- * amdgpu_device_ip_set_clockgating_state - set the CG state
- *
- * @dev: amdgpu_device pointer
- * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
- * @state: clockgating state (gate or ungate)
- *
- * Sets the requested clockgating state for all instances of
- * the hardware IP specified.
- * Returns the error code from the last instance.
- */
-int amdgpu_device_ip_set_clockgating_state(void *dev,
- enum amd_ip_block_type block_type,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = dev;
- int i, r = 0;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
- if (adev->ip_blocks[i].version->type != block_type)
- continue;
- if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
- continue;
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
- &adev->ip_blocks[i], state);
- if (r)
- dev_err(adev->dev,
- "set_clockgating_state of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
- return r;
-}
-
-/**
- * amdgpu_device_ip_set_powergating_state - set the PG state
- *
- * @dev: amdgpu_device pointer
- * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
- * @state: powergating state (gate or ungate)
- *
- * Sets the requested powergating state for all instances of
- * the hardware IP specified.
- * Returns the error code from the last instance.
- */
-int amdgpu_device_ip_set_powergating_state(void *dev,
- enum amd_ip_block_type block_type,
- enum amd_powergating_state state)
-{
- struct amdgpu_device *adev = dev;
- int i, r = 0;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
- if (adev->ip_blocks[i].version->type != block_type)
- continue;
- if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
- continue;
- r = adev->ip_blocks[i].version->funcs->set_powergating_state(
- &adev->ip_blocks[i], state);
- if (r)
- dev_err(adev->dev,
- "set_powergating_state of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
- return r;
-}
-
-/**
- * amdgpu_device_ip_get_clockgating_state - get the CG state
- *
- * @adev: amdgpu_device pointer
- * @flags: clockgating feature flags
- *
- * Walks the list of IPs on the device and updates the clockgating
- * flags for each IP.
- * Updates @flags with the feature flags for each hardware IP where
- * clockgating is enabled.
- */
-void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
- u64 *flags)
-{
- int i;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
- if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
- adev->ip_blocks[i].version->funcs->get_clockgating_state(
- &adev->ip_blocks[i], flags);
- }
-}
-
-/**
- * amdgpu_device_ip_wait_for_idle - wait for idle
- *
- * @adev: amdgpu_device pointer
- * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
- *
- * Waits for the request hardware IP to be idle.
- * Returns 0 for success or a negative error code on failure.
- */
-int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type)
-{
- int i, r;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
- if (adev->ip_blocks[i].version->type == block_type) {
- if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
- r = adev->ip_blocks[i].version->funcs->wait_for_idle(
- &adev->ip_blocks[i]);
- if (r)
- return r;
- }
- break;
- }
- }
- return 0;
-
-}
-
-/**
- * amdgpu_device_ip_is_hw - is the hardware IP enabled
- *
- * @adev: amdgpu_device pointer
- * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
- *
- * Check if the hardware IP is enable or not.
- * Returns true if it the IP is enable, false if not.
- */
-bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type)
-{
- int i;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (adev->ip_blocks[i].version->type == block_type)
- return adev->ip_blocks[i].status.hw;
- }
- return false;
-}
-
-/**
- * amdgpu_device_ip_is_valid - is the hardware IP valid
- *
- * @adev: amdgpu_device pointer
- * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
- *
- * Check if the hardware IP is valid or not.
- * Returns true if it the IP is valid, false if not.
- */
-bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type)
-{
- int i;
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (adev->ip_blocks[i].version->type == block_type)
- return adev->ip_blocks[i].status.valid;
- }
- return false;
-
-}
-
-/**
- * amdgpu_device_ip_get_ip_block - get a hw IP pointer
- *
- * @adev: amdgpu_device pointer
- * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
- *
- * Returns a pointer to the hardware IP block structure
- * if it exists for the asic, otherwise NULL.
- */
-struct amdgpu_ip_block *
-amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
- enum amd_ip_block_type type)
-{
- int i;
-
- for (i = 0; i < adev->num_ip_blocks; i++)
- if (adev->ip_blocks[i].version->type == type)
- return &adev->ip_blocks[i];
-
- return NULL;
-}
-
-/**
- * amdgpu_device_ip_block_version_cmp
- *
- * @adev: amdgpu_device pointer
- * @type: enum amd_ip_block_type
- * @major: major version
- * @minor: minor version
- *
- * return 0 if equal or greater
- * return 1 if smaller or the ip_block doesn't exist
- */
-int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
- enum amd_ip_block_type type,
- u32 major, u32 minor)
-{
- struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
-
- if (ip_block && ((ip_block->version->major > major) ||
- ((ip_block->version->major == major) &&
- (ip_block->version->minor >= minor))))
- return 0;
-
- return 1;
-}
-
-static const char *ip_block_names[] = {
- [AMD_IP_BLOCK_TYPE_COMMON] = "common",
- [AMD_IP_BLOCK_TYPE_GMC] = "gmc",
- [AMD_IP_BLOCK_TYPE_IH] = "ih",
- [AMD_IP_BLOCK_TYPE_SMC] = "smu",
- [AMD_IP_BLOCK_TYPE_PSP] = "psp",
- [AMD_IP_BLOCK_TYPE_DCE] = "dce",
- [AMD_IP_BLOCK_TYPE_GFX] = "gfx",
- [AMD_IP_BLOCK_TYPE_SDMA] = "sdma",
- [AMD_IP_BLOCK_TYPE_UVD] = "uvd",
- [AMD_IP_BLOCK_TYPE_VCE] = "vce",
- [AMD_IP_BLOCK_TYPE_ACP] = "acp",
- [AMD_IP_BLOCK_TYPE_VCN] = "vcn",
- [AMD_IP_BLOCK_TYPE_MES] = "mes",
- [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg",
- [AMD_IP_BLOCK_TYPE_VPE] = "vpe",
- [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
- [AMD_IP_BLOCK_TYPE_ISP] = "isp",
- [AMD_IP_BLOCK_TYPE_RAS] = "ras",
-};
-
-static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type)
-{
- int idx = (int)type;
-
- return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : "unknown";
-}
-
-/**
- * amdgpu_device_ip_block_add
- *
- * @adev: amdgpu_device pointer
- * @ip_block_version: pointer to the IP to add
- *
- * Adds the IP block driver information to the collection of IPs
- * on the asic.
- */
-int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
- const struct amdgpu_ip_block_version *ip_block_version)
-{
- if (!ip_block_version)
- return -EINVAL;
-
- switch (ip_block_version->type) {
- case AMD_IP_BLOCK_TYPE_VCN:
- if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
- return 0;
- break;
- case AMD_IP_BLOCK_TYPE_JPEG:
- if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
- return 0;
- break;
- default:
- break;
- }
-
- dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n",
- adev->num_ip_blocks,
- ip_block_name(adev, ip_block_version->type),
- ip_block_version->major,
- ip_block_version->minor,
- ip_block_version->rev,
- ip_block_version->funcs->name);
-
- adev->ip_blocks[adev->num_ip_blocks].adev = adev;
-
- adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
-
- return 0;
-}
-
-/**
* amdgpu_device_enable_virtual_display - enable virtual display feature
*
* @adev: amdgpu_device pointer
@@ -3309,19 +3156,18 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
- if (adev->mman.buffer_funcs_ring->sched.ready)
+ if (adev->mman.buffer_funcs_ring &&
+ adev->mman.buffer_funcs_ring->sched.ready)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
/* Don't init kfd if whole hive need to be reset during init */
if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
- kgd2kfd_init_zone_device(adev);
amdgpu_amdkfd_device_init(adev);
}
amdgpu_fru_get_product_info(adev);
- if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
- r = amdgpu_cper_init(adev);
+ r = amdgpu_cper_init(adev);
init_failed:
@@ -4491,6 +4337,7 @@ static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
amdgpu_fru_sysfs_init(adev);
amdgpu_reg_state_sysfs_init(adev);
amdgpu_xcp_sysfs_init(adev);
+ amdgpu_uma_sysfs_init(adev);
return r;
}
@@ -4506,6 +4353,7 @@ static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
amdgpu_reg_state_sysfs_fini(adev);
amdgpu_xcp_sysfs_fini(adev);
+ amdgpu_uma_sysfs_fini(adev);
}
/**
@@ -4931,8 +4779,15 @@ fence_driver_init:
flush_delayed_work(&adev->delayed_init_work);
}
+ /* Don't init kfd if whole hive need to be reset during init */
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ kgd2kfd_init_zone_device(adev);
+ kfd_update_svm_support_properties(adev);
+ }
+
if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
amdgpu_xgmi_reset_on_init(adev);
+
/*
* Place those sysfs registering after `late_init`. As some of those
* operations performed in `late_init` might affect the sysfs
@@ -5030,7 +4885,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
*/
void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
- dev_info(adev->dev, "amdgpu: finishing device.\n");
+ dev_info(adev->dev, "finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
if (adev->mman.initialized)
@@ -5064,6 +4919,14 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ /*
+ * device went through surprise hotplug; we need to destroy topology
+ * before ip_fini_early to prevent kfd locking refcount issues by calling
+ * amdgpu_amdkfd_suspend()
+ */
+ if (drm_dev_is_unplugged(adev_to_drm(adev)))
+ amdgpu_amdkfd_device_fini_sw(adev);
+
amdgpu_device_ip_fini_early(adev);
amdgpu_irq_fini_hw(adev);
@@ -5867,6 +5730,9 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
if (ret)
goto mode1_reset_failed;
+ /* enable mmio access after mode 1 reset completed */
+ adev->no_hw_access = false;
+
amdgpu_device_load_pci_state(adev->pdev);
ret = amdgpu_psp_wait_for_bootloader(adev);
if (ret)
@@ -6541,7 +6407,7 @@ static int amdgpu_device_sched_resume(struct list_head *device_list,
!amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
dev_info(
tmp_adev->dev,
- "GPU reset(%d) failed with error %d \n",
+ "GPU reset(%d) failed with error %d\n",
atomic_read(
&tmp_adev->gpu_reset_counter),
tmp_adev->asic_reset_res);
@@ -6683,7 +6549,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*
* job->base holds a reference to parent fence
*/
- if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
+ if (job && (dma_fence_get_status(&job->hw_fence->base) > 0)) {
job_signaled = true;
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index fa2a22dfa048..8070a6da794f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -71,6 +71,7 @@
#include "nv.h"
#include "soc21.h"
#include "soc24.h"
+#include "soc_v1_0.h"
#include "navi10_ih.h"
#include "ih_v6_0.h"
#include "ih_v6_1.h"
@@ -78,10 +79,12 @@
#include "gfx_v10_0.h"
#include "gfx_v11_0.h"
#include "gfx_v12_0.h"
+#include "gfx_v12_1.h"
#include "sdma_v5_0.h"
#include "sdma_v5_2.h"
#include "sdma_v6_0.h"
#include "sdma_v7_0.h"
+#include "sdma_v7_1.h"
#include "lsdma_v6_0.h"
#include "lsdma_v7_0.h"
#include "vcn_v2_0.h"
@@ -97,16 +100,21 @@
#include "amdgpu_vkms.h"
#include "mes_v11_0.h"
#include "mes_v12_0.h"
+#include "mes_v12_1.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
#include "smuio_v13_0.h"
#include "smuio_v13_0_3.h"
#include "smuio_v13_0_6.h"
#include "smuio_v14_0_2.h"
+#include "smuio_v15_0_0.h"
+#include "smuio_v15_0_8.h"
#include "vcn_v5_0_0.h"
#include "vcn_v5_0_1.h"
#include "jpeg_v5_0_0.h"
#include "jpeg_v5_0_1.h"
+#include "jpeg_v5_3_0.h"
+
#include "amdgpu_ras_mgr.h"
#include "amdgpu_vpe.h"
@@ -209,6 +217,8 @@ static const char *hw_id_names[HW_ID_MAX] = {
[XGBE_HWID] = "XGBE",
[MP0_HWID] = "MP0",
[VPE_HWID] = "VPE",
+ [ATU_HWID] = "ATU",
+ [AIGC_HWID] = "AIGC",
};
static int hw_id_map[MAX_HWIP] = {
@@ -240,6 +250,7 @@ static int hw_id_map[MAX_HWIP] = {
[PCIE_HWIP] = PCIE_HWID,
[VPE_HWIP] = VPE_HWID,
[ISP_HWIP] = ISP_HWID,
+ [ATU_HWIP] = ATU_HWID,
};
static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
@@ -1980,12 +1991,16 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &soc24_common_ip_block);
break;
+ case IP_VERSION(12, 1, 0):
+ amdgpu_device_ip_block_add(adev, &soc_v1_0_common_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add common ip block(GC_HWIP:0x%x)\n",
@@ -2036,10 +2051,12 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
+ case IP_VERSION(12, 1, 0):
amdgpu_device_ip_block_add(adev, &gmc_v12_0_ip_block);
break;
default:
@@ -2081,9 +2098,11 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block);
break;
case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block);
break;
case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 1, 0):
amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block);
break;
default:
@@ -2150,6 +2169,12 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(14, 0, 5):
amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block);
break;
+ case IP_VERSION(15, 0, 0):
+ amdgpu_device_ip_block_add(adev, &psp_v15_0_ip_block);
+ break;
+ case IP_VERSION(15, 0, 8):
+ amdgpu_device_ip_block_add(adev, &psp_v15_0_8_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add psp ip block(MP0_HWIP:0x%x)\n",
@@ -2213,6 +2238,9 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(14, 0, 5):
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
+ case IP_VERSION(15, 0, 0):
+ amdgpu_device_ip_block_add(adev, &smu_v15_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add smu ip block(MP1_HWIP:0x%x)\n",
@@ -2342,12 +2370,16 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block);
break;
+ case IP_VERSION(12, 1, 0):
+ amdgpu_device_ip_block_add(adev, &gfx_v12_1_ip_block);
+ break;
default:
dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
amdgpu_ip_version(adev, GC_HWIP, 0));
@@ -2398,12 +2430,16 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(6, 1, 1):
case IP_VERSION(6, 1, 2):
case IP_VERSION(6, 1, 3):
+ case IP_VERSION(6, 1, 4):
amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
break;
case IP_VERSION(7, 0, 0):
case IP_VERSION(7, 0, 1):
amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block);
break;
+ case IP_VERSION(7, 1, 0):
+ amdgpu_device_ip_block_add(adev, &sdma_v7_1_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
@@ -2511,6 +2547,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
break;
+ case IP_VERSION(5, 3, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_3_0_ip_block);
+ break;
case IP_VERSION(5, 0, 1):
amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
@@ -2537,6 +2577,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
adev->enable_mes = true;
adev->enable_mes_kiq = true;
@@ -2549,6 +2590,13 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
if (amdgpu_uni_mes)
adev->enable_uni_mes = true;
break;
+ case IP_VERSION(12, 1, 0):
+ amdgpu_device_ip_block_add(adev, &mes_v12_1_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ if (amdgpu_uni_mes)
+ adev->enable_uni_mes = true;
+ break;
default:
break;
}
@@ -2563,6 +2611,9 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
case IP_VERSION(9, 5, 0):
aqua_vanjaram_init_soc_config(adev);
break;
+ case IP_VERSION(12, 1, 0):
+ soc_v1_0_init_soc_config(adev);
+ break;
default:
break;
}
@@ -2929,10 +2980,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
adev->family = AMDGPU_FAMILY_GC_11_5_0;
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
+ case IP_VERSION(12, 1, 0):
adev->family = AMDGPU_FAMILY_GC_12_0_0;
break;
default:
@@ -2955,6 +3008,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
adev->flags |= AMD_IS_APU;
break;
default:
@@ -3025,6 +3079,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
break;
case IP_VERSION(6, 3, 1):
+ case IP_VERSION(7, 11, 4):
adev->nbio.funcs = &nbif_v6_3_1_funcs;
adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg;
break;
@@ -3059,6 +3114,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
adev->hdp.funcs = &hdp_v6_0_funcs;
break;
case IP_VERSION(7, 0, 0):
@@ -3140,6 +3196,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(14, 0, 2):
adev->smuio.funcs = &smuio_v14_0_2_funcs;
break;
+ case IP_VERSION(15, 0, 0):
+ adev->smuio.funcs = &smuio_v15_0_0_funcs;
+ break;
+ case IP_VERSION(15, 0, 8):
+ adev->smuio.funcs = &smuio_v15_0_8_funcs;
+ break;
default:
break;
}
@@ -3210,8 +3272,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
return r;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
- !amdgpu_sriov_vf(adev)) ||
- (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
+ !amdgpu_sriov_vf(adev) &&
+ amdgpu_dpm == 1) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO &&
+ amdgpu_dpm == 1)) {
r = amdgpu_discovery_set_smu_ip_blocks(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b5d34797d606..48b6f6077992 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -415,15 +415,15 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
int i = 0;
drm_connector_list_iter_begin(dev, &iter);
- DRM_INFO("AMDGPU Display Connectors\n");
+ drm_info(dev, "AMDGPU Display Connectors\n");
drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
- DRM_INFO("Connector %d:\n", i);
- DRM_INFO(" %s\n", connector->name);
+ drm_info(dev, "Connector %d:\n", i);
+ drm_info(dev, " %s\n", connector->name);
if (amdgpu_connector->hpd.hpd != AMDGPU_HPD_NONE)
- DRM_INFO(" %s\n", hpd_names[amdgpu_connector->hpd.hpd]);
+ drm_info(dev, " %s\n", hpd_names[amdgpu_connector->hpd.hpd]);
if (amdgpu_connector->ddc_bus) {
- DRM_INFO(" DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+ drm_info(dev, " DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
amdgpu_connector->ddc_bus->rec.mask_clk_reg,
amdgpu_connector->ddc_bus->rec.mask_data_reg,
amdgpu_connector->ddc_bus->rec.a_clk_reg,
@@ -433,11 +433,11 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
amdgpu_connector->ddc_bus->rec.y_clk_reg,
amdgpu_connector->ddc_bus->rec.y_data_reg);
if (amdgpu_connector->router.ddc_valid)
- DRM_INFO(" DDC Router 0x%x/0x%x\n",
+ drm_info(dev, " DDC Router 0x%x/0x%x\n",
amdgpu_connector->router.ddc_mux_control_pin,
amdgpu_connector->router.ddc_mux_state);
if (amdgpu_connector->router.cd_valid)
- DRM_INFO(" Clock/Data Router 0x%x/0x%x\n",
+ drm_info(dev, " Clock/Data Router 0x%x/0x%x\n",
amdgpu_connector->router.cd_mux_control_pin,
amdgpu_connector->router.cd_mux_state);
} else {
@@ -447,35 +447,35 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
connector->connector_type == DRM_MODE_CONNECTOR_DVIA ||
connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)
- DRM_INFO(" DDC: no ddc bus - possible BIOS bug - please report to xorg-driver-ati@lists.x.org\n");
+ drm_info(dev, " DDC: no ddc bus - possible BIOS bug - please report to xorg-driver-ati@lists.x.org\n");
}
- DRM_INFO(" Encoders:\n");
+ drm_info(dev, " Encoders:\n");
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
devices = amdgpu_encoder->devices & amdgpu_connector->devices;
if (devices) {
if (devices & ATOM_DEVICE_CRT1_SUPPORT)
- DRM_INFO(" CRT1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " CRT1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_CRT2_SUPPORT)
- DRM_INFO(" CRT2: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " CRT2: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_LCD1_SUPPORT)
- DRM_INFO(" LCD1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " LCD1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_DFP1_SUPPORT)
- DRM_INFO(" DFP1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " DFP1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_DFP2_SUPPORT)
- DRM_INFO(" DFP2: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " DFP2: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_DFP3_SUPPORT)
- DRM_INFO(" DFP3: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " DFP3: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_DFP4_SUPPORT)
- DRM_INFO(" DFP4: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " DFP4: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_DFP5_SUPPORT)
- DRM_INFO(" DFP5: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " DFP5: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_DFP6_SUPPORT)
- DRM_INFO(" DFP6: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " DFP6: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_TV1_SUPPORT)
- DRM_INFO(" TV1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " TV1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
if (devices & ATOM_DEVICE_CV_SUPPORT)
- DRM_INFO(" CV: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ drm_info(dev, " CV: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
}
}
i++;
@@ -1880,7 +1880,12 @@ int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
struct drm_scanout_buffer *sb)
{
struct amdgpu_bo *abo;
- struct drm_framebuffer *fb = plane->state->fb;
+ struct drm_framebuffer *fb;
+
+ if (drm_drv_uses_atomic_modeset(plane->dev))
+ fb = plane->state->fb;
+ else
+ fb = plane->fb;
if (!fb)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index e22cfa7c6d32..b9c38a4fe546 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -37,6 +37,7 @@
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_vm.h"
+#include "amdgpu_ttm.h"
#include <drm/amdgpu_drm.h>
#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
@@ -95,18 +96,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
attach->peer2peer = false;
- /*
- * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
- * Such buffers cannot be safely accessed over P2P due to device-local
- * compression metadata. Fallback to system-memory path instead.
- * Device supports GFX12 (GC 12.x or newer)
- * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag
- *
- */
- if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
- bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
- attach->peer2peer = false;
-
if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) &&
pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
@@ -241,6 +230,14 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
if (r)
return ERR_PTR(r);
break;
+
+ case AMDGPU_PL_MMIO_REMAP:
+ r = amdgpu_ttm_mmio_remap_alloc_sgt(adev, bo->tbo.resource,
+ attach->dev, dir, &sgt);
+ if (r)
+ return ERR_PTR(r);
+ break;
+
default:
return ERR_PTR(-EINVAL);
}
@@ -266,6 +263,15 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ if (bo->tbo.resource &&
+ bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) {
+ amdgpu_ttm_mmio_remap_free_sgt(attach->dev, dir, sgt);
+ return;
+ }
+
if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 2675689ef70f..ebd14c292978 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -348,6 +348,36 @@ enum AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
};
+enum AMDGPU_SOC_V1_0_DOORBELL_ASSIGNMENT { /* doorbell assignment values, grouped by the ranges noted in the comments below */
+ /* KIQ/HIQ/DIQ: 0x000 ~ 0x002 */
+ AMDGPU_SOC_V1_0_DOORBELL_KIQ_START = 0x000,
+ AMDGPU_SOC_V1_0_DOORBELL_HIQ = 0x001,
+ AMDGPU_SOC_V1_0_DOORBELL_DIQ = 0x002,
+ /* Compute: 0x03 ~ 0x20
+  * (MEC rings, the two MES rings and the user queue range; the entries
+  * below end with XCC_RANGE = 0x020)
+  */
+ AMDGPU_SOC_V1_0_DOORBELL_MEC_RING_START = 0x003,
+ AMDGPU_SOC_V1_0_DOORBELL_MEC_RING_END = 0x00A,
+ AMDGPU_SOC_V1_0_DOORBELL_MES_RING0 = 0x00B,
+ AMDGPU_SOC_V1_0_DOORBELL_MES_RING1 = 0x00C,
+ AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_START = 0x00D,
+ AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_END = 0x01F,
+ AMDGPU_SOC_V1_0_DOORBELL_XCC_RANGE = 0x020, /* NOTE(review): presumably the per-XCC range size rather than a doorbell index - confirm */
+
+ /* SDMA: 0x100 ~ 0x19F */
+ AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_START = 0x100,
+ AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_END = 0x19F,
+ /* IH: 0x1A0 ~ 0x1AF (only the first index of the range is named) */
+ AMDGPU_SOC_V1_0_DOORBELL_IH = 0x1A0,
+ /* VCN: 0x1B0 ~ 0x1EF */
+ AMDGPU_SOC_V1_0_DOORBELL_VCN_START = 0x1B0,
+ AMDGPU_SOC_V1_0_DOORBELL_VCN_END = 0x1EF,
+
+ AMDGPU_SOC_V1_0_DOORBELL_FIRST_NON_CP = AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_START, /* non-CP span begins at the first SDMA doorbell */
+ AMDGPU_SOC_V1_0_DOORBELL_LAST_NON_CP = AMDGPU_SOC_V1_0_DOORBELL_VCN_END, /* non-CP span ends at the last VCN doorbell */
+
+ AMDGPU_SOC_V1_0_DOORBELL_MAX_ASSIGNMENT = 0x1EF, /* same value as VCN_END, the highest index assigned above */
+ AMDGPU_SOC_V1_0_DOORBELL_INVALID = 0xFFFF /* lies above MAX_ASSIGNMENT */
+};
+
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
index 3040437d99c2..bc7858567321 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -129,7 +129,7 @@ uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
{
int db_bo_offset;
- db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
+ db_bo_offset = amdgpu_bo_gpu_offset(db_bo);
/* doorbell index is 32 bit but doorbell's size can be 32 bit
* or 64 bit, so *db_size(in byte)/4 for alignment.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 848e6b7db482..bb8d9256fae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2402,7 +2402,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
supports_atomic = true;
if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
- DRM_INFO("This hardware requires experimental hardware support.\n"
+ dev_info(&pdev->dev, "This hardware requires experimental hardware support.\n"
"See modparam exp_hw_support\n");
return -ENODEV;
}
@@ -2449,7 +2449,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
retry_init:
ret = drm_dev_register(ddev, flags);
if (ret == -EAGAIN && ++retry <= 3) {
- DRM_INFO("retry init %d\n", retry);
+ drm_info(adev_to_drm(adev), "retry init %d\n", retry);
/* Don't request EX mode too frequently which is attacking */
msleep(5000);
goto retry_init;
@@ -3164,7 +3164,6 @@ static int __init amdgpu_init(void)
if (r)
goto error_fence;
- DRM_INFO("amdgpu kernel modesetting enabled.\n");
amdgpu_register_atpx_handler();
amdgpu_acpi_detect();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index c7843e336310..06c333b2213b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -89,6 +89,16 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
return seq;
}
+/*
+ * Snapshot the current write pointer of the fence's ring into
+ * af->fence_wptr_start. Meant to be called right before the fence
+ * packet is emitted on that ring.
+ */
+static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
+{
+	struct amdgpu_ring *ring = af->ring;
+
+	af->fence_wptr_start = ring->wptr;
+}
+
+/*
+ * Snapshot the current write pointer of the fence's ring into
+ * af->fence_wptr_end. Meant to be called right after the fence
+ * packet has been emitted on that ring.
+ */
+static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
+{
+	struct amdgpu_ring *ring = af->ring;
+
+	af->fence_wptr_end = ring->wptr;
+}
+
/**
* amdgpu_fence_emit - emit a fence on the requested ring
*
@@ -116,8 +126,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
&ring->fence_drv.lock,
adev->fence_context + ring->idx, seq);
+ amdgpu_fence_save_fence_wptr_start(af);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
+ amdgpu_fence_save_fence_wptr_end(af);
amdgpu_fence_save_wptr(af);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
@@ -709,6 +721,7 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
struct amdgpu_ring *ring = af->ring;
unsigned long flags;
u32 seq, last_seq;
+ bool reemitted = false;
last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
@@ -726,7 +739,9 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
fence = container_of(unprocessed, struct amdgpu_fence, base);
- if (fence == af)
+ if (fence->reemitted > 1)
+ reemitted = true;
+ else if (fence == af)
dma_fence_set_error(&fence->base, -ETIME);
else if (fence->context == af->context)
dma_fence_set_error(&fence->base, -ECANCELED);
@@ -734,9 +749,12 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
rcu_read_unlock();
} while (last_seq != seq);
spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
- /* signal the guilty fence */
- amdgpu_fence_write(ring, (u32)af->base.seqno);
- amdgpu_fence_process(ring);
+
+ if (reemitted) {
+ /* if we've already reemitted once then just cancel everything */
+ amdgpu_fence_driver_force_completion(af->ring);
+ af->ring->ring_backup_entries_to_copy = 0;
+ }
}
void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
@@ -784,10 +802,18 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
/* save everything if the ring is not guilty, otherwise
* just save the content from other contexts.
*/
- if (!guilty_fence || (fence->context != guilty_fence->context))
+ if (!fence->reemitted &&
+ (!guilty_fence || (fence->context != guilty_fence->context))) {
amdgpu_ring_backup_unprocessed_command(ring, wptr,
fence->wptr);
+ } else if (!fence->reemitted) {
+ /* always save the fence */
+ amdgpu_ring_backup_unprocessed_command(ring,
+ fence->fence_wptr_start,
+ fence->fence_wptr_end);
+ }
wptr = fence->wptr;
+ fence->reemitted++;
}
rcu_read_unlock();
} while (last_seq != seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
index 328a1b963548..988ec7d7b487 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -62,17 +62,17 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
struct FW_ATT_RECORD fw_att_record = {0};
if (size < sizeof(struct FW_ATT_RECORD)) {
- DRM_WARN("FW attestation input buffer not enough memory");
+ drm_warn(adev_to_drm(adev), "FW attestation input buffer not enough memory");
return -EINVAL;
}
if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
- DRM_WARN("FW attestation out of bounds");
+ drm_warn(adev_to_drm(adev), "FW attestation out of bounds");
return 0;
}
if (psp_get_fw_attestation_records_addr(&adev->psp, &records_addr)) {
- DRM_WARN("Failed to get FW attestation record address");
+ drm_warn(adev_to_drm(adev), "Failed to get FW attestation record address");
return -EINVAL;
}
@@ -86,11 +86,12 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
false);
if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) {
- DRM_WARN("Invalid FW attestation cookie");
+ drm_warn(adev_to_drm(adev), "Invalid FW attestation cookie");
return -EINVAL;
}
- DRM_INFO("FW attestation version = 0x%X", fw_att_hdr.AttDbVersion);
+ drm_info(adev_to_drm(adev), "FW attestation version = 0x%X",
+ fw_att_hdr.AttDbVersion);
}
amdgpu_device_vram_access(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index d2237ce9da70..16c3b78e50cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -476,7 +476,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
/* Compute table size */
adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
- DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
+ drm_info(adev_to_drm(adev), "GART: num cpu pages %u, num gpu pages %u\n",
adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 3e38c5db2987..032971d0a3cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -112,47 +112,6 @@ amdgpu_gem_update_timeline_node(struct drm_file *filp,
return 0;
}
-static void
-amdgpu_gem_update_bo_mapping(struct drm_file *filp,
- struct amdgpu_bo_va *bo_va,
- uint32_t operation,
- uint64_t point,
- struct dma_fence *fence,
- struct drm_syncobj *syncobj,
- struct dma_fence_chain *chain)
-{
- struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- struct dma_fence *last_update;
-
- if (!syncobj)
- return;
-
- /* Find the last update fence */
- switch (operation) {
- case AMDGPU_VA_OP_MAP:
- case AMDGPU_VA_OP_REPLACE:
- if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
- last_update = vm->last_update;
- else
- last_update = bo_va->last_pt_update;
- break;
- case AMDGPU_VA_OP_UNMAP:
- case AMDGPU_VA_OP_CLEAR:
- last_update = fence;
- break;
- default:
- return;
- }
-
- /* Add fence to timeline */
- if (!point)
- drm_syncobj_replace_fence(syncobj, last_update);
- else
- drm_syncobj_add_point(syncobj, chain, last_update, point);
-}
-
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
@@ -378,7 +337,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
goto out_unlock;
r = amdgpu_vm_clear_freed(adev, vm, &fence);
- if (unlikely(r < 0))
+ if (unlikely(r < 0) && !drm_dev_is_unplugged(adev_to_drm(adev)))
dev_err(adev->dev, "failed to clear page "
"tables on GEM object close (%ld)\n", r);
if (r || !fence)
@@ -388,7 +347,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
dma_fence_put(fence);
out_unlock:
- if (r)
+ if (r && !drm_dev_is_unplugged(adev_to_drm(adev)))
dev_err(adev->dev, "leaking bo va (%ld)\n", r);
drm_exec_fini(&exec);
}
@@ -719,6 +678,15 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
if (unlikely(r != 0))
goto out;
+ /* Reject MMIO_REMAP BOs at IOCTL level: metadata/tiling does not apply. */
+ if (robj->tbo.resource &&
+ robj->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) {
+ DRM_WARN("metadata ioctl on MMIO_REMAP BO (handle %d)\n",
+ args->handle);
+ r = -EINVAL;
+ goto unreserve;
+ }
+
if (args->op == AMDGPU_GEM_METADATA_OP_GET_METADATA) {
amdgpu_bo_get_tiling_flags(robj, &args->data.tiling_info);
r = amdgpu_bo_get_metadata(robj, args->data.data,
@@ -764,16 +732,19 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint32_t operation)
{
- struct dma_fence *fence = dma_fence_get_stub();
+ struct dma_fence *clear_fence = dma_fence_get_stub();
+ struct dma_fence *last_update = NULL;
int r;
if (!amdgpu_vm_ready(vm))
- return fence;
+ return clear_fence;
- r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ /* First clear freed BOs and get a fence for that work, if any. */
+ r = amdgpu_vm_clear_freed(adev, vm, &clear_fence);
if (r)
goto error;
+ /* For MAP/REPLACE we also need to update the BO mappings. */
if (operation == AMDGPU_VA_OP_MAP ||
operation == AMDGPU_VA_OP_REPLACE) {
r = amdgpu_vm_bo_update(adev, bo_va, false);
@@ -781,13 +752,59 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
goto error;
}
+ /* Always update PDEs after we touched the mappings. */
r = amdgpu_vm_update_pdes(adev, vm, false);
+ if (r)
+ goto error;
+
+ /*
+ * Decide which fence represents the "last update" for this VM/BO:
+ *
+ * - For MAP/REPLACE we want the PT update fence, which is tracked as
+ * either vm->last_update (for always-valid BOs) or bo_va->last_pt_update
+ * (for per-BO updates).
+ *
+ * - For UNMAP/CLEAR we rely on the fence returned by
+ * amdgpu_vm_clear_freed(), which already covers the page table work
+ * for the removed mappings.
+ */
+ switch (operation) {
+ case AMDGPU_VA_OP_MAP:
+ case AMDGPU_VA_OP_REPLACE:
+ if (bo_va && bo_va->base.bo) {
+ if (amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo)) {
+ if (vm->last_update)
+ last_update = dma_fence_get(vm->last_update);
+ } else {
+ if (bo_va->last_pt_update)
+ last_update = dma_fence_get(bo_va->last_pt_update);
+ }
+ }
+ break;
+ case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ if (clear_fence)
+ last_update = dma_fence_get(clear_fence);
+ break;
+ default:
+ break;
+ }
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
- return fence;
+ /*
+ * If we managed to pick a more specific last-update fence, prefer it
+ * over the generic clear_fence and drop the extra reference to the
+ * latter.
+ */
+ if (last_update) {
+ dma_fence_put(clear_fence);
+ return last_update;
+ }
+
+ return clear_fence;
}
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
@@ -813,6 +830,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
uint64_t vm_size;
int r = 0;
+ /* Validate virtual address range against reserved regions. */
if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
dev_dbg(dev->dev,
"va_address 0x%llx is in reserved area 0x%llx\n",
@@ -846,6 +864,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
+ /* Validate operation type. */
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
case AMDGPU_VA_OP_UNMAP:
@@ -869,6 +888,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
abo = NULL;
}
+ /* Add input syncobj fences (if any) for synchronization. */
r = amdgpu_gem_add_input_fence(filp,
args->input_fence_syncobj_handles,
args->num_syncobj_handles);
@@ -891,6 +911,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
goto error;
}
+ /* Resolve the BO-VA mapping for this VM/BO combination. */
if (abo) {
bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
if (!bo_va) {
@@ -903,6 +924,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = NULL;
}
+ /*
+ * Prepare the timeline syncobj node if the user requested a VM
+ * timeline update. This only allocates/looks up the syncobj and
+ * chain node; the actual fence is attached later.
+ */
r = amdgpu_gem_update_timeline_node(filp,
args->vm_timeline_syncobj_out,
args->vm_timeline_point,
@@ -934,18 +960,30 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
default:
break;
}
+
+ /*
+ * Once the VA operation is done, update the VM and obtain the fence
+ * that represents the last relevant update for this mapping. This
+ * fence can then be exported to the user-visible VM timeline.
+ */
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
args->operation);
- if (timeline_syncobj)
- amdgpu_gem_update_bo_mapping(filp, bo_va,
- args->operation,
- args->vm_timeline_point,
- fence, timeline_syncobj,
- timeline_chain);
- else
- dma_fence_put(fence);
+ if (timeline_syncobj && fence) {
+ if (!args->vm_timeline_point) {
+ /* Replace the existing fence when no point is given. */
+ drm_syncobj_replace_fence(timeline_syncobj,
+ fence);
+ } else {
+ /* Attach the last-update fence at a specific point. */
+ drm_syncobj_add_point(timeline_syncobj,
+ timeline_chain,
+ fence,
+ args->vm_timeline_point);
+ }
+ }
+ dma_fence_put(fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8b118c53f351..cf7a07855dae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -100,6 +100,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
/**
* amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
*
+ * @adev: amdgpu device pointer
* @mask: array in which the per-shader array disable masks will be stored
* @max_se: number of SEs
* @max_sh: number of SHs
@@ -107,7 +108,8 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
* The bitmask of CUs to be disabled in the shader array determined by se and
* sh is stored in mask[se * max_sh + sh].
*/
-void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
+void amdgpu_gfx_parse_disable_cu(struct amdgpu_device *adev, unsigned int *mask,
+ unsigned int max_se, unsigned int max_sh)
{
unsigned int se, sh, cu;
const char *p;
@@ -123,16 +125,16 @@ void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsign
int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
if (ret < 3) {
- DRM_ERROR("amdgpu: could not parse disable_cu\n");
+ drm_err(adev_to_drm(adev), "could not parse disable_cu\n");
return;
}
if (se < max_se && sh < max_sh && cu < 16) {
- DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
+ drm_info(adev_to_drm(adev), "Disabling CU %u.%u.%u\n", se, sh, cu);
mask[se * max_sh + sh] |= 1u << cu;
} else {
- DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
- se, sh, cu);
+ drm_err(adev_to_drm(adev), "disable_cu %u.%u.%u is out of range\n",
+ se, sh, cu);
}
next = strchr(p, ',');
@@ -150,7 +152,7 @@ static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
if (amdgpu_compute_multipipe != -1) {
- dev_info(adev->dev, "amdgpu: forcing compute pipe policy %d\n",
+ dev_info(adev->dev, " forcing compute pipe policy %d\n",
amdgpu_compute_multipipe);
return amdgpu_compute_multipipe == 1;
}
@@ -511,7 +513,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
j = i + xcc_id * adev->gfx.num_compute_rings;
amdgpu_mes_unmap_legacy_queue(adev,
&adev->gfx.compute_ring[j],
- RESET_QUEUES, 0, 0);
+ RESET_QUEUES, 0, 0, xcc_id);
}
return 0;
}
@@ -562,7 +564,7 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
j = i + xcc_id * adev->gfx.num_gfx_rings;
amdgpu_mes_unmap_legacy_queue(adev,
&adev->gfx.gfx_ring[j],
- PREEMPT_QUEUES, 0, 0);
+ PREEMPT_QUEUES, 0, 0, xcc_id);
}
}
return 0;
@@ -644,7 +646,8 @@ static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
j = i + xcc_id * adev->gfx.num_compute_rings;
r = amdgpu_mes_map_legacy_queue(adev,
- &adev->gfx.compute_ring[j]);
+ &adev->gfx.compute_ring[j],
+ xcc_id);
if (r) {
dev_err(adev->dev, "failed to map compute queue\n");
return r;
@@ -733,7 +736,8 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
j = i + xcc_id * adev->gfx.num_gfx_rings;
r = amdgpu_mes_map_legacy_queue(adev,
- &adev->gfx.gfx_ring[j]);
+ &adev->gfx.gfx_ring[j],
+ xcc_id);
if (r) {
dev_err(adev->dev, "failed to map gfx queue\n");
return r;
@@ -1067,7 +1071,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_
return 0;
if (adev->mes.ring[0].sched.ready)
- return amdgpu_mes_rreg(adev, reg);
+ return amdgpu_mes_rreg(adev, reg, xcc_id);
BUG_ON(!ring->funcs->emit_rreg);
@@ -1143,7 +1147,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
return;
if (adev->mes.ring[0].sched.ready) {
- amdgpu_mes_wreg(adev, reg, v);
+ amdgpu_mes_wreg(adev, reg, v, xcc_id);
return;
}
@@ -1195,6 +1199,40 @@ failed_kiq_write:
dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
+/**
+ * amdgpu_gfx_get_hdp_flush_mask - look up the HDP flush mask for a ring
+ *
+ * @ring: ring the HDP flush is meant for
+ * @hdp_flush_mask: filled with the NBIO ref-and-mask value for @ring
+ * @reg_mem_engine: filled with the engine selector (1 for GFX rings,
+ *                  0 for compute, MES and KIQ rings)
+ *
+ * GFX and compute rings shift their base mask left by ring->pipe; MES and
+ * KIQ rings use their mask unshifted. If any argument is NULL, or the ring
+ * type is not one of the four handled here, a message is logged and both
+ * outputs are left untouched, so callers should pre-initialize them.
+ */
+void amdgpu_gfx_get_hdp_flush_mask(struct amdgpu_ring *ring,
+				   uint32_t *hdp_flush_mask, uint32_t *reg_mem_engine)
+{
+	const struct nbio_hdp_flush_reg *nbio_hf_reg;
+
+	if (!ring || !hdp_flush_mask || !reg_mem_engine) {
+		/* ring may be NULL here, so no device is available to log against */
+		DRM_INFO("%s:invalid params\n", __func__);
+		return;
+	}
+
+	nbio_hf_reg = ring->adev->nbio.hdp_flush_reg;
+
+	switch (ring->funcs->type) {
+	case AMDGPU_RING_TYPE_GFX:
+		*hdp_flush_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
+		*reg_mem_engine = 1; /* pfp */
+		break;
+	case AMDGPU_RING_TYPE_COMPUTE:
+		*hdp_flush_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+		*reg_mem_engine = 0;
+		break;
+	case AMDGPU_RING_TYPE_MES:
+		*hdp_flush_mask = nbio_hf_reg->ref_and_mask_cp8;
+		*reg_mem_engine = 0;
+		break;
+	case AMDGPU_RING_TYPE_KIQ:
+		*hdp_flush_mask = nbio_hf_reg->ref_and_mask_cp9;
+		*reg_mem_engine = 0;
+		break;
+	default:
+		/* device-aware helper, matching the logging conversion done in this file */
+		drm_err(adev_to_drm(ring->adev), "%s:unsupported ring type %d\n",
+			__func__, ring->funcs->type);
+		return;
+	}
+}
+
int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
{
signed long r, cnt = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index efd61a1ccc66..585cc8e81bb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -328,6 +328,8 @@ struct amdgpu_gfx_shadow_info {
u32 shadow_alignment;
u32 csa_size;
u32 csa_alignment;
+ u32 eop_size;
+ u32 eop_alignment;
};
struct amdgpu_gfx_funcs {
@@ -356,6 +358,8 @@ struct amdgpu_gfx_funcs {
int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
int (*get_xccs_per_xcp)(struct amdgpu_device *adev);
+ void (*get_hdp_flush_mask)(struct amdgpu_ring *ring,
+ uint32_t *ref_and_mask, uint32_t *reg_mem_engine);
};
struct sq_work {
@@ -565,8 +569,8 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
return (u32)((1ULL << bit_width) - 1);
}
-void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
- unsigned max_sh);
+void amdgpu_gfx_parse_disable_cu(struct amdgpu_device *adev, unsigned int *mask,
+ unsigned int max_se, unsigned int max_sh);
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id);
@@ -615,6 +619,8 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
+void amdgpu_gfx_get_hdp_flush_mask(struct amdgpu_ring *ring,
+ uint32_t *ref_and_mask, uint32_t *reg_mem_engine);
int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 869bceb0fe2c..6d7b8bb953ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -524,6 +524,54 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
} while (fault->timestamp < tmp);
}
+/**
+ * amdgpu_gmc_handle_retry_fault - shared handling of a retry page fault
+ *
+ * @adev: amdgpu device pointer
+ * @entry: interrupt vector entry describing the fault
+ * @addr: faulting address
+ * @cam_index: value written to the retry CAM doorbell after the fault
+ *             handler ran (retry CAM path only)
+ * @node_id: forwarded to amdgpu_vm_handle_fault()
+ * @write_fault: forwarded to amdgpu_vm_handle_fault()
+ *
+ * Returns 1 if the fault was filtered out, delegated to another IH ring,
+ * or amdgpu_vm_handle_fault() returned non-zero; 0 otherwise.
+ */
+int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev,
+				  struct amdgpu_iv_entry *entry,
+				  u64 addr,
+				  u32 cam_index,
+				  u32 node_id,
+				  bool write_fault)
+{
+	int ret;
+
+	if (!adev->irq.retry_cam_enabled) {
+		/* Without the retry CAM, only the first fault per address is processed. */
+		if (entry->ih != &adev->irq.ih_soft &&
+		    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+					     entry->timestamp))
+			return 1;
+
+		/* Hand the entry to a different ring unless the hardware
+		 * already did so.
+		 */
+		if (entry->ih == &adev->irq.ih) {
+			amdgpu_irq_delegate(adev, entry, 8);
+			return 1;
+		}
+
+		/* Try to recover from the fault by filling page tables. */
+		return amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid,
+					      node_id, addr, entry->timestamp,
+					      write_fault) ? 1 : 0;
+	}
+
+	/* Retry CAM path: hand the entry to a different ring unless the
+	 * hardware already did so.
+	 */
+	if (entry->ih == &adev->irq.ih) {
+		amdgpu_irq_delegate(adev, entry, 8);
+		return 1;
+	}
+
+	ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+				     addr, entry->timestamp, write_fault);
+	/* The CAM doorbell is written whether or not the handler succeeded. */
+	WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+
+	return ret ? 1 : 0;
+}
+
int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
{
int r;
@@ -690,7 +738,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* itself at least for GART.
*/
mutex_lock(&adev->mman.gtt_window_lock);
- r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base,
AMDGPU_FENCE_OWNER_UNDEFINED,
16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
&job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB);
@@ -811,9 +859,9 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
unsigned long flags;
uint32_t seq;
- if (adev->mes.ring[0].sched.ready) {
+ if (adev->mes.ring[MES_PIPE_INST(xcc_inst, 0)].sched.ready) {
amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
- ref, mask);
+ ref, mask, xcc_inst);
return;
}
@@ -901,6 +949,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 727342689d4b..e8e8bfa098c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -32,9 +32,11 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
-/* VA hole for 48bit addresses on Vega10 */
-#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
-#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL
+/* VA hole for 48bit and 57bit addresses */
+#define AMDGPU_GMC_HOLE_START (adev->vm_manager.root_level == AMDGPU_VM_PDB3 ?\
+ 0x0100000000000000ULL : 0x0000800000000000ULL)
+#define AMDGPU_GMC_HOLE_END (adev->vm_manager.root_level == AMDGPU_VM_PDB3 ?\
+ 0xff00000000000000ULL : 0xffff800000000000ULL)
/*
* Hardware is programmed as if the hole doesn't exists with start and end
@@ -43,7 +45,8 @@
* This mask is used to remove the upper 16bits of the VA and so come up with
* the linear addr value.
*/
-#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL
+#define AMDGPU_GMC_HOLE_MASK (adev->vm_manager.root_level == AMDGPU_VM_PDB3 ?\
+ 0x00ffffffffffffffULL : 0x0000ffffffffffffULL)
/*
* Ring size as power of two for the log of recent faults.
@@ -353,6 +356,7 @@ struct amdgpu_gmc {
u64 MC_VM_MX_L1_TLB_CNTL;
u64 noretry_flags;
+ u64 init_pte_flags;
bool flush_tlb_needs_extra_type_0;
bool flush_tlb_needs_extra_type_2;
@@ -394,13 +398,8 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
*
* @addr: address to extend
*/
-static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
-{
- if (addr >= AMDGPU_GMC_HOLE_START)
- addr |= AMDGPU_GMC_HOLE_END;
-
- return addr;
-}
+#define amdgpu_gmc_sign_extend(addr) ((addr) >= AMDGPU_GMC_HOLE_START ?\
+ ((addr) | AMDGPU_GMC_HOLE_END) : (addr))
bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev);
int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev);
@@ -426,6 +425,12 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
uint16_t pasid, uint64_t timestamp);
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid);
+int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry,
+ u64 addr,
+ u32 cam_index,
+ u32 node_id,
+ bool write_fault);
int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 895c1e4c6747..dd9b845d5783 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -26,6 +26,8 @@
#include "amdgpu.h"
+#define GART_ENTRY_WITHOUT_BO_COLOR 1
+
static inline struct amdgpu_gtt_mgr *
to_gtt_mgr(struct ttm_resource_manager *man)
{
@@ -181,6 +183,49 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
}
/**
+ * amdgpu_gtt_mgr_alloc_entries - alloc GART entries without GTT bo
+ *
+ * @mgr: The GTT manager object
+ * @mm_node: The drm mm node to return the new allocation node information
+ * @num_pages: The number of pages for the new allocation
+ * @mode: The new allocation mode
+ *
+ * Helper to dynamically allocate GART entries to map memory not associated
+ * with a GTT BO, for example VRAM BO physical memory or remote physical memory.
+ */
+int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
+				 struct drm_mm_node *mm_node,
+				 u64 num_pages,
+				 enum drm_mm_insert_mode mode)
+{
+	struct amdgpu_device *adev;
+	u64 gart_pages;
+	int ret;
+
+	adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
+	/* Upper bound of the search range: the GART size expressed in pages. */
+	gart_pages = adev->gmc.gart_size >> PAGE_SHIFT;
+
+	spin_lock(&mgr->lock);
+	/*
+	 * The node is tagged with GART_ENTRY_WITHOUT_BO_COLOR so it can be
+	 * told apart from nodes that belong to a GTT BO.
+	 */
+	ret = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages,
+					  0, GART_ENTRY_WITHOUT_BO_COLOR,
+					  0, gart_pages, mode);
+	spin_unlock(&mgr->lock);
+
+	return ret;
+}
+
+/**
+ * amdgpu_gtt_mgr_free_entries - free GART entries not associated with GTT bo
+ *
+ * @mgr: The GTT manager object
+ * @mm_node: The drm mm node to free
+ */
+void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
+				 struct drm_mm_node *mm_node)
+{
+	spin_lock(&mgr->lock);
+
+	/* A node that was never inserted (or is already removed) is left alone. */
+	if (!drm_mm_node_allocated(mm_node))
+		goto out_unlock;
+
+	drm_mm_remove_node(mm_node);
+
+out_unlock:
+	spin_unlock(&mgr->lock);
+}
+
+/**
* amdgpu_gtt_mgr_recover - re-init gart
*
* @mgr: amdgpu_gtt_mgr pointer
@@ -196,6 +241,9 @@ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
spin_lock(&mgr->lock);
drm_mm_for_each_node(mm_node, &mgr->mm) {
+ if (mm_node->color == GART_ENTRY_WITHOUT_BO_COLOR)
+ continue;
+
node = container_of(mm_node, typeof(*node), mm_nodes[0]);
amdgpu_ttm_recover_gart(node->base.bo);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
index 484e936812e4..9c56be725ff3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
@@ -36,6 +36,10 @@ struct amdgpu_imu_funcs {
int (*start_imu)(struct amdgpu_device *adev);
void (*program_rlc_ram)(struct amdgpu_device *adev);
int (*wait_for_reset_status)(struct amdgpu_device *adev);
+ int (*switch_compute_partition)(struct amdgpu_device *adev,
+ int num_xccs_per_xcp,
+ int compute_partition_mode);
+ void (*init_mcm_addr_lut)(struct amdgpu_device *adev);
};
struct imu_rlc_ram_golden {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
index 99e1cf4fc955..6aa54156bbc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
@@ -94,3 +94,318 @@ void amdgpu_ip_map_init(struct amdgpu_device *adev)
adev->ip_map.logical_to_dev_inst = amdgpu_logical_to_dev_inst;
adev->ip_map.logical_to_dev_mask = amdgpu_logical_to_dev_mask;
}
+
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->suspend) {
+ r = ip_block->version->funcs->suspend(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "suspend of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = false;
+ return 0;
+}
+
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->resume) {
+ r = ip_block->version->funcs->resume(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = true;
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_get_ip_block - get a hw IP pointer
+ *
+ * @adev: amdgpu_device pointer
+ * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Returns a pointer to the hardware IP block structure
+ * if it exists for the asic, otherwise NULL.
+ */
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+ enum amd_ip_block_type type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->type == type)
+ return &adev->ip_blocks[i];
+
+ return NULL;
+}
+
+/**
+ * amdgpu_device_ip_block_version_cmp - check an IP block's version against a minimum
+ *
+ * @adev: amdgpu_device pointer
+ * @type: enum amd_ip_block_type
+ * @major: major version
+ * @minor: minor version
+ *
+ * return 0 if equal or greater
+ * return 1 if smaller or the ip_block doesn't exist
+ */
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+ enum amd_ip_block_type type, u32 major,
+ u32 minor)
+{
+ struct amdgpu_ip_block *ip_block =
+ amdgpu_device_ip_get_ip_block(adev, type);
+
+ if (ip_block && ((ip_block->version->major > major) ||
+ ((ip_block->version->major == major) &&
+ (ip_block->version->minor >= minor))))
+ return 0;
+
+ return 1;
+}
+
+static const char *const ip_block_names[] = {
+ [AMD_IP_BLOCK_TYPE_COMMON] = "common",
+ [AMD_IP_BLOCK_TYPE_GMC] = "gmc",
+ [AMD_IP_BLOCK_TYPE_IH] = "ih",
+ [AMD_IP_BLOCK_TYPE_SMC] = "smu",
+ [AMD_IP_BLOCK_TYPE_PSP] = "psp",
+ [AMD_IP_BLOCK_TYPE_DCE] = "dce",
+ [AMD_IP_BLOCK_TYPE_GFX] = "gfx",
+ [AMD_IP_BLOCK_TYPE_SDMA] = "sdma",
+ [AMD_IP_BLOCK_TYPE_UVD] = "uvd",
+ [AMD_IP_BLOCK_TYPE_VCE] = "vce",
+ [AMD_IP_BLOCK_TYPE_ACP] = "acp",
+ [AMD_IP_BLOCK_TYPE_VCN] = "vcn",
+ [AMD_IP_BLOCK_TYPE_MES] = "mes",
+ [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg",
+ [AMD_IP_BLOCK_TYPE_VPE] = "vpe",
+ [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
+ [AMD_IP_BLOCK_TYPE_ISP] = "isp",
+ [AMD_IP_BLOCK_TYPE_RAS] = "ras",
+};
+
+static const char *ip_block_name(struct amdgpu_device *adev,
+ enum amd_ip_block_type type)
+{
+ int idx = (int)type;
+
+ return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] :
+ "unknown";
+}
+
+/**
+ * amdgpu_device_ip_block_add - add an IP block to the device
+ *
+ * @adev: amdgpu_device pointer
+ * @ip_block_version: pointer to the IP to add
+ *
+ * Adds the IP block driver information to the collection of IPs
+ * on the asic.
+ */
+int amdgpu_device_ip_block_add(
+ struct amdgpu_device *adev,
+ const struct amdgpu_ip_block_version *ip_block_version)
+{
+ if (!ip_block_version)
+ return -EINVAL;
+
+ switch (ip_block_version->type) {
+ case AMD_IP_BLOCK_TYPE_VCN:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+ return 0;
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
+ return 0;
+ break;
+ default:
+ break;
+ }
+
+ dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n",
+ adev->num_ip_blocks,
+ ip_block_name(adev, ip_block_version->type),
+ ip_block_version->major, ip_block_version->minor,
+ ip_block_version->rev, ip_block_version->funcs->name);
+
+ adev->ip_blocks[adev->num_ip_blocks].adev = adev;
+
+ adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_set_clockgating_state - set the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: clockgating state (gate or ungate)
+ *
+ * Sets the requested clockgating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
+int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type,
+ enum amd_clockgating_state state)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type != block_type)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
+ &adev->ip_blocks[i], state);
+ if (r)
+ dev_err(adev->dev,
+ "set_clockgating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ return r;
+}
+
+/**
+ * amdgpu_device_ip_set_powergating_state - set the PG state
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: powergating state (gate or ungate)
+ *
+ * Sets the requested powergating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
+int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type,
+ enum amd_powergating_state state)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type != block_type)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(
+ &adev->ip_blocks[i], state);
+ if (r)
+ dev_err(adev->dev,
+ "set_powergating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ return r;
+}
+
+/**
+ * amdgpu_device_ip_get_clockgating_state - get the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: clockgating feature flags
+ *
+ * Walks the list of IPs on the device and updates the clockgating
+ * flags for each IP.
+ * Updates @flags with the feature flags for each hardware IP where
+ * clockgating is enabled.
+ */
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
+ adev->ip_blocks[i].version->funcs->get_clockgating_state(
+ &adev->ip_blocks[i], flags);
+ }
+}
+
+/**
+ * amdgpu_device_ip_wait_for_idle - wait for idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Waits for the requested hardware IP to be idle.
+ * Returns 0 for success or a negative error code on failure.
+ */
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ struct amdgpu_ip_block *ip_block;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, block_type);
+ if (!ip_block || !ip_block->status.valid)
+ return 0;
+
+ if (ip_block->version->funcs->wait_for_idle)
+ return ip_block->version->funcs->wait_for_idle(ip_block);
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_is_hw - is the hardware IP enabled
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is enabled or not.
+ * Returns true if the IP is enabled, false if not.
+ */
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ struct amdgpu_ip_block *ip_block;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, block_type);
+ if (ip_block)
+ return ip_block->status.hw;
+
+ return false;
+}
+
+/**
+ * amdgpu_device_ip_is_valid - is the hardware IP valid
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is valid or not.
+ * Returns true if the IP is valid, false if not.
+ */
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ struct amdgpu_ip_block *ip_block;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, block_type);
+ if (ip_block)
+ return ip_block->status.valid;
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
index 2490fd322aec..1d0df6d93957 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
@@ -24,6 +24,131 @@
#ifndef __AMDGPU_IP_H__
#define __AMDGPU_IP_H__
+#include "amd_shared.h"
+
+struct amdgpu_device;
+
+/* Define the HW IP blocks that will be used in the driver; add more if necessary */
+enum amd_hw_ip_block_type {
+ GC_HWIP = 1,
+ HDP_HWIP,
+ SDMA0_HWIP,
+ SDMA1_HWIP,
+ SDMA2_HWIP,
+ SDMA3_HWIP,
+ SDMA4_HWIP,
+ SDMA5_HWIP,
+ SDMA6_HWIP,
+ SDMA7_HWIP,
+ LSDMA_HWIP,
+ MMHUB_HWIP,
+ ATHUB_HWIP,
+ NBIO_HWIP,
+ MP0_HWIP,
+ MP1_HWIP,
+ UVD_HWIP,
+ VCN_HWIP = UVD_HWIP,
+ JPEG_HWIP = VCN_HWIP,
+ VCN1_HWIP,
+ VCE_HWIP,
+ VPE_HWIP,
+ DF_HWIP,
+ DCE_HWIP,
+ OSSSYS_HWIP,
+ SMUIO_HWIP,
+ PWR_HWIP,
+ NBIF_HWIP,
+ THM_HWIP,
+ CLK_HWIP,
+ UMC_HWIP,
+ RSMU_HWIP,
+ XGMI_HWIP,
+ DCI_HWIP,
+ PCIE_HWIP,
+ ISP_HWIP,
+ ATU_HWIP,
+ AIGC_HWIP,
+ MAX_HWIP
+};
+
+#define HWIP_MAX_INSTANCE 48
+
+#define HW_ID_MAX 300
+#define IP_VERSION_FULL(mj, mn, rv, var, srev) \
+ (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev))
+#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0)
+#define IP_VERSION_MAJ(ver) ((ver) >> 24)
+#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF)
+#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF)
+#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF)
+#define IP_VERSION_SUBREV(ver) ((ver) & 0xF)
+#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8)
+
+struct amdgpu_ip_map_info {
+ /* Map of logical to actual dev instances/mask */
+ uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
+ int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst);
+ uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask);
+};
+
+#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM
+
+struct amdgpu_ip_block_status {
+ bool valid;
+ bool sw;
+ bool hw;
+ bool late_initialized;
+ bool hang;
+};
+
+struct amdgpu_ip_block_version {
+ const enum amd_ip_block_type type;
+ const u32 major;
+ const u32 minor;
+ const u32 rev;
+ const struct amd_ip_funcs *funcs;
+};
+
+struct amdgpu_ip_block {
+ struct amdgpu_ip_block_status status;
+ const struct amdgpu_ip_block_version *version;
+ struct amdgpu_device *adev;
+};
+
void amdgpu_ip_map_init(struct amdgpu_device *adev);
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
+
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+ enum amd_ip_block_type type);
+
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+ enum amd_ip_block_type type, u32 major,
+ u32 minor);
+
+int amdgpu_device_ip_block_add(
+ struct amdgpu_device *adev,
+ const struct amdgpu_ip_block_version *ip_block_version);
+
+int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type,
+ enum amd_clockgating_state state);
+int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type,
+ enum amd_powergating_state state);
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags);
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+
#endif /* __AMDGPU_IP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 8112ffc85995..82bc6d657e5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -99,6 +99,41 @@ const char *soc15_ih_clientid_name[] = {
"MP1"
};
+const char *soc_v1_0_ih_clientid_name[] = {
+ "IH",
+ "Reserved",
+ "ATHUB",
+ "BIF",
+ "Reserved",
+ "Reserved",
+ "Reserved",
+ "RLC",
+ "Reserved",
+ "Reserved",
+ "GFX",
+ "IMU",
+ "Reserved",
+ "Reserved",
+ "VCN1 or UVD1",
+ "THM",
+ "VCN or UVD",
+ "Reserved",
+ "VMC",
+ "Reserved",
+ "GRBM_CP",
+ "GC_AID",
+ "ROM_SMUIO",
+ "DF",
+ "Reserved",
+ "PWR",
+ "LSDMA",
+ "GC_UTCL2",
+ "nHT",
+ "Reserved",
+ "MP0",
+ "MP1",
+};
+
const int node_id_to_phys_map[NODEID_MAX] = {
[AID0_NODEID] = 0,
[XCD0_NODEID] = 0,
@@ -316,7 +351,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
adev->irq.irq = irq;
adev_to_drm(adev)->max_vblank_count = 0x00ffffff;
- dev_dbg(adev->dev, "amdgpu: irq initialized.\n");
+ dev_dbg(adev->dev, "irq initialized.\n");
return 0;
free_vectors:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 9f0417456abd..af72405a7226 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -26,6 +26,7 @@
#include <linux/irqdomain.h>
#include "soc15_ih_clientid.h"
+#include "soc_v1_0_ih_clientid.h"
#include "amdgpu_ih.h"
#define AMDGPU_MAX_IRQ_SRC_ID 0x100
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
index 37270c4dab8d..532f83d783d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
@@ -318,12 +318,36 @@ void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr)
}
EXPORT_SYMBOL(isp_kernel_buffer_free);
+static int isp_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ if (isp->funcs->hw_resume)
+ return isp->funcs->hw_resume(isp);
+
+ return -ENODEV;
+}
+
+static int isp_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ if (isp->funcs->hw_suspend)
+ return isp->funcs->hw_suspend(isp);
+
+ return -ENODEV;
+}
+
static const struct amd_ip_funcs isp_ip_funcs = {
.name = "isp_ip",
.early_init = isp_early_init,
.hw_init = isp_hw_init,
.hw_fini = isp_hw_fini,
.is_idle = isp_is_idle,
+ .suspend = isp_suspend,
+ .resume = isp_resume,
.set_clockgating_state = isp_set_clockgating_state,
.set_powergating_state = isp_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
index d6f4ffa4c97c..9a5d2b1dff9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
@@ -38,6 +38,8 @@ struct amdgpu_isp;
struct isp_funcs {
int (*hw_init)(struct amdgpu_isp *isp);
int (*hw_fini)(struct amdgpu_isp *isp);
+ int (*hw_suspend)(struct amdgpu_isp *isp);
+ int (*hw_resume)(struct amdgpu_isp *isp);
};
struct amdgpu_isp {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0a0dcbf0798d..c533c0806912 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -147,7 +147,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
}
- dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+ if (dma_fence_get_status(&s_job->s_fence->finished) == 0)
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
struct amdgpu_reset_context reset_context;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 6ee77f431d56..1878e0faa722 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -92,7 +92,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
return;
if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD))
- DRM_WARN("smart shift update failed\n");
+ drm_warn(dev, "smart shift update failed\n");
amdgpu_acpi_fini(adev);
amdgpu_device_fini_hw(adev);
@@ -105,7 +105,7 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
mutex_lock(&mgpu_info.mutex);
if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) {
- DRM_ERROR("Cannot register more gpu instance\n");
+ drm_err(adev_to_drm(adev), "Cannot register more gpu instance\n");
mutex_unlock(&mgpu_info.mutex);
return;
}
@@ -162,7 +162,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
dev_dbg(dev->dev, "Error during ACPI methods call\n");
if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD))
- DRM_WARN("smart shift update failed\n");
+ drm_warn(dev, "smart shift update failed\n");
out:
if (r)
@@ -201,6 +201,9 @@ static enum amd_ip_block_type
type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
break;
+ case AMDGPU_HW_IP_VPE:
+ type = AMD_IP_BLOCK_TYPE_VPE;
+ break;
default:
type = AMD_IP_BLOCK_TYPE_NUM;
break;
@@ -391,6 +394,42 @@ static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev,
return ret;
}
+static int amdgpu_userq_metadata_info_compute(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_compute *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow = {};
+
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
+ meta->eop_size = shadow.eop_size;
+ meta->eop_alignment = shadow.eop_alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int amdgpu_userq_metadata_info_sdma(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_sdma *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->sdma.get_csa_info) {
+ struct amdgpu_sdma_csa_info csa = {};
+
+ adev->sdma.get_csa_info(adev, &csa);
+ meta->csa_size = csa.size;
+ meta->csa_alignment = csa.alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
struct drm_amdgpu_info *info,
struct drm_amdgpu_info_hw_ip *result)
@@ -721,6 +760,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
case AMD_IP_BLOCK_TYPE_UVD:
count = adev->uvd.num_uvd_inst;
break;
+ case AMD_IP_BLOCK_TYPE_VPE:
+ count = adev->vpe.num_instances;
+ break;
/* For all other IP block types not listed in the switch statement
* the ip status is valid here and the instance count is one.
*/
@@ -1363,6 +1405,22 @@ out:
ret = copy_to_user(out, &meta_info,
min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
return 0;
+ case AMDGPU_HW_IP_COMPUTE:
+ ret = amdgpu_userq_metadata_info_compute(adev, info, &meta_info.compute);
+ if (ret)
+ return ret;
+
+ /* Propagate -EFAULT when the copy to userspace fails instead of reporting success. */
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return ret;
+ case AMDGPU_HW_IP_DMA:
+ ret = amdgpu_userq_metadata_info_sdma(adev, info, &meta_info.sdma);
+ if (ret)
+ return ret;
+
+ /* Propagate -EFAULT when the copy to userspace fails instead of reporting success. */
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return ret;
default:
return -EINVAL;
}
@@ -1450,7 +1508,9 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev);
if (r)
- DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
+ drm_warn(adev_to_drm(adev),
+ "Failed to init usermode queue manager (%d), use legacy workload submission only\n",
+ r);
r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 9c182ce501af..dffa0f7276b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -31,6 +31,7 @@
#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
#define AMDGPU_ONE_DOORBELL_SIZE 8
+#define AMDGPU_MES_RESERVED_QUEUES 2
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
{
@@ -91,6 +92,9 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
int amdgpu_mes_init(struct amdgpu_device *adev)
{
int i, r, num_pipes;
+ u32 total_vmid_mask, reserved_vmid_mask;
+ u32 queue_mask, reserved_queue_mask;
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
adev->mes.adev = adev;
@@ -101,12 +105,18 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
spin_lock_init(&adev->mes.queue_id_lock);
mutex_init(&adev->mes.mutex_hidden);
- for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++)
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++)
spin_lock_init(&adev->mes.ring_lock[i]);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+ total_vmid_mask = (u32)((1UL << 16) - 1);
+ reserved_vmid_mask = (u32)((1UL << adev->vm_manager.first_kfd_vmid) - 1);
+
adev->mes.vmid_mask_mmhub = 0xFF00;
- adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00;
+ adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask;
+
+ queue_mask = (u32)(1UL << adev->gfx.mec.num_queue_per_pipe) - 1;
+ reserved_queue_mask = (u32)(1UL << AMDGPU_MES_RESERVED_QUEUES) - 1;
num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
@@ -142,7 +152,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
if (i >= num_pipes)
break;
- adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC;
+ adev->mes.compute_hqd_mask[i] =
+ adev->gfx.disable_kq ? 0xF : (queue_mask & ~reserved_queue_mask);
}
num_pipes = adev->sdma.num_instances;
@@ -156,7 +167,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.sdma_hqd_mask[i] = 0xfc;
}
- for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
if (r) {
dev_err(adev->dev,
@@ -192,16 +203,18 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
goto error_doorbell;
if (adev->mes.hung_queue_db_array_size) {
- r = amdgpu_bo_create_kernel(adev,
- adev->mes.hung_queue_db_array_size * sizeof(u32),
- PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.hung_queue_db_array_gpu_obj,
- &adev->mes.hung_queue_db_array_gpu_addr,
- &adev->mes.hung_queue_db_array_cpu_addr);
- if (r) {
- dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
- goto error_doorbell;
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
+ r = amdgpu_bo_create_kernel(adev,
+ adev->mes.hung_queue_db_array_size * sizeof(u32),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.hung_queue_db_array_gpu_obj[i],
+ &adev->mes.hung_queue_db_array_gpu_addr[i],
+ &adev->mes.hung_queue_db_array_cpu_addr[i]);
+ if (r) {
+ dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
+ goto error_doorbell;
+ }
}
}
@@ -210,12 +223,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
error_doorbell:
amdgpu_mes_doorbell_free(adev);
error:
- for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
if (adev->mes.sch_ctx_ptr[i])
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
if (adev->mes.query_status_fence_ptr[i])
amdgpu_device_wb_free(adev,
adev->mes.query_status_fence_offs[i]);
+ if (adev->mes.hung_queue_db_array_gpu_obj[i])
+ amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
+ &adev->mes.hung_queue_db_array_gpu_addr[i],
+ &adev->mes.hung_queue_db_array_cpu_addr[i]);
}
idr_destroy(&adev->mes.pasid_idr);
@@ -229,16 +246,17 @@ error:
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
int i;
-
- amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj,
- &adev->mes.hung_queue_db_array_gpu_addr,
- &adev->mes.hung_queue_db_array_cpu_addr);
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
&adev->mes.event_log_cpu_addr);
- for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
+ amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj[i],
+ &adev->mes.hung_queue_db_array_gpu_addr[i],
+ &adev->mes.hung_queue_db_array_cpu_addr[i]);
+
if (adev->mes.sch_ctx_ptr[i])
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
if (adev->mes.query_status_fence_ptr[i])
@@ -304,13 +322,14 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
}
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
+ struct amdgpu_ring *ring, uint32_t xcc_id)
{
struct mes_map_legacy_queue_input queue_input;
int r;
memset(&queue_input, 0, sizeof(queue_input));
+ queue_input.xcc_id = xcc_id;
queue_input.queue_type = ring->funcs->type;
queue_input.doorbell_offset = ring->doorbell_index;
queue_input.pipe_id = ring->pipe;
@@ -330,11 +349,12 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
enum amdgpu_unmap_queues_action action,
- u64 gpu_addr, u64 seq)
+ u64 gpu_addr, u64 seq, uint32_t xcc_id)
{
struct mes_unmap_legacy_queue_input queue_input;
int r;
+ queue_input.xcc_id = xcc_id;
queue_input.action = action;
queue_input.queue_type = ring->funcs->type;
queue_input.doorbell_offset = ring->doorbell_index;
@@ -355,13 +375,15 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
unsigned int vmid,
- bool use_mmio)
+ bool use_mmio,
+ uint32_t xcc_id)
{
struct mes_reset_queue_input queue_input;
int r;
memset(&queue_input, 0, sizeof(queue_input));
+ queue_input.xcc_id = xcc_id;
queue_input.queue_type = ring->funcs->type;
queue_input.doorbell_offset = ring->doorbell_index;
queue_input.me_id = ring->me;
@@ -393,11 +415,11 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
int queue_type,
bool detect_only,
unsigned int *hung_db_num,
- u32 *hung_db_array)
-
+ u32 *hung_db_array,
+ uint32_t xcc_id)
{
struct mes_detect_and_reset_queue_input input;
- u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr;
+ u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
int r, i;
if (!hung_db_num || !hung_db_array)
@@ -409,7 +431,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
return -EINVAL;
/* Clear the doorbell array before detection */
- memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET,
+ memset(adev->mes.hung_queue_db_array_cpu_addr[xcc_id], AMDGPU_MES_INVALID_DB_OFFSET,
adev->mes.hung_queue_db_array_size * sizeof(u32));
input.queue_type = queue_type;
input.detect_only = detect_only;
@@ -436,7 +458,8 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
return r;
}
-uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t xcc_id)
{
struct mes_misc_op_input op_input;
int r, val = 0;
@@ -450,6 +473,7 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
}
read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_READ_REG;
op_input.read_reg.reg_offset = reg;
op_input.read_reg.buffer_addr = read_val_gpu_addr;
@@ -473,12 +497,13 @@ error:
return val;
}
-int amdgpu_mes_wreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t val)
+int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t val, uint32_t xcc_id)
{
struct mes_misc_op_input op_input;
int r;
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_WRITE_REG;
op_input.write_reg.reg_offset = reg;
op_input.write_reg.reg_value = val;
@@ -501,11 +526,13 @@ error:
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
- uint32_t ref, uint32_t mask)
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_id)
{
struct mes_misc_op_input op_input;
int r;
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
op_input.wrm_reg.reg0 = reg0;
op_input.wrm_reg.reg1 = reg1;
@@ -530,14 +557,23 @@ error:
int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
{
- uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
+ uint32_t hdp_flush_req_offset, hdp_flush_done_offset;
+ struct amdgpu_ring *mes_ring;
+ uint32_t ref_and_mask = 0, reg_mem_engine = 0;
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "mes hdp flush is not supported.\n");
+ return -EINVAL;
+ }
+
+ mes_ring = &adev->mes.ring[0];
hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
- ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
+
+ adev->gfx.funcs->get_hdp_flush_mask(mes_ring, &ref_and_mask, &reg_mem_engine);
return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
- ref_and_mask, ref_and_mask);
+ ref_and_mask, ref_and_mask, 0);
}
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
@@ -545,7 +581,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint32_t spi_gdbg_per_vmid_cntl,
const uint32_t *tcp_watch_cntl,
uint32_t flags,
- bool trap_en)
+ bool trap_en,
+ uint32_t xcc_id)
{
struct mes_misc_op_input op_input = {0};
int r;
@@ -556,6 +593,7 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
return -EINVAL;
}
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
op_input.set_shader_debugger.process_context_addr = process_context_addr;
op_input.set_shader_debugger.flags.u32all = flags;
@@ -584,7 +622,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
}
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
- uint64_t process_context_addr)
+ uint64_t process_context_addr,
+ uint32_t xcc_id)
{
struct mes_misc_op_input op_input = {0};
int r;
@@ -595,6 +634,7 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
return -EINVAL;
}
+ op_input.xcc_id = xcc_id;
op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
op_input.set_shader_debugger.process_context_addr = process_context_addr;
op_input.set_shader_debugger.flags.process_ctx_flush = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index e989225b354b..88685c58798e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -58,11 +58,20 @@ enum amdgpu_mes_priority_level {
struct amdgpu_mes_funcs;
enum amdgpu_mes_pipe {
- AMDGPU_MES_SCHED_PIPE = 0,
- AMDGPU_MES_KIQ_PIPE,
+ AMDGPU_MES_PIPE_0 = 0,
+ AMDGPU_MES_PIPE_1,
AMDGPU_MAX_MES_PIPES = 2,
};
+#define AMDGPU_MES_SCHED_PIPE AMDGPU_MES_PIPE_0
+#define AMDGPU_MES_KIQ_PIPE AMDGPU_MES_PIPE_1
+
+#define AMDGPU_MAX_MES_INST_PIPES \
+ (AMDGPU_MAX_MES_PIPES * AMDGPU_MAX_GC_INSTANCES)
+
+/*
+ * Flatten an (xcc_id, pipe_id) pair into a single index computed as
+ * xcc_id * AMDGPU_MAX_MES_PIPES + pipe_id. Both arguments are parenthesized
+ * so that arbitrary expressions can be passed without precedence surprises.
+ */
+#define MES_PIPE_INST(xcc_id, pipe_id) \
+ ((xcc_id) * AMDGPU_MAX_MES_PIPES + (pipe_id))
+
struct amdgpu_mes {
struct amdgpu_device *adev;
@@ -86,29 +95,29 @@ struct amdgpu_mes {
uint64_t default_process_quantum;
uint64_t default_gang_quantum;
- struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES];
- spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_ring ring[AMDGPU_MAX_MES_INST_PIPES];
+ spinlock_t ring_lock[AMDGPU_MAX_MES_INST_PIPES];
const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
/* mes ucode */
- struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
- uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
- uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
+ uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_INST_PIPES];
uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES];
/* mes ucode data */
- struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES];
- uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
- uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
+ uint32_t *data_fw_ptr[AMDGPU_MAX_MES_INST_PIPES];
uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES];
/* eop gpu obj */
- struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
- uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t eop_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
- void *mqd_backup[AMDGPU_MAX_MES_PIPES];
- struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES];
+ void *mqd_backup[AMDGPU_MAX_MES_INST_PIPES];
+ struct amdgpu_irq_src irq[AMDGPU_MAX_MES_INST_PIPES];
uint32_t vmid_mask_gfxhub;
uint32_t vmid_mask_mmhub;
@@ -116,18 +125,21 @@ struct amdgpu_mes {
uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
- uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
- uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
- uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
- uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
- uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
- uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
+
+ uint32_t sch_ctx_offs[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_INST_PIPES];
+ uint32_t query_status_fence_offs[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_INST_PIPES];
uint32_t saved_flags;
/* initialize kiq pipe */
- int (*kiq_hw_init)(struct amdgpu_device *adev);
- int (*kiq_hw_fini)(struct amdgpu_device *adev);
+ int (*kiq_hw_init)(struct amdgpu_device *adev,
+ uint32_t xcc_id);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev,
+ uint32_t xcc_id);
/* MES doorbells */
uint32_t db_start_dw_offset;
@@ -150,9 +162,15 @@ struct amdgpu_mes {
int hung_queue_db_array_size;
int hung_queue_hqd_info_offset;
- struct amdgpu_bo *hung_queue_db_array_gpu_obj;
- uint64_t hung_queue_db_array_gpu_addr;
- void *hung_queue_db_array_cpu_addr;
+ struct amdgpu_bo *hung_queue_db_array_gpu_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t hung_queue_db_array_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ void *hung_queue_db_array_cpu_addr[AMDGPU_MAX_MES_PIPES];
+
+ /* cooperative dispatch */
+ bool enable_coop_mode;
+ int master_xcc_ids[AMDGPU_MAX_MES_INST_PIPES];
+ struct amdgpu_bo *shared_cmd_buf_obj[AMDGPU_MAX_MES_INST_PIPES];
+ uint64_t shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
};
struct amdgpu_mes_gang {
@@ -208,6 +226,7 @@ struct amdgpu_mes_gang_properties {
};
struct mes_add_queue_input {
+ uint32_t xcc_id;
uint32_t process_id;
uint64_t page_table_base_addr;
uint64_t process_va_start;
@@ -234,15 +253,19 @@ struct mes_add_queue_input {
uint32_t is_aql_queue;
uint32_t queue_size;
uint32_t exclusively_scheduled;
+ uint32_t sh_mem_config_data;
+ uint32_t vm_cntx_cntl;
};
struct mes_remove_queue_input {
+ uint32_t xcc_id;
uint32_t doorbell_offset;
uint64_t gang_context_addr;
bool remove_queue_after_reset;
};
struct mes_map_legacy_queue_input {
+ uint32_t xcc_id;
uint32_t queue_type;
uint32_t doorbell_offset;
uint32_t pipe_id;
@@ -252,6 +275,7 @@ struct mes_map_legacy_queue_input {
};
struct mes_unmap_legacy_queue_input {
+ uint32_t xcc_id;
enum amdgpu_unmap_queues_action action;
uint32_t queue_type;
uint32_t doorbell_offset;
@@ -262,6 +286,7 @@ struct mes_unmap_legacy_queue_input {
};
struct mes_suspend_gang_input {
+ uint32_t xcc_id;
bool suspend_all_gangs;
uint64_t gang_context_addr;
uint64_t suspend_fence_addr;
@@ -269,11 +294,13 @@ struct mes_suspend_gang_input {
};
struct mes_resume_gang_input {
+ uint32_t xcc_id;
bool resume_all_gangs;
uint64_t gang_context_addr;
};
struct mes_reset_queue_input {
+ uint32_t xcc_id;
uint32_t queue_type;
uint32_t doorbell_offset;
bool use_mmio;
@@ -309,7 +336,8 @@ enum mes_misc_opcode {
};
struct mes_misc_op_input {
- enum mes_misc_opcode op;
+ uint32_t xcc_id;
+ enum mes_misc_opcode op;
union {
struct {
@@ -395,8 +423,10 @@ struct amdgpu_mes_funcs {
struct mes_inv_tlbs_pasid_input *input);
};
-#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
-#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
+#define amdgpu_mes_kiq_hw_init(adev, xcc_id) \
+ (adev)->mes.kiq_hw_init((adev), (xcc_id))
+#define amdgpu_mes_kiq_hw_fini(adev, xcc_id) \
+ (adev)->mes.kiq_hw_fini((adev), (xcc_id))
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
int amdgpu_mes_init(struct amdgpu_device *adev);
@@ -406,38 +436,42 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev);
int amdgpu_mes_resume(struct amdgpu_device *adev);
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
- struct amdgpu_ring *ring);
+ struct amdgpu_ring *ring, uint32_t xcc_id);
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
enum amdgpu_unmap_queues_action action,
- u64 gpu_addr, u64 seq);
+ u64 gpu_addr, u64 seq, uint32_t xcc_id);
int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
unsigned int vmid,
- bool use_mmio);
+ bool use_mmio,
+ uint32_t xcc_id);
int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev);
int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
int queue_type,
bool detect_only,
unsigned int *hung_db_num,
- u32 *hung_db_array);
+ u32 *hung_db_array,
+ uint32_t xcc_id);
-uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t xcc_id);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
- uint32_t reg, uint32_t val);
+ uint32_t reg, uint32_t val, uint32_t xcc_id);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
- uint32_t ref, uint32_t mask);
+ uint32_t ref, uint32_t mask, uint32_t xcc_id);
int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint64_t process_context_addr,
uint32_t spi_gdbg_per_vmid_cntl,
const uint32_t *tcp_watch_cntl,
uint32_t flags,
- bool trap_en);
+ bool trap_en,
+ uint32_t xcc_id);
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
- uint64_t process_context_addr);
+ uint64_t process_context_addr, uint32_t xcc_id);
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
enum amdgpu_mes_priority_level prio);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e08f58de4b17..b676310ce9ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1050,7 +1050,8 @@ static const char * const amdgpu_vram_names[] = {
"DDR5",
"LPDDR4",
"LPDDR5",
- "HBM3E"
+ "HBM3E",
+ "HBM4"
};
/**
@@ -1080,10 +1081,10 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
adev->gmc.aper_size);
}
- DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
+ drm_info(adev_to_drm(adev), "Detected VRAM RAM=%lluM, BAR=%lluM\n",
adev->gmc.mc_vram_size >> 20,
(unsigned long long)adev->gmc.aper_size >> 20);
- DRM_INFO("RAM width %dbits %s\n",
+ drm_info(adev_to_drm(adev), "RAM width %dbits %s\n",
adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]);
return amdgpu_ttm_init(adev);
}
@@ -1125,6 +1126,10 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_bo_user *ubo;
+ /* MMIO_REMAP is BAR I/O space; tiling should never be used here. */
+ WARN_ON_ONCE(bo->tbo.resource &&
+ bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP);
+
BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
if (adev->family <= AMDGPU_FAMILY_CZ &&
AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
@@ -1147,6 +1152,13 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
struct amdgpu_bo_user *ubo;
+ /*
+ * MMIO_REMAP BOs are not real VRAM/GTT memory but a fixed BAR I/O window.
+ * They should never go through GEM tiling helpers.
+ */
+ WARN_ON_ONCE(bo->tbo.resource &&
+ bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP);
+
BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
dma_resv_assert_held(bo->tbo.base.resv);
ubo = to_amdgpu_bo_user(bo);
@@ -1321,8 +1333,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (r)
goto out;
- r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
- AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
+ &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
if (WARN_ON(r))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0b10497d487c..b0540b009e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -39,6 +39,8 @@
#include "psp_v13_0.h"
#include "psp_v13_0_4.h"
#include "psp_v14_0.h"
+#include "psp_v15_0.h"
+#include "psp_v15_0_8.h"
#include "amdgpu_ras.h"
#include "amdgpu_securedisplay.h"
@@ -259,6 +261,13 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block)
psp_v14_0_set_psp_funcs(psp);
psp->boot_time_tmr = false;
break;
+ case IP_VERSION(15, 0, 0):
+ psp_v15_0_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(15, 0, 8):
+ psp_v15_0_8_set_psp_funcs(psp);
+ break;
default:
return -EINVAL;
}
@@ -893,18 +902,12 @@ static int psp_tmr_init(struct psp_context *psp)
static bool psp_skip_tmr(struct psp_context *psp)
{
- switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
- case IP_VERSION(11, 0, 9):
- case IP_VERSION(11, 0, 7):
- case IP_VERSION(13, 0, 2):
- case IP_VERSION(13, 0, 6):
- case IP_VERSION(13, 0, 10):
- case IP_VERSION(13, 0, 12):
- case IP_VERSION(13, 0, 14):
- return true;
- default:
- return false;
- }
+ u32 ip_version = amdgpu_ip_version(psp->adev, MP0_HWIP, 0);
+
+ /* VF: skip for MP0 >= 11.0.7; otherwise skip iff boot-time TMR and autoload are both set */
+ if (amdgpu_sriov_vf(psp->adev))
+ return ip_version >= IP_VERSION(11, 0, 7);
+ return psp->boot_time_tmr && psp->autoload_supported;
}
static int psp_tmr_load(struct psp_context *psp)
@@ -912,10 +915,7 @@ static int psp_tmr_load(struct psp_context *psp)
int ret;
struct psp_gfx_cmd_resp *cmd;
- /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR.
- * Already set up by host driver.
- */
- if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+ if (psp_skip_tmr(psp))
return 0;
cmd = acquire_psp_cmd_buf(psp);
@@ -947,10 +947,7 @@ static int psp_tmr_unload(struct psp_context *psp)
int ret;
struct psp_gfx_cmd_resp *cmd;
- /* skip TMR unload for Navi12 and CHIP_SIENNA_CICHLID SRIOV,
- * as TMR is not loaded at all
- */
- if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+ if (psp_skip_tmr(psp))
return 0;
cmd = acquire_psp_cmd_buf(psp);
@@ -1995,6 +1992,7 @@ int psp_ras_initialize(struct psp_context *psp)
ras_cmd->ras_in_message.init_flags.nps_mode =
adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
+ ras_cmd->ras_in_message.init_flags.vram_type = (uint8_t)adev->gmc.vram_type;
ret = psp_ta_load(psp, &psp->ras_context.context);
@@ -2620,18 +2618,16 @@ skip_pin_bo:
return ret;
}
- if (!psp->boot_time_tmr || !psp->autoload_supported) {
- ret = psp_tmr_load(psp);
- if (ret) {
- dev_err(adev->dev, "PSP load tmr failed!\n");
- return ret;
- }
+ ret = psp_tmr_load(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load tmr failed!\n");
+ return ret;
}
return 0;
}
-static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
+int amdgpu_psp_get_fw_type(struct amdgpu_firmware_info *ucode,
enum psp_gfx_fw_type *type)
{
switch (ucode->ucode_id) {
@@ -2719,6 +2715,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_RLC_DRAM:
*type = GFX_FW_TYPE_RLC_DRAM_BOOT;
break;
+ case AMDGPU_UCODE_ID_RLC_IRAM_1:
+ *type = GFX_FW_TYPE_RLX6_UCODE_CORE1;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM_1:
+ *type = GFX_FW_TYPE_RLX6_DRAM_BOOT_CORE1;
+ break;
case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
*type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS;
break;
@@ -2887,6 +2889,8 @@ static void psp_print_fw_hdr(struct psp_context *psp,
amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_RLC_G:
+ case AMDGPU_UCODE_ID_RLC_DRAM_1:
+ case AMDGPU_UCODE_ID_RLC_IRAM_1:
hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
amdgpu_ucode_print_rlc_hdr(hdr);
break;
@@ -2911,10 +2915,9 @@ static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp,
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
- ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
+ ret = psp_get_fw_type(psp, ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
if (ret)
- dev_err(psp->adev->dev, "Unknown firmware type\n");
-
+ dev_err(psp->adev->dev, "Unknown firmware type %d\n", ucode->ucode_id);
return ret;
}
@@ -3077,7 +3080,11 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
amdgpu_ip_version(adev, MP0_HWIP, 0) ==
IP_VERSION(11, 0, 11) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) ==
- IP_VERSION(11, 0, 12)) &&
+ IP_VERSION(11, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(15, 0, 0) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(15, 0, 8)) &&
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
@@ -4531,3 +4538,19 @@ const struct amdgpu_ip_block_version psp_v14_0_ip_block = {
.rev = 0,
.funcs = &psp_ip_funcs,
};
+
+const struct amdgpu_ip_block_version psp_v15_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 15,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v15_0_8_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 15,
+ .minor = 0,
+ .rev = 8,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 237b624aa51c..79a49cba8d40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -172,6 +172,8 @@ struct psp_funcs {
bool (*is_reload_needed)(struct psp_context *psp);
int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val,
enum psp_reg_prog_id id);
+ int (*get_fw_type)(struct amdgpu_firmware_info *ucode,
+ enum psp_gfx_fw_type *type);
};
struct ta_funcs {
@@ -524,6 +526,10 @@ struct amdgpu_psp_funcs {
((psp)->funcs->reg_program_no_ring ? \
(psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL)
+#define psp_get_fw_type(psp, ucode, type) \
+ ((psp)->funcs->get_fw_type ? \
+ (psp)->funcs->get_fw_type(ucode, type):amdgpu_psp_get_fw_type(ucode, type))
+
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
@@ -534,6 +540,8 @@ extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;
extern const struct amdgpu_ip_block_version psp_v13_0_ip_block;
extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block;
extern const struct amdgpu_ip_block_version psp_v14_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v15_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v15_0_8_ip_block;
int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
uint32_t field_val, uint32_t mask, uint32_t flags);
@@ -621,6 +629,8 @@ bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev);
int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val,
enum psp_reg_prog_id id);
void amdgpu_psp_debugfs_init(struct amdgpu_device *adev);
+int amdgpu_psp_get_fw_type(struct amdgpu_firmware_info *ucode,
+ enum psp_gfx_fw_type *type);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2a6cf7963dde..c91529c778ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -237,8 +237,13 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev,
(address >= RAS_UMC_INJECT_ADDR_LIMIT))
return -EFAULT;
- count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ if (amdgpu_uniras_enabled(adev))
+ count = amdgpu_ras_mgr_lookup_bad_pages_in_a_row(adev, address,
+ page_pfns, ARRAY_SIZE(page_pfns));
+ else
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
address, page_pfns, ARRAY_SIZE(page_pfns));
+
if (count <= 0)
return -EPERM;
@@ -1917,8 +1922,6 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
for (i = 0; i < bps_count; i++) {
address = ((uint64_t)bps[i].bp) << AMDGPU_GPU_PAGE_SHIFT;
- if (amdgpu_ras_check_critical_address(adev, address))
- continue;
bps[i].size = AMDGPU_GPU_PAGE_SIZE;
@@ -1931,6 +1934,10 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
else
bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED;
+ if ((bps[i].flags != AMDGPU_RAS_RETIRE_PAGE_RESERVED) &&
+ amdgpu_ras_check_critical_address(adev, address))
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED;
+
s += scnprintf(&buf[s], element_size + 1,
"0x%08x : 0x%08x : %1s\n",
bps[i].bp,
@@ -3076,6 +3083,11 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
struct ras_err_handler_data *data = con->eh_data;
for (j = 0; j < count; j++) {
+ if (!data->space_left &&
+ amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
+ return -ENOMEM;
+ }
+
if (amdgpu_ras_check_bad_page_unlock(con,
bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
data->count++;
@@ -3083,11 +3095,6 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
continue;
}
- if (!data->space_left &&
- amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
- return -ENOMEM;
- }
-
amdgpu_ras_reserve_page(adev, bps[j].retired_page);
memcpy(&data->bps[data->count], &(bps[j]),
@@ -3249,8 +3256,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
/* deal with retire_unit records a time */
ret = __amdgpu_ras_convert_rec_array_from_rom(adev,
&bps[i], &err_data, nps);
- if (ret)
- con->bad_page_num -= adev->umc.retire_unit;
i += (adev->umc.retire_unit - 1);
} else {
break;
@@ -3263,8 +3268,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
for (; i < pages; i++) {
ret = __amdgpu_ras_convert_rec_from_rom(adev,
&bps[i], &err_data, nps);
- if (ret)
- con->bad_page_num -= adev->umc.retire_unit;
}
con->eh_data->count_saved = con->eh_data->count;
@@ -4421,10 +4424,10 @@ static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
return 0;
if (amdgpu_ras_query_error_status(adev, &info) != 0)
- DRM_WARN("RAS init harvest failure");
+ drm_warn(adev_to_drm(adev), "RAS init query failure\n");
if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0)
- DRM_WARN("RAS init harvest reset failure");
+ drm_warn(adev_to_drm(adev), "RAS init harvest reset failure\n");
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index c596b6df2e2d..600e6bb98af7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -76,8 +76,12 @@ unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
* @ring: amdgpu_ring structure holding ring information
* @ndw: number of dwords to allocate in the ring buffer
*
- * Allocate @ndw dwords in the ring buffer (all asics).
- * Returns 0 on success, error on failure.
+ * Allocate @ndw dwords in the ring buffer. The number of dwords should be the
+ * sum of all commands written to the ring.
+ *
+ * Returns:
+ * 0 on success, otherwise -ENOMEM if it tries to allocate more than the
+ * maximum number of dwords allowed for one submission.
*/
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
{
@@ -123,7 +127,8 @@ static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
ring->funcs->begin_use(ring);
}
-/** amdgpu_ring_insert_nop - insert NOP packets
+/**
+ * amdgpu_ring_insert_nop - insert NOP packets
*
* @ring: amdgpu_ring structure holding ring information
* @count: the number of NOP packets to insert
@@ -186,7 +191,7 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
uint32_t count;
if (ring->count_dw < 0)
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+ drm_err(adev_to_drm(ring->adev), "writing more dwords to the ring than expected!\n");
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7a27c6c4bb44..87c9df6c2ecf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -62,6 +62,8 @@ enum amdgpu_ring_priority_level {
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
+
+/* Ensure the execution in case of preemption or reset */
#define AMDGPU_FENCE_FLAG_EXEC (1 << 3)
#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
@@ -144,10 +146,15 @@ struct amdgpu_fence {
struct amdgpu_ring *ring;
ktime_t start_timestamp;
- /* wptr for the fence for resets */
+ /* wptr for the total submission for resets */
u64 wptr;
/* fence context for resets */
u64 context;
+ /* has this fence been reemitted */
+ unsigned int reemitted;
+ /* wptr for the fence for the submission */
+ u64 fence_wptr_start;
+ u64 fence_wptr_end;
};
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 5aa830a02d80..572a60e1b3cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -515,6 +515,40 @@ static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev)
}
}
+static void amdgpu_gfx_rlc_init_microcode_v2_5(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_5 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_5 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.rlc_1_iram_ucode_size_bytes =
+ le32_to_cpu(rlc_hdr->rlc_1_iram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_1_iram_ucode = (u8 *)rlc_hdr +
+ le32_to_cpu(rlc_hdr->rlc_1_iram_ucode_offset_bytes);
+ adev->gfx.rlc.rlc_1_dram_ucode_size_bytes =
+ le32_to_cpu(rlc_hdr->rlc_1_dram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_1_dram_ucode = (u8 *)rlc_hdr +
+ le32_to_cpu(rlc_hdr->rlc_1_dram_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlc_1_iram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM_1];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM_1;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_1_iram_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlc_1_dram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM_1];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM_1;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_1_dram_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
uint16_t version_major,
uint16_t version_minor)
@@ -545,6 +579,7 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
amdgpu_gfx_rlc_init_microcode_v2_3(adev);
if (version_minor == 4)
amdgpu_gfx_rlc_init_microcode_v2_4(adev);
-
+ if (version_minor == 5)
+ amdgpu_gfx_rlc_init_microcode_v2_5(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 2ce310b31942..e535534237a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -257,7 +257,8 @@ struct amdgpu_rlc_funcs {
void (*stop)(struct amdgpu_device *adev);
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
- void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid);
+ void (*update_spm_vmid)(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_ring *ring, unsigned vmid);
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
};
@@ -269,6 +270,15 @@ struct amdgpu_rlcg_reg_access_ctrl {
uint32_t grbm_cntl;
uint32_t grbm_idx;
uint32_t spare_int;
+
+ uint32_t vfi_cmd;
+ uint32_t vfi_stat;
+ uint32_t vfi_addr;
+ uint32_t vfi_data;
+ uint32_t vfi_grbm_cntl;
+ uint32_t vfi_grbm_idx;
+ uint32_t vfi_grbm_cntl_data;
+ uint32_t vfi_grbm_idx_data;
};
struct amdgpu_rlc {
@@ -310,6 +320,8 @@ struct amdgpu_rlc {
u32 save_restore_list_srm_size_bytes;
u32 rlc_iram_ucode_size_bytes;
u32 rlc_dram_ucode_size_bytes;
+ u32 rlc_1_iram_ucode_size_bytes;
+ u32 rlc_1_dram_ucode_size_bytes;
u32 rlcp_ucode_size_bytes;
u32 rlcv_ucode_size_bytes;
u32 global_tap_delays_ucode_size_bytes;
@@ -325,6 +337,8 @@ struct amdgpu_rlc {
u8 *save_restore_list_srm;
u8 *rlc_iram_ucode;
u8 *rlc_dram_ucode;
+ u8 *rlc_1_iram_ucode;
+ u8 *rlc_1_dram_ucode;
u8 *rlcp_ucode;
u8 *rlcv_ucode;
u8 *global_tap_delays_ucode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 34311f32be4c..2bf365609775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -50,6 +50,11 @@ enum amdgpu_sdma_irq {
#define NUM_SDMA(x) hweight32(x)
+struct amdgpu_sdma_csa_info {
+ u32 size;
+ u32 alignment;
+};
+
struct amdgpu_sdma_funcs {
int (*stop_kernel_queue)(struct amdgpu_ring *ring);
int (*start_kernel_queue)(struct amdgpu_ring *ring);
@@ -65,7 +70,10 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring ring;
struct amdgpu_ring page;
bool burst_nop;
- uint32_t aid_id;
+ union {
+ uint32_t aid_id;
+ uint32_t xcc_id;
+ };
struct amdgpu_bo *sdma_fw_obj;
uint64_t sdma_fw_gpu_addr;
@@ -123,7 +131,10 @@ struct amdgpu_sdma {
int num_instances;
uint32_t sdma_mask;
- int num_inst_per_aid;
+ union {
+ int num_inst_per_aid;
+ int num_inst_per_xcc;
+ };
uint32_t srbm_soft_reset;
bool has_page_queue;
struct ras_common_if *ras_if;
@@ -133,6 +144,8 @@ struct amdgpu_sdma {
struct list_head reset_callback_list;
bool no_user_submission;
bool disable_uq;
+ void (*get_csa_info)(struct amdgpu_device *adev,
+ struct amdgpu_sdma_csa_info *csa_info);
};
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index ec9d12f85f39..124b13a68f3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -27,6 +27,7 @@ enum amdgpu_pkg_type {
AMDGPU_PKG_TYPE_APU = 2,
AMDGPU_PKG_TYPE_CEM = 3,
AMDGPU_PKG_TYPE_OAM = 4,
+ AMDGPU_PKG_TYPE_BB = 5,
AMDGPU_PKG_TYPE_UNKNOWN,
};
@@ -44,6 +45,8 @@ struct amdgpu_smuio_funcs {
u32 (*get_socket_id)(struct amdgpu_device *adev);
enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
+ bool (*is_connected_with_ethernet_switch)(struct amdgpu_device *adev);
+ bool (*is_custom_hbm_supported)(struct amdgpu_device *adev);
u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev);
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2b931e855abd..cfbcce9c27c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -162,13 +162,25 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
*placement = abo->placement;
}
+static struct dma_fence *
+amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 num_dw)
+{
+ struct amdgpu_ring *ring;
+
+ ring = adev->mman.buffer_funcs_ring;
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ return amdgpu_job_submit(job);
+}
+
/**
* amdgpu_ttm_map_buffer - Map memory into the GART windows
+ * @entity: entity to run the window setup job
* @bo: buffer object to map
* @mem: memory object to map
* @mm_cur: range to map
* @window: which GART window to use
- * @ring: DMA ring to use for the copy
* @tmz: if we should setup a TMZ enabled mapping
* @size: in number of bytes to map, out number of bytes mapped
* @addr: resulting address inside the MC address space
@@ -176,13 +188,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* Setup one of the GART windows to access a specific piece of memory or return
* the physical address for local memory.
*/
-static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
+static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
+ struct ttm_buffer_object *bo,
struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur,
- unsigned int window, struct amdgpu_ring *ring,
+ unsigned int window,
bool tmz, uint64_t *size, uint64_t *addr)
{
- struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
unsigned int offset, num_pages, num_dw, num_bytes;
uint64_t src_addr, dst_addr;
struct amdgpu_job *job;
@@ -223,7 +236,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ r = amdgpu_job_alloc_with_ib(adev, &entity->base,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED, &job,
@@ -239,9 +252,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes, 0);
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- WARN_ON(job->ibs[0].length_dw > num_dw);
-
flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
if (tmz)
flags |= AMDGPU_PTE_TMZ;
@@ -259,13 +269,14 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
}
- dma_fence_put(amdgpu_job_submit(job));
+ dma_fence_put(amdgpu_ttm_job_submit(adev, job, num_dw));
return 0;
}
/**
* amdgpu_ttm_copy_mem_to_mem - Helper function for copy
* @adev: amdgpu device
+ * @entity: entity to run the jobs
* @src: buffer/address where to read from
* @dst: buffer/address where to write to
* @size: number of bytes to copy
@@ -280,13 +291,13 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
*/
__attribute__((nonnull))
static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ struct amdgpu_ttm_buffer_entity *entity,
const struct amdgpu_copy_mem *src,
const struct amdgpu_copy_mem *dst,
uint64_t size, bool tmz,
struct dma_resv *resv,
struct dma_fence **f)
{
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_res_cursor src_mm, dst_mm;
struct dma_fence *fence = NULL;
int r = 0;
@@ -312,13 +323,13 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
/* Map src to window 0 and dst to window 1. */
- r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
- 0, ring, tmz, &cur_size, &from);
+ r = amdgpu_ttm_map_buffer(entity, src->bo, src->mem, &src_mm,
+ 0, tmz, &cur_size, &from);
if (r)
goto error;
- r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
- 1, ring, tmz, &cur_size, &to);
+ r = amdgpu_ttm_map_buffer(entity, dst->bo, dst->mem, &dst_mm,
+ 1, tmz, &cur_size, &to);
if (r)
goto error;
@@ -345,8 +356,8 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
write_compress_disable));
}
- r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
- &next, false, true, copy_flags);
+ r = amdgpu_copy_buffer(adev, entity, from, to, cur_size, resv,
+ &next, true, copy_flags);
if (r)
goto error;
@@ -386,7 +397,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
src.offset = 0;
dst.offset = 0;
- r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+ r = amdgpu_ttm_copy_mem_to_mem(adev,
+ &adev->mman.move_entity,
+ &src, &dst,
new_mem->size,
amdgpu_bo_encrypted(abo),
bo->base.resv, &fence);
@@ -398,8 +411,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
- false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
+ r = amdgpu_fill_buffer(&adev->mman.move_entity,
+ abo, 0, NULL, &wipe_fence,
+ AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -1063,6 +1077,86 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
}
/**
+ * amdgpu_ttm_mmio_remap_alloc_sgt - build an sg_table for MMIO_REMAP I/O aperture
+ * @adev: amdgpu device providing the remap BAR base (adev->rmmio_remap.bus_addr)
+ * @res: TTM resource of the BO to export; expected to live in AMDGPU_PL_MMIO_REMAP
+ * @dev: importing device to map for (typically @attach->dev in dma-buf paths)
+ * @dir: DMA data direction for the importer (passed to dma_map_resource())
+ * @sgt: output; on success, set to a newly allocated sg_table describing the I/O span
+ *
+ * The HDP flush page (AMDGPU_PL_MMIO_REMAP) is a fixed hardware I/O window in a PCI
+ * BAR—there are no struct pages to back it. Importers still need a DMA address list,
+ * so we synthesize a minimal sg_table and populate it from dma_map_resource(), not
+ * from pages. Using the common amdgpu_res_cursor walker keeps the offset/size math
+ * consistent with other TTM/manager users.
+ *
+ * - @res is assumed to be a small, contiguous I/O region (typically a single 4 KiB
+ *   page) in AMDGPU_PL_MMIO_REMAP. Callers should validate placement before calling.
+ * - The sg entry is created with sg_set_page(sg, NULL, …) to reflect I/O space.
+ * - The mapping uses DMA_ATTR_SKIP_CPU_SYNC because this is MMIO, not cacheable RAM.
+ * - Peer reachability / p2pdma policy checks must be done by the caller.
+ *
+ * Return:
+ * * 0 on success, with *@sgt set to a valid table that must be freed via
+ *   amdgpu_ttm_mmio_remap_free_sgt().
+ * * -ENOMEM if allocation of the sg_table fails (or whatever negative errno
+ *   sg_alloc_table() reports).
+ * * -EIO if dma_map_resource() fails.
+ *
+ * On every failure path *@sgt is left as NULL, never as a pointer to freed
+ * memory.
+ */
+int amdgpu_ttm_mmio_remap_alloc_sgt(struct amdgpu_device *adev,
+				    struct ttm_resource *res,
+				    struct device *dev,
+				    enum dma_data_direction dir,
+				    struct sg_table **sgt)
+{
+	struct amdgpu_res_cursor cur;
+	struct sg_table *table;
+	struct scatterlist *sg;
+	resource_size_t phys;
+	dma_addr_t dma;
+	int r;
+
+	/* Publish the table only once it is fully set up. */
+	*sgt = NULL;
+
+	/* Walk the resource once; MMIO_REMAP is expected to be contiguous+small. */
+	amdgpu_res_first(res, 0, res->size, &cur);
+
+	/* Translate byte offset in the remap window into a host physical BAR address. */
+	phys = adev->rmmio_remap.bus_addr + cur.start;
+
+	/* Build a single-entry sg_table mapped as I/O (no struct page backing). */
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	r = sg_alloc_table(table, 1, GFP_KERNEL);
+	if (r)
+		goto err_free;
+
+	sg = table->sgl;
+	sg_set_page(sg, NULL, cur.size, 0); /* I/O space: no struct page behind it */
+
+	dma = dma_map_resource(dev, phys, cur.size, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(dev, dma)) {
+		r = -EIO;
+		goto err_free_table;
+	}
+	sg_dma_address(sg) = dma;
+	sg_dma_len(sg) = cur.size;
+
+	*sgt = table;
+	return 0;
+
+err_free_table:
+	sg_free_table(table);
+err_free:
+	kfree(table);
+	return r;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_free_sgt - release a table built for the MMIO_REMAP aperture
+ * @dev: device the table was mapped for
+ * @dir: DMA data direction the mapping was created with
+ * @sgt: table to unmap and free; NULL is ignored
+ *
+ * Unmaps the DMA address recorded in the first sg entry with
+ * dma_unmap_resource() (using DMA_ATTR_SKIP_CPU_SYNC, as at map time), then
+ * frees the scatterlist and the sg_table itself. @dev and @dir should be the
+ * same values that were used when the table was allocated.
+ */
+void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev,
+				    enum dma_data_direction dir,
+				    struct sg_table *sgt)
+{
+	struct scatterlist *sg;
+
+	/* Behave like kfree(): tolerate a table that was never allocated. */
+	if (!sgt)
+		return;
+
+	sg = sgt->sgl;
+	dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg),
+			   dir, DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+/**
* amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
*
* @bo: The buffer object to create a GTT ttm_tt object around
@@ -1478,7 +1572,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
memcpy(adev->mman.sdma_access_ptr, buf, len);
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, AMDGPU_IB_POOL_DELAYED,
&job,
@@ -1497,10 +1591,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
PAGE_SIZE, 0);
- amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
- WARN_ON(job->ibs[0].length_dw > num_dw);
-
- fence = amdgpu_job_submit(job);
+ fence = amdgpu_ttm_job_submit(adev, job, num_dw);
mutex_unlock(&adev->mman.gtt_window_lock);
if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
@@ -1744,7 +1835,13 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)))
reserve_size = max(reserve_size, (uint32_t)280 << 20);
- else if (!reserve_size)
+ else if (!adev->bios &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) {
+ if (hweight32(adev->aid_mask) == 1)
+ reserve_size = max(reserve_size, (uint32_t)128 << 20);
+ else
+ reserve_size = max(reserve_size, (uint32_t)144 << 20);
+ } else if (!reserve_size)
reserve_size = DISCOVERY_TMR_OFFSET;
if (mem_train_support) {
@@ -1820,6 +1917,10 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
* PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
* GEM object (amdgpu_bo_create).
*
+ * After creation the BO is reserved and pinned at the TTM level
+ * (ttm_bo_pin()) so it can never be migrated or evicted. No CPU mapping
+ * is established here.
+ *
* Return:
* * 0 on success or intentional skip (feature not present/unsupported)
* * negative errno on allocation failure
@@ -1848,7 +1949,26 @@ static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
if (r)
return r;
+ r = amdgpu_bo_reserve(adev->rmmio_remap.bo, true);
+ if (r)
+ goto err_unref;
+
+ /*
+ * MMIO_REMAP is a fixed I/O placement (AMDGPU_PL_MMIO_REMAP).
+ * Use a TTM-level pin so the BO cannot be evicted or migrated,
+ * independent of GEM domains. This enforces the "fixed I/O
+ * window" semantics of the placement.
+ */
+ ttm_bo_pin(&adev->rmmio_remap.bo->tbo);
+
+ amdgpu_bo_unreserve(adev->rmmio_remap.bo);
return 0;
+
+err_unref:
+ if (adev->rmmio_remap.bo)
+ amdgpu_bo_unref(&adev->rmmio_remap.bo);
+ adev->rmmio_remap.bo = NULL;
+ return r;
}
/**
@@ -1860,6 +1980,15 @@ static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
*/
static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
{
+ struct amdgpu_bo *bo = adev->rmmio_remap.bo;
+
+ /* Nothing to tear down if the BO was never created. */
+ if (!bo)
+ return;
+
+ /* Drop the TTM pin under reservation; skipped if reserving fails. */
+ if (!amdgpu_bo_reserve(bo, true)) {
+ ttm_bo_unpin(&bo->tbo);
+ amdgpu_bo_unreserve(bo);
+ }
amdgpu_bo_unref(&adev->rmmio_remap.bo);
adev->rmmio_remap.bo = NULL;
}
@@ -1988,7 +2117,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
}
- dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n",
+ dev_info(adev->dev, " %uM of VRAM memory ready\n",
(unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
/* Compute GTT size, either based on TTM limit
@@ -2014,7 +2143,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
dev_err(adev->dev, "Failed initializing GTT heap.\n");
return r;
}
- dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n",
+ dev_info(adev->dev, " %uM of GTT memory ready.\n",
(unsigned int)(gtt_size / (1024 * 1024)));
if (adev->flags & AMD_IS_APU) {
@@ -2077,7 +2206,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
AMDGPU_GEM_DOMAIN_GTT,
&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr))
- DRM_WARN("Debug VRAM access will use slowpath MM access\n");
+ drm_warn(adev_to_drm(adev),
+ "Debug VRAM access will use slowpath MM access\n");
return 0;
}
@@ -2137,7 +2267,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
- dev_info(adev->dev, "amdgpu: ttm finalized\n");
+ dev_info(adev->dev, " ttm finalized\n");
}
/**
@@ -2165,7 +2295,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
ring = adev->mman.buffer_funcs_ring;
sched = &ring->sched;
- r = drm_sched_entity_init(&adev->mman.high_pr,
+ r = drm_sched_entity_init(&adev->mman.default_entity.base,
DRM_SCHED_PRIORITY_KERNEL, &sched,
1, NULL);
if (r) {
@@ -2175,18 +2305,30 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
return;
}
- r = drm_sched_entity_init(&adev->mman.low_pr,
+ r = drm_sched_entity_init(&adev->mman.clear_entity.base,
+ DRM_SCHED_PRIORITY_NORMAL, &sched,
+ 1, NULL);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed setting up TTM BO clear entity (%d)\n",
+ r);
+ goto error_free_entity;
+ }
+
+ r = drm_sched_entity_init(&adev->mman.move_entity.base,
DRM_SCHED_PRIORITY_NORMAL, &sched,
1, NULL);
if (r) {
dev_err(adev->dev,
"Failed setting up TTM BO move entity (%d)\n",
r);
+ drm_sched_entity_destroy(&adev->mman.clear_entity.base);
goto error_free_entity;
}
} else {
- drm_sched_entity_destroy(&adev->mman.high_pr);
- drm_sched_entity_destroy(&adev->mman.low_pr);
+ drm_sched_entity_destroy(&adev->mman.default_entity.base);
+ drm_sched_entity_destroy(&adev->mman.clear_entity.base);
+ drm_sched_entity_destroy(&adev->mman.move_entity.base);
/* Drop all the old fences since re-creating the scheduler entities
* will allocate new contexts.
*/
@@ -2204,24 +2346,20 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
return;
error_free_entity:
- drm_sched_entity_destroy(&adev->mman.high_pr);
+ drm_sched_entity_destroy(&adev->mman.default_entity.base);
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
- bool direct_submit,
+ struct amdgpu_ttm_buffer_entity *entity,
unsigned int num_dw,
struct dma_resv *resv,
bool vm_needs_flush,
struct amdgpu_job **job,
- bool delayed, u64 k_job_id)
+ u64 k_job_id)
{
- enum amdgpu_ib_pool_type pool = direct_submit ?
- AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED;
+ enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
int r;
- struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
- &adev->mman.high_pr;
- r = amdgpu_job_alloc_with_ib(adev, entity,
+ r = amdgpu_job_alloc_with_ib(adev, &entity->base,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, pool, job, k_job_id);
if (r)
@@ -2240,20 +2378,24 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
DMA_RESV_USAGE_BOOKKEEP);
}
-int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+int amdgpu_copy_buffer(struct amdgpu_device *adev,
+ struct amdgpu_ttm_buffer_entity *entity,
+ uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
- struct dma_fence **fence, bool direct_submit,
+ struct dma_fence **fence,
bool vm_needs_flush, uint32_t copy_flags)
{
- struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
+ struct amdgpu_ring *ring;
struct amdgpu_job *job;
uint32_t max_bytes;
unsigned int i;
int r;
- if (!direct_submit && !ring->sched.ready) {
+ ring = adev->mman.buffer_funcs_ring;
+
+ if (!ring->sched.ready) {
dev_err(adev->dev,
"Trying to move memory with ring turned off.\n");
return -EINVAL;
@@ -2262,11 +2404,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
- resv, vm_needs_flush, &job, false,
+ r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
+ resv, vm_needs_flush, &job,
AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
- return r;
+ goto error_free;
for (i = 0; i < num_loops; i++) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
@@ -2278,16 +2420,9 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
byte_count -= cur_size_in_bytes;
}
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- WARN_ON(job->ibs[0].length_dw > num_dw);
- if (direct_submit)
- r = amdgpu_job_submit_direct(job, ring, fence);
- else
- *fence = amdgpu_job_submit(job);
- if (r)
- goto error_free;
+ *fence = amdgpu_ttm_job_submit(adev, job, num_dw);
- return r;
+ return 0;
error_free:
amdgpu_job_free(job);
@@ -2295,14 +2430,15 @@ error_free:
return r;
}
-static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+static int amdgpu_ttm_fill_mem(struct amdgpu_device *adev,
+ struct amdgpu_ttm_buffer_entity *entity,
+ uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
- bool vm_needs_flush, bool delayed,
+ bool vm_needs_flush,
u64 k_job_id)
{
- struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
struct amdgpu_job *job;
uint32_t max_bytes;
@@ -2312,8 +2448,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
- r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
- &job, delayed, k_job_id);
+ r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
+ vm_needs_flush, &job, k_job_id);
if (r)
return r;
@@ -2327,9 +2463,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
byte_count -= cur_size;
}
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- WARN_ON(job->ibs[0].length_dw > num_dw);
- *fence = amdgpu_job_submit(job);
+ *fence = amdgpu_ttm_job_submit(adev, job, num_dw);
return 0;
}
@@ -2349,7 +2483,6 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct dma_fence **fence)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_res_cursor cursor;
u64 addr;
int r = 0;
@@ -2377,13 +2510,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
/* Never clear more than 256MiB at once to avoid timeouts */
size = min(cursor.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
- 1, ring, false, &size, &addr);
+ r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity,
+ &bo->tbo, bo->tbo.resource, &cursor,
+ 1, false, &size, &addr);
if (r)
goto err;
- r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
- &next, true, true,
+ r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv,
+ &next, true,
AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
if (r)
goto err;
@@ -2399,15 +2533,14 @@ err:
return r;
}
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **f,
- bool delayed,
- u64 k_job_id)
+int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
+ struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct dma_fence *fence = NULL;
struct amdgpu_res_cursor dst;
int r;
@@ -2428,13 +2561,14 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
/* Never fill more than 256MiB at once to avoid timeouts */
cur_size = min(dst.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
- 1, ring, false, &cur_size, &to);
+ r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
+ 1, false, &cur_size, &to);
if (r)
goto error;
- r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
- &next, true, delayed, k_job_id);
+ r = amdgpu_ttm_fill_mem(adev, entity,
+ src_data, to, cur_size, resv,
+ &next, true, k_job_id);
if (r)
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 577ee04ce0bf..143201ecea3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -52,6 +52,10 @@ struct amdgpu_gtt_mgr {
spinlock_t lock;
};
+/*
+ * Scheduler entity handed to the TTM buffer helpers (amdgpu_copy_buffer(),
+ * amdgpu_fill_buffer(), ...) to select which entity their jobs run on.
+ * For now it only wraps a drm_sched_entity. struct amdgpu_mman holds three
+ * instances: default_entity (initialised with DRM_SCHED_PRIORITY_KERNEL),
+ * clear_entity and move_entity (both DRM_SCHED_PRIORITY_NORMAL).
+ */
+struct amdgpu_ttm_buffer_entity {
+ struct drm_sched_entity base;
+};
+
struct amdgpu_mman {
struct ttm_device bdev;
struct ttm_pool *ttm_pools;
@@ -64,10 +68,10 @@ struct amdgpu_mman {
bool buffer_funcs_enabled;
struct mutex gtt_window_lock;
- /* High priority scheduler entity for buffer moves */
- struct drm_sched_entity high_pr;
- /* Low priority scheduler entity for VRAM clearing */
- struct drm_sched_entity low_pr;
+
+ struct amdgpu_ttm_buffer_entity default_entity;
+ struct amdgpu_ttm_buffer_entity clear_entity;
+ struct amdgpu_ttm_buffer_entity move_entity;
struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr;
@@ -137,6 +141,12 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
+int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
+ struct drm_mm_node *mm_node,
+ u64 num_pages,
+ enum drm_mm_insert_mode mode);
+void amdgpu_gtt_mgr_free_entries(struct amdgpu_gtt_mgr *mgr,
+ struct drm_mm_node *mm_node);
uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
@@ -163,20 +173,22 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);
-int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+int amdgpu_copy_buffer(struct amdgpu_device *adev,
+ struct amdgpu_ttm_buffer_entity *entity,
+ uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
- struct dma_fence **fence, bool direct_submit,
+ struct dma_fence **fence,
bool vm_needs_flush, uint32_t copy_flags);
int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct dma_resv *resv,
struct dma_fence **fence);
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint32_t src_data,
- struct dma_resv *resv,
- struct dma_fence **fence,
- bool delayed,
- u64 k_job_id);
+int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
+ struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
@@ -213,4 +225,13 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
+int amdgpu_ttm_mmio_remap_alloc_sgt(struct amdgpu_device *adev,
+ struct ttm_resource *res,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt);
+void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table *sgt);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index e96f24e9ad57..1ab61e7b35db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -166,6 +166,8 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2);
const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 =
container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3);
+ const struct rlc_firmware_header_v2_5 *rlc_hdr_v2_5 =
+ container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_5, v2_2);
switch (version_minor) {
case 0:
@@ -287,6 +289,26 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n",
le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes));
break;
+ case 5:
+ /* rlc_hdr v2_2 fields embedded in v2_5 */
+ DRM_INFO("rlc_iram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_iram_ucode_size_bytes));
+ DRM_INFO("rlc_iram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_iram_ucode_offset_bytes));
+ DRM_INFO("rlc_dram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_dram_ucode_size_bytes));
+ DRM_INFO("rlc_dram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_5->v2_2.rlc_dram_ucode_offset_bytes));
+ /* rlc_hdr v2_5 */
+ DRM_INFO("rlc_1_iram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_5->rlc_1_iram_ucode_size_bytes));
+ DRM_INFO("rlc_1_iram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_5->rlc_1_iram_ucode_offset_bytes));
+ DRM_INFO("rlc_1_dram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_5->rlc_1_dram_ucode_size_bytes));
+ DRM_INFO("rlc_1_dram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_5->rlc_1_dram_ucode_offset_bytes));
+ break;
default:
DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor);
break;
@@ -631,6 +653,10 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "RLC_IRAM";
case AMDGPU_UCODE_ID_RLC_DRAM:
return "RLC_DRAM";
+ case AMDGPU_UCODE_ID_RLC_IRAM_1:
+ return "RLC_IRAM_1";
+ case AMDGPU_UCODE_ID_RLC_DRAM_1:
+ return "RLC_DRAM_1";
case AMDGPU_UCODE_ID_RLC_G:
return "RLC_G";
case AMDGPU_UCODE_ID_RLC_P:
@@ -911,6 +937,14 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.rlc_dram_ucode;
break;
+ case AMDGPU_UCODE_ID_RLC_IRAM_1:
+ ucode->ucode_size = adev->gfx.rlc.rlc_1_iram_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlc_1_iram_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM_1:
+ ucode->ucode_size = adev->gfx.rlc.rlc_1_dram_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlc_1_dram_ucode;
+ break;
case AMDGPU_UCODE_ID_RLC_P:
ucode->ucode_size = adev->gfx.rlc.rlcp_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.rlcp_ucode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 6349aad6da35..f316776fe950 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -300,6 +300,15 @@ struct rlc_firmware_header_v2_4 {
uint32_t se3_tap_delays_ucode_offset_bytes;
};
+/* version_major=2, version_minor=5 */
+/*
+ * Extends the v2.2 header (embedded as the first member) with size/offset
+ * pairs for a second set of RLC IRAM/DRAM ucode, the "_1" images.
+ * The fields are read with le32_to_cpu() when printed.
+ * NOTE(review): presumably offsets are relative to the start of the firmware
+ * image, as for the other RLC headers - confirm against the loader.
+ */
+struct rlc_firmware_header_v2_5 {
+ struct rlc_firmware_header_v2_2 v2_2;
+ uint32_t rlc_1_iram_ucode_size_bytes;
+ uint32_t rlc_1_iram_ucode_offset_bytes;
+ uint32_t rlc_1_dram_ucode_size_bytes;
+ uint32_t rlc_1_dram_ucode_offset_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -449,6 +458,7 @@ union amdgpu_firmware_header {
struct rlc_firmware_header_v2_2 rlc_v2_2;
struct rlc_firmware_header_v2_3 rlc_v2_3;
struct rlc_firmware_header_v2_4 rlc_v2_4;
+ struct rlc_firmware_header_v2_5 rlc_v2_5;
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct sdma_firmware_header_v2_0 sdma_v2_0;
@@ -512,6 +522,8 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
AMDGPU_UCODE_ID_RLC_IRAM,
AMDGPU_UCODE_ID_RLC_DRAM,
+ AMDGPU_UCODE_ID_RLC_IRAM_1,
+ AMDGPU_UCODE_ID_RLC_DRAM_1,
AMDGPU_UCODE_ID_RLC_P,
AMDGPU_UCODE_ID_RLC_V,
AMDGPU_UCODE_ID_RLC_G,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 9a969175900e..c5dd5815056c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -166,7 +166,8 @@ static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
return 0;
}
-int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue,
u64 addr, u64 expected_size)
{
struct amdgpu_bo_va_mapping *va_map;
@@ -271,10 +272,9 @@ err:
return r;
}
-static int
-amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
@@ -282,7 +282,7 @@ amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
int r = 0;
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
- r = userq_funcs->preempt(uq_mgr, queue);
+ r = userq_funcs->preempt(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
found_hung_queue = true;
@@ -297,17 +297,16 @@ amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
return r;
}
-static int
-amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
int r = 0;
if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
- r = userq_funcs->restore(uq_mgr, queue);
+ r = userq_funcs->restore(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
} else {
@@ -318,10 +317,9 @@ amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
return r;
}
-static int
-amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
@@ -330,7 +328,7 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
(queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
- r = userq_funcs->unmap(uq_mgr, queue);
+ r = userq_funcs->unmap(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
found_hung_queue = true;
@@ -345,17 +343,16 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
return r;
}
-static int
-amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
int r = 0;
if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
- r = userq_funcs->map(uq_mgr, queue);
+ r = userq_funcs->map(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
amdgpu_userq_detect_and_reset_queues(uq_mgr);
@@ -367,10 +364,9 @@ amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
return r;
}
-static int
-amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct dma_fence *f = queue->last_fence;
int ret = 0;
@@ -387,11 +383,10 @@ amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
return ret;
}
-static void
-amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue,
- int queue_id)
+static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
+ int queue_id)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
@@ -400,10 +395,10 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
/* Drop the userq reference. */
amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
- uq_funcs->mqd_destroy(uq_mgr, queue);
+ uq_funcs->mqd_destroy(queue);
amdgpu_userq_fence_driver_free(queue);
/* Use interrupt-safe locking since IRQ handlers may access these XArrays */
- xa_erase_irq(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id);
+ xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id);
xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
queue->userq_mgr = NULL;
list_del(&queue->userq_va_list);
@@ -415,7 +410,7 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
static struct amdgpu_usermode_queue *
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
{
- return xa_load(&uq_mgr->userq_mgr_xa, qid);
+ return xa_load(&uq_mgr->userq_xa, qid);
}
void
@@ -584,25 +579,33 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
mutex_unlock(&uq_mgr->userq_mutex);
return -EINVAL;
}
- amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
+ amdgpu_userq_wait_for_last_fence(queue);
r = amdgpu_bo_reserve(queue->db_obj.obj, true);
if (!r) {
amdgpu_bo_unpin(queue->db_obj.obj);
amdgpu_bo_unreserve(queue->db_obj.obj);
}
amdgpu_bo_unref(&queue->db_obj.obj);
+
+ r = amdgpu_bo_reserve(queue->wptr_obj.obj, true);
+ if (!r) {
+ amdgpu_bo_unpin(queue->wptr_obj.obj);
+ amdgpu_bo_unreserve(queue->wptr_obj.obj);
+ }
+ amdgpu_bo_unref(&queue->wptr_obj.obj);
+
atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
#if defined(CONFIG_DEBUG_FS)
debugfs_remove_recursive(queue->debugfs_queue);
#endif
amdgpu_userq_detect_and_reset_queues(uq_mgr);
- r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ r = amdgpu_userq_unmap_helper(queue);
/*TODO: It requires a reset for userq hw unmap error*/
if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
queue->state = AMDGPU_USERQ_STATE_HUNG;
}
- amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
+ amdgpu_userq_cleanup(queue, queue_id);
mutex_unlock(&uq_mgr->userq_mutex);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -729,10 +732,11 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
db_info.db_obj = &queue->db_obj;
db_info.doorbell_offset = args->in.doorbell_offset;
+ queue->userq_mgr = uq_mgr;
/* Validate the userq virtual address.*/
- if (amdgpu_userq_input_va_validate(queue, args->in.queue_va, args->in.queue_size) ||
- amdgpu_userq_input_va_validate(queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
- amdgpu_userq_input_va_validate(queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
r = -EINVAL;
kfree(queue);
goto unlock;
@@ -755,7 +759,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
- r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
+ r = uq_funcs->mqd_create(queue, &args->in);
if (r) {
drm_file_err(uq_mgr->file, "Failed to create Queue\n");
amdgpu_userq_fence_driver_free(queue);
@@ -772,18 +776,18 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
- r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
+ r = xa_alloc(&uq_mgr->userq_xa, &qid, queue,
+ XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
if (r) {
drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
amdgpu_userq_fence_driver_free(queue);
- uq_funcs->mqd_destroy(uq_mgr, queue);
+ uq_funcs->mqd_destroy(queue);
kfree(queue);
r = -ENOMEM;
up_read(&adev->reset_domain->sem);
goto unlock;
}
up_read(&adev->reset_domain->sem);
- queue->userq_mgr = uq_mgr;
/* don't map the queue if scheduling is halted */
if (adev->userq_halt_for_enforce_isolation &&
@@ -793,12 +797,12 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
else
skip_map_queue = false;
if (!skip_map_queue) {
- r = amdgpu_userq_map_helper(uq_mgr, queue);
+ r = amdgpu_userq_map_helper(queue);
if (r) {
drm_file_err(uq_mgr->file, "Failed to map Queue\n");
- xa_erase(&uq_mgr->userq_mgr_xa, qid);
+ xa_erase(&uq_mgr->userq_xa, qid);
amdgpu_userq_fence_driver_free(queue);
- uq_funcs->mqd_destroy(uq_mgr, queue);
+ uq_funcs->mqd_destroy(queue);
kfree(queue);
goto unlock;
}
@@ -923,8 +927,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
int ret = 0, r;
/* Resume all the queues for this process */
- xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
-
+ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
if (!amdgpu_userq_buffer_vas_mapped(queue)) {
drm_file_err(uq_mgr->file,
"trying restore queue without va mapping\n");
@@ -932,7 +935,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
continue;
}
- r = amdgpu_userq_restore_helper(uq_mgr, queue);
+ r = amdgpu_userq_restore_helper(queue);
if (r)
ret = r;
}
@@ -1167,8 +1170,8 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
amdgpu_userq_detect_and_reset_queues(uq_mgr);
/* Try to unmap all the queues in this process ctx */
- xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
- r = amdgpu_userq_preempt_helper(uq_mgr, queue);
+ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
+ r = amdgpu_userq_preempt_helper(queue);
if (r)
ret = r;
}
@@ -1202,7 +1205,7 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
unsigned long queue_id;
int ret;
- xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
struct dma_fence *f = queue->last_fence;
if (!f || dma_fence_is_signaled(f))
@@ -1252,7 +1255,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
struct amdgpu_device *adev)
{
mutex_init(&userq_mgr->userq_mutex);
- xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC);
+ xa_init_flags(&userq_mgr->userq_xa, XA_FLAGS_ALLOC);
userq_mgr->adev = adev;
userq_mgr->file = file_priv;
@@ -1269,13 +1272,13 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
mutex_lock(&userq_mgr->userq_mutex);
amdgpu_userq_detect_and_reset_queues(userq_mgr);
- xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) {
- amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
- amdgpu_userq_unmap_helper(userq_mgr, queue);
- amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
+ xa_for_each(&userq_mgr->userq_xa, queue_id, queue) {
+ amdgpu_userq_wait_for_last_fence(queue);
+ amdgpu_userq_unmap_helper(queue);
+ amdgpu_userq_cleanup(queue, queue_id);
}
- xa_destroy(&userq_mgr->userq_mgr_xa);
+ xa_destroy(&userq_mgr->userq_xa);
mutex_unlock(&userq_mgr->userq_mutex);
mutex_destroy(&userq_mgr->userq_mutex);
}
@@ -1297,9 +1300,9 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
guard(mutex)(&uqm->userq_mutex);
amdgpu_userq_detect_and_reset_queues(uqm);
if (adev->in_s0ix)
- r = amdgpu_userq_preempt_helper(uqm, queue);
+ r = amdgpu_userq_preempt_helper(queue);
else
- r = amdgpu_userq_unmap_helper(uqm, queue);
+ r = amdgpu_userq_unmap_helper(queue);
if (r)
return r;
}
@@ -1321,9 +1324,9 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
uqm = queue->userq_mgr;
guard(mutex)(&uqm->userq_mutex);
if (adev->in_s0ix)
- r = amdgpu_userq_restore_helper(uqm, queue);
+ r = amdgpu_userq_restore_helper(queue);
else
- r = amdgpu_userq_map_helper(uqm, queue);
+ r = amdgpu_userq_map_helper(queue);
if (r)
return r;
}
@@ -1355,7 +1358,7 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
amdgpu_userq_detect_and_reset_queues(uqm);
- r = amdgpu_userq_preempt_helper(uqm, queue);
+ r = amdgpu_userq_preempt_helper(queue);
if (r)
ret = r;
}
@@ -1387,9 +1390,9 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
- r = amdgpu_userq_restore_helper(uqm, queue);
- if (r)
- ret = r;
+ r = amdgpu_userq_restore_helper(queue);
+ if (r)
+ ret = r;
}
mutex_unlock(&uqm->userq_mutex);
}
@@ -1439,9 +1442,9 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
uqm = queue->userq_mgr;
cancel_delayed_work_sync(&uqm->resume_work);
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
- amdgpu_userq_wait_for_last_fence(uqm, queue);
+ amdgpu_userq_wait_for_last_fence(queue);
userq_funcs = adev->userq_funcs[queue->queue_type];
- userq_funcs->unmap(uqm, queue);
+ userq_funcs->unmap(queue);
/* just mark all queues as hung at this point.
* if unmap succeeds, we could map again
* in amdgpu_userq_post_reset() if vram is not lost
@@ -1458,18 +1461,16 @@ int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost)
* at this point, we should be able to map it again
* and continue if vram is not lost.
*/
- struct amdgpu_userq_mgr *uqm;
struct amdgpu_usermode_queue *queue;
const struct amdgpu_userq_funcs *userq_funcs;
unsigned long queue_id;
int r = 0;
xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
- uqm = queue->userq_mgr;
if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) {
userq_funcs = adev->userq_funcs[queue->queue_type];
/* Re-map queue */
- r = userq_funcs->map(uqm, queue);
+ r = userq_funcs->map(queue);
if (r) {
dev_err(adev->dev, "Failed to remap queue %ld\n", queue_id);
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index c37444427a14..1eaa94f8a291 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -77,19 +77,13 @@ struct amdgpu_usermode_queue {
};
struct amdgpu_userq_funcs {
- int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr,
- struct drm_amdgpu_userq_in *args,
- struct amdgpu_usermode_queue *queue);
- void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *uq);
- int (*unmap)(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue);
- int (*map)(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue);
- int (*preempt)(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue);
- int (*restore)(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue);
+ int (*mqd_create)(struct amdgpu_usermode_queue *queue,
+ struct drm_amdgpu_userq_in *args);
+ void (*mqd_destroy)(struct amdgpu_usermode_queue *uq);
+ int (*unmap)(struct amdgpu_usermode_queue *queue);
+ int (*map)(struct amdgpu_usermode_queue *queue);
+ int (*preempt)(struct amdgpu_usermode_queue *queue);
+ int (*restore)(struct amdgpu_usermode_queue *queue);
int (*detect_and_reset)(struct amdgpu_device *adev,
int queue_type);
};
@@ -97,11 +91,11 @@ struct amdgpu_userq_funcs {
/* Usermode queues for gfx */
struct amdgpu_userq_mgr {
/**
- * @userq_mgr_xa: Per-process user queue map (queue ID → queue)
+ * @userq_xa: Per-process user queue map (queue ID → queue)
* Key: queue_id (unique ID within the process's userq manager)
* Value: struct amdgpu_usermode_queue
*/
- struct xarray userq_mgr_xa;
+ struct xarray userq_xa;
struct mutex userq_mutex;
struct amdgpu_device *adev;
struct delayed_work resume_work;
@@ -153,7 +147,8 @@ void amdgpu_userq_reset_work(struct work_struct *work);
void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
-int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue,
u64 addr, u64 expected_size);
int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index eba9fb359047..25f178536469 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -352,6 +352,7 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = {
/**
* amdgpu_userq_fence_read_wptr - Read the userq wptr value
*
+ * @adev: amdgpu_device pointer
* @queue: user mode queue structure pointer
* @wptr: write pointer value
*
@@ -361,7 +362,8 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = {
*
* Returns wptr value on success, error on failure.
*/
-static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
+static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue,
u64 *wptr)
{
struct amdgpu_bo_va_mapping *mapping;
@@ -455,6 +457,7 @@ amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq)
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
+ struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
struct drm_amdgpu_userq_signal *args = data;
@@ -539,13 +542,13 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
}
/* Retrieve the user queue */
- queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id);
+ queue = xa_load(&userq_mgr->userq_xa, args->queue_id);
if (!queue) {
r = -ENOENT;
goto put_gobj_write;
}
- r = amdgpu_userq_fence_read_wptr(queue, &wptr);
+ r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr);
if (r)
goto put_gobj_write;
@@ -901,7 +904,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
*/
num_fences = dma_fence_dedup_array(fences, num_fences);
- waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id);
+ waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id);
if (!waitq) {
r = -EINVAL;
goto free_fences;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 5c38f0d30c87..9d5cca7da1d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -279,7 +279,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
+ drm_info(adev_to_drm(adev), "Found UVD firmware Version: %u.%u Family ID: %u\n",
version_major, version_minor, family_id);
/*
@@ -306,7 +306,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
- DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n",
+ drm_info(adev_to_drm(adev), "Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n",
enc_major, enc_minor, dec_minor, family_id);
adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
@@ -467,7 +467,8 @@ int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev)
int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
if (amdgpu_ras_intr_triggered())
- DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
+ drm_warn(adev_to_drm(adev),
+ "UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 5e0786ea911b..75ae9b429420 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -63,6 +63,7 @@
#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"
#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin"
+#define FIRMWARE_VCN5_3_0 "amdgpu/vcn_5_3_0.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@@ -90,6 +91,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
+MODULE_FIRMWARE(FIRMWARE_VCN5_3_0);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 47a6ce4fdc74..f8eac92a2b36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -37,6 +37,7 @@
#include "vi.h"
#include "soc15.h"
#include "nv.h"
+#include "amdgpu_virt_ras_cmd.h"
#define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \
do { \
@@ -1337,6 +1338,133 @@ bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
return ret;
}
+static u32 amdgpu_virt_rlcg_vfi_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
+{
+ uint32_t timeout = 100;
+ uint32_t i;
+
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+ void *vfi_cmd;
+ void *vfi_stat;
+ void *vfi_addr;
+ void *vfi_data;
+ void *vfi_grbm_cntl;
+ void *vfi_grbm_idx;
+ uint32_t cmd;
+ uint32_t stat;
+ uint32_t addr = offset;
+ uint32_t data;
+ uint32_t grbm_cntl_data;
+ uint32_t grbm_idx_data;
+
+ unsigned long flags;
+ bool is_err = true;
+
+ if (!adev->gfx.rlc.rlcg_reg_access_supported) {
+ dev_err(adev->dev, "VFi interface is not available\n");
+ return 0;
+ }
+
+ if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
+ dev_err(adev->dev, "VFi invalid XCC, xcc_id=0x%x\n", xcc_id);
+ return 0;
+ }
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
+ vfi_cmd = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_cmd;
+ vfi_stat = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_stat;
+ vfi_addr = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_addr;
+ vfi_data = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_data;
+ vfi_grbm_cntl = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_grbm_cntl;
+ vfi_grbm_idx = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_grbm_idx;
+ grbm_cntl_data = reg_access_ctrl->vfi_grbm_cntl_data;
+ grbm_idx_data = reg_access_ctrl->vfi_grbm_idx_data;
+
+ if (flag == AMDGPU_RLCG_GC_WRITE) {
+ data = v;
+ cmd = AMDGPU_RLCG_VFI_CMD__WR;
+
+ // the GRBM_GFX_CNTL and GRBM_GFX_INDEX are protected by mutex outside this call
+ if (addr == reg_access_ctrl->grbm_cntl) {
+ reg_access_ctrl->vfi_grbm_cntl_data = data;
+ return 0;
+ } else if (addr == reg_access_ctrl->grbm_idx) {
+ reg_access_ctrl->vfi_grbm_idx_data = data;
+ return 0;
+ }
+
+ } else if (flag == AMDGPU_RLCG_GC_READ) {
+ data = 0;
+ cmd = AMDGPU_RLCG_VFI_CMD__RD;
+
+ // the GRBM_GFX_CNTL and GRBM_GFX_INDEX are protected by mutex outside this call
+ if (addr == reg_access_ctrl->grbm_cntl)
+ return grbm_cntl_data;
+ else if (addr == reg_access_ctrl->grbm_idx)
+ return grbm_idx_data;
+
+ } else {
+ dev_err(adev->dev, "VFi invalid access, flag=0x%x\n", flag);
+ return 0;
+ }
+
+ spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
+
+ writel(addr, vfi_addr);
+ writel(data, vfi_data);
+ writel(grbm_cntl_data, vfi_grbm_cntl);
+ writel(grbm_idx_data, vfi_grbm_idx);
+
+ writel(AMDGPU_RLCG_VFI_STAT__BUSY, vfi_stat);
+ writel(cmd, vfi_cmd);
+
+ for (i = 0; i < timeout; i++) {
+ stat = readl(vfi_stat);
+ if (stat != AMDGPU_RLCG_VFI_STAT__BUSY)
+ break;
+ udelay(10);
+ }
+
+ switch (stat) {
+ case AMDGPU_RLCG_VFI_STAT__DONE:
+ is_err = false;
+ if (cmd == AMDGPU_RLCG_VFI_CMD__RD)
+ data = readl(vfi_data);
+ break;
+ case AMDGPU_RLCG_VFI_STAT__BUSY:
+ dev_err(adev->dev, "VFi access timeout\n");
+ break;
+ case AMDGPU_RLCG_VFI_STAT__INV_CMD:
+ dev_err(adev->dev, "VFi invalid command\n");
+ break;
+ case AMDGPU_RLCG_VFI_STAT__INV_ADDR:
+ dev_err(adev->dev, "VFi invalid address\n");
+ break;
+ case AMDGPU_RLCG_VFI_STAT__ERR:
+ dev_err(adev->dev, "VFi unknown error\n");
+ break;
+ default:
+ dev_err(adev->dev, "VFi unknown status code\n");
+ break;
+ }
+
+ spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);
+
+ if (is_err)
+ dev_err(adev->dev, "VFi: [grbm_cntl=0x%x grbm_idx=0x%x] addr=0x%x (byte addr 0x%x), data=0x%x, cmd=0x%x\n",
+ grbm_cntl_data, grbm_idx_data,
+ addr, addr * 4, data, cmd);
+ else
+ dev_dbg(adev->dev, "VFi: [grbm_cntl=0x%x grbm_idx=0x%x] addr=0x%x (byte addr 0x%x), data=0x%x, cmd=0x%x\n",
+ grbm_cntl_data, grbm_idx_data,
+ addr, addr * 4, data, cmd);
+
+ return data;
+}
+
u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
{
struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
@@ -1350,6 +1478,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
void *spare_int;
unsigned long flags;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))
+ return amdgpu_virt_rlcg_vfi_reg_rw(adev, offset, v, flag, xcc_id);
+
if (!adev->gfx.rlc.rlcg_reg_access_supported) {
dev_err(adev->dev,
"indirect registers access through rlcg is not available\n");
@@ -1533,6 +1664,9 @@ bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev)
if (adev->virt.ras_en_caps.bits.poison_propogation_mode)
con->poison_supported = true; /* Poison is handled by host */
+ if (adev->virt.ras_en_caps.bits.uniras_supported)
+ amdgpu_virt_ras_set_remote_uniras(adev, true);
+
return true;
}
@@ -1845,3 +1979,28 @@ int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, b
return r;
}
+
+static int req_remote_ras_cmd(struct amdgpu_device *adev,
+ u32 param1, u32 param2, u32 param3)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->req_remote_ras_cmd)
+ return virt->ops->req_remote_ras_cmd(adev, param1, param2, param3);
+ return -ENOENT;
+}
+
+int amdgpu_virt_send_remote_ras_cmd(struct amdgpu_device *adev,
+ uint64_t buf, uint32_t buf_len)
+{
+ uint64_t gpa = buf;
+ int ret = -EIO;
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ ret = req_remote_ras_cmd(adev,
+ lower_32_bits(gpa), upper_32_bits(gpa), buf_len);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 01d5bca2dee1..886fbce0bfd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -47,6 +47,15 @@
#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF
#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000
+#define AMDGPU_RLCG_VFI_CMD__WR 0x0
+#define AMDGPU_RLCG_VFI_CMD__RD 0x1
+
+#define AMDGPU_RLCG_VFI_STAT__BUSY 0x0
+#define AMDGPU_RLCG_VFI_STAT__DONE 0x1
+#define AMDGPU_RLCG_VFI_STAT__INV_CMD 0x2
+#define AMDGPU_RLCG_VFI_STAT__INV_ADDR 0x3
+#define AMDGPU_RLCG_VFI_STAT__ERR 0xFF
+
/* all asic after AI use this offset */
#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
/* tonga/fiji use this offset */
@@ -105,6 +114,8 @@ struct amdgpu_virt_ops {
int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
int (*req_bad_pages)(struct amdgpu_device *adev);
int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr);
+ int (*req_remote_ras_cmd)(struct amdgpu_device *adev,
+ u32 param1, u32 param2, u32 param3);
};
/*
@@ -483,4 +494,6 @@ bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
enum amdgpu_ras_block block);
void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit);
+int amdgpu_virt_send_remote_ras_cmd(struct amdgpu_device *adev,
+ uint64_t buf, uint32_t buf_len);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 79bad9cbe2ab..e548dc9708a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -53,7 +53,9 @@ static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer,
output->period_ns);
if (ret_overrun != 1)
- DRM_WARN("%s: vblank timer overrun\n", __func__);
+ drm_warn(amdgpu_crtc->base.dev,
+ "%s: vblank timer overrun count: %llu\n",
+ __func__, ret_overrun);
ret = drm_crtc_handle_vblank(crtc);
/* Don't queue timer again when vblank is disabled. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c362d4dfb5bb..0eccb31793ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -834,7 +834,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, job->vmid);
if (ring->funcs->emit_gds_switch &&
gds_switch_needed) {
@@ -2362,9 +2362,26 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
unsigned max_bits)
{
unsigned int max_size = 1 << (max_bits - 30);
+ bool sys_5level_pgtable = false;
unsigned int vm_size;
uint64_t tmp;
+#ifdef CONFIG_X86_64
+ /*
+ * Refer to function configure_5level_paging() for details.
+ */
+ sys_5level_pgtable = (native_read_cr4() & X86_CR4_LA57);
+#endif
+
+ /*
+ * If GPU supports 5-level page table, but system uses 4-level page table,
+ * then use 4-level page table on GPU
+ */
+ if (max_level == 4 && !sys_5level_pgtable) {
+ min_vm_size = 256 * 1024;
+ max_level = 3;
+ }
+
/* adjust vm size first */
if (amdgpu_vm_size != -1) {
vm_size = amdgpu_vm_size;
@@ -2407,6 +2424,9 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
switch (adev->vm_manager.num_level) {
+ case 4:
+ adev->vm_manager.root_level = AMDGPU_VM_PDB3;
+ break;
case 3:
adev->vm_manager.root_level = AMDGPU_VM_PDB2;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 15d757c016cb..139642eacdd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -129,6 +129,7 @@ struct amdgpu_bo_vm;
AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype))
#define AMDGPU_PTE_DCC (1ULL << 58)
+#define AMDGPU_PTE_BUS_ATOMICS (1ULL << 59)
#define AMDGPU_PTE_IS_PTE (1ULL << 63)
/* PDE Block Fragment Size for gfx v12 */
@@ -185,9 +186,10 @@ struct amdgpu_bo_vm;
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
/* VMPT level enumerate, and the hierarchy is:
- * PDB2->PDB1->PDB0->PTB
+ * PDB3->PDB2->PDB1->PDB0->PTB
*/
enum amdgpu_vm_level {
+ AMDGPU_VM_PDB3,
AMDGPU_VM_PDB2,
AMDGPU_VM_PDB1,
AMDGPU_VM_PDB0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index f794fb1cc06e..31a437ce9570 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -51,6 +51,7 @@ static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
unsigned int level)
{
switch (level) {
+ case AMDGPU_VM_PDB3:
case AMDGPU_VM_PDB2:
case AMDGPU_VM_PDB1:
case AMDGPU_VM_PDB0:
@@ -366,6 +367,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo *ancestor = &vmbo->bo;
unsigned int entries;
struct amdgpu_bo *bo = &vmbo->bo;
+ uint64_t value = 0, flags = 0;
uint64_t addr;
int r, idx;
@@ -403,7 +405,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
addr = 0;
- uint64_t value = 0, flags = 0;
if (adev->asic_type >= CHIP_VEGA10) {
if (level != AMDGPU_VM_PTB) {
/* Handle leaf PDEs as PTEs */
@@ -412,7 +413,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
&value, &flags);
} else {
/* Workaround for fault priority problem on GMC9 */
- flags = AMDGPU_PTE_EXECUTABLE;
+ flags = AMDGPU_PTE_EXECUTABLE | adev->gmc.init_pte_flags;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index aa78c2ee9e21..fd881388d612 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -790,7 +790,7 @@ static int vpe_ring_test_ring(struct amdgpu_ring *ring)
ret = amdgpu_ring_alloc(ring, 4);
if (ret) {
- dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, ret);
+ dev_err(adev->dev, "dma failed to lock ring %d (%d).\n", ring->idx, ret);
goto out;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index 1083db8cea2e..73250ab45f20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -540,6 +540,7 @@ static void amdgpu_set_xcp_id(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
case AMDGPU_RING_TYPE_COMPUTE:
case AMDGPU_RING_TYPE_KIQ:
+ case AMDGPU_RING_TYPE_MES:
ip_blk = AMDGPU_XCP_GFX;
break;
case AMDGPU_RING_TYPE_SDMA:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 1928d9e224fc..8058e8f35d41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -41,6 +41,9 @@
#define AMDGPU_XCP_OPS_KFD (1 << 0)
+#define XCP_INST_MASK(num_inst, xcp_id) \
+ (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
+
struct amdgpu_fpriv;
enum AMDGPU_XCP_IP_BLOCK {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 3cdb1e0eca37..cffb2f805de2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -201,7 +201,8 @@ union amd_sriov_ras_caps {
uint64_t block_mpio : 1;
uint64_t block_mmsch : 1;
uint64_t poison_propogation_mode : 1;
- uint64_t reserved : 43;
+ uint64_t uniras_supported : 1;
+ uint64_t reserved : 42;
} bits;
uint64_t all;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index f9e2edf5260b..d9842aa25283 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -31,9 +31,6 @@
#include "sdma_v4_4_2.h"
#include "amdgpu_ip.h"
-#define XCP_INST_MASK(num_inst, xcp_id) \
- (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
-
void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
{
int i;
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 7a063e44d429..371ee82a8912 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1463,7 +1463,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
ctx->vbios_pn[count] = 0;
}
- pr_info("ATOM BIOS: %s\n", ctx->vbios_pn);
+ drm_info(ctx->card->dev, "ATOM BIOS: %s\n", ctx->vbios_pn);
}
static void atom_get_vbios_version(struct atom_context *ctx)
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index a51f3414b65d..34644cab6cff 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -218,7 +218,7 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode
bd->props.power = BACKLIGHT_POWER_ON;
backlight_update_status(bd);
- DRM_INFO("amdgpu atom DIG backlight initialized\n");
+ drm_info(adev_to_drm(adev), "ATOM DIG backlight initialized\n");
return;
@@ -256,7 +256,7 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder)
backlight_device_unregister(bd);
kfree(pdata);
- DRM_INFO("amdgpu atom LVDS backlight unloaded\n");
+ drm_info(adev_to_drm(adev), "ATOM LVDS backlight unloaded\n");
}
}
@@ -1724,7 +1724,7 @@ amdgpu_atombios_encoder_dac_detect(struct drm_encoder *encoder,
uint32_t bios_0_scratch;
if (!amdgpu_atombios_encoder_dac_load_detect(encoder, connector)) {
- DRM_DEBUG_KMS("detect returned false \n");
+ DRM_DEBUG_KMS("detect returned false\n");
return connector_status_unknown;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 9cd63b4177bf..c081784a19c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1552,16 +1552,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate == 2) {
- DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+ drm_info(adev_to_drm(adev), "PCIE gen 3 link speeds already enabled\n");
return;
}
- DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ drm_info(adev_to_drm(adev), "enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
} else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
if (current_data_rate == 1) {
- DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+ drm_info(adev_to_drm(adev), "PCIE gen 2 link speeds already enabled\n");
return;
}
- DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ drm_info(adev_to_drm(adev), "enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
@@ -1957,10 +1957,6 @@ static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
return (nak_r + nak_g);
}
-static void cik_pre_asic_init(struct amdgpu_device *adev)
-{
-}
-
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@@ -1981,7 +1977,6 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.need_reset_on_init = &cik_need_reset_on_init,
.get_pcie_replay_count = &cik_get_pcie_replay_count,
.supports_baco = &cik_asic_supports_baco,
- .pre_asic_init = &cik_pre_asic_init,
.query_video_codecs = &cik_query_video_codecs,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index d75b9940f248..41bbedb8e157 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4041,8 +4041,8 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
- ring->idx, r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
return r;
}
@@ -4090,7 +4090,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
goto err1;
}
@@ -4170,7 +4170,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
}
if (!adev->gfx.cp_fw_write_wait)
- DRM_WARN_ONCE("CP firmware version too old, please update!");
+ drm_warn_once(adev_to_drm(adev), "CP firmware version too old, please update!");
}
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
@@ -4575,6 +4575,7 @@ static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v10_0_select_me_pipe_q,
.init_spm_golden = &gfx_v10_0_init_spm_golden_registers,
.update_perfmon_mgcg = &gfx_v10_0_update_perfmon_mgcg,
+ .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};
static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
@@ -6378,7 +6379,7 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
ring = &adev->gfx.gfx_ring[0];
r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
return r;
}
@@ -6428,7 +6429,7 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
ring = &adev->gfx.gfx_ring[1];
r = amdgpu_ring_alloc(ring, 2);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
return r;
}
@@ -8318,7 +8319,8 @@ static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
}
}
-static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
+static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_ring *ring, unsigned int vmid)
{
amdgpu_gfx_off_ctrl(adev, false);
@@ -8613,25 +8615,13 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- switch (ring->me) {
- case 1:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
- break;
- case 2:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
- break;
- default:
- return;
- }
- reg_mem_engine = 0;
- } else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
- reg_mem_engine = 1; /* pfp */
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
+ return;
}
+ adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
adev->nbio.funcs->get_hdp_flush_req_offset(adev),
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
@@ -9395,7 +9385,7 @@ static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_ERROR("Illegal opcode in command stream \n");
+ DRM_ERROR("Illegal opcode in command stream\n");
gfx_v10_0_handle_priv_fault(adev, entry);
return 0;
}
@@ -10124,7 +10114,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
if (!adev || !cu_info)
return -EINVAL;
- amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+ amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4, 2);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 8a2ee2de390f..3a4ca104b161 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -120,6 +120,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_4_rlc.bin");
static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
@@ -416,7 +420,8 @@ static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
- amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action,
+ gpu_addr, seq, 0);
return;
}
@@ -566,8 +571,8 @@ static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
- ring->idx, r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
return r;
}
@@ -623,7 +628,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
goto err1;
}
@@ -917,7 +922,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);
return 0;
}
@@ -1052,10 +1057,14 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
struct amdgpu_gfx_shadow_info *shadow_info)
{
+ /* for gfx */
shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+ /* for compute */
+ shadow_info->eop_size = GFX11_MEC_HPD_SIZE;
+ shadow_info->eop_alignment = 256;
}
static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
@@ -1080,6 +1089,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
+ .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
@@ -1107,6 +1117,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1589,6 +1600,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 2;
@@ -3046,7 +3058,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4))
bootload_status = RREG32_SOC15(GC, 0,
regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
else
@@ -3617,7 +3630,7 @@ static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
ring = &adev->gfx.gfx_ring[0];
r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(&adev->ddev, "cp failed to lock ring (%d).\n", r);
return r;
}
@@ -3662,7 +3675,7 @@ static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
ring = &adev->gfx.gfx_ring[1];
r = amdgpu_ring_alloc(ring, 2);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
return r;
}
@@ -4593,7 +4606,7 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
}
if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
- r = amdgpu_mes_kiq_hw_init(adev);
+ r = amdgpu_mes_kiq_hw_init(adev, 0);
else
r = gfx_v11_0_kiq_resume(adev);
if (r)
@@ -4783,7 +4796,7 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
adev->gfx.is_poweron = true;
if(get_gb_addr_config(adev))
- DRM_WARN("Invalid gb_addr_config !\n");
+ drm_warn(adev_to_drm(adev), "Invalid gb_addr_config !\n");
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
adev->gfx.rs64_enable)
@@ -4901,7 +4914,7 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
if (amdgpu_gfx_disable_kcq(adev, 0))
DRM_ERROR("KCQ disable failed\n");
- amdgpu_mes_kiq_hw_fini(adev);
+ amdgpu_mes_kiq_hw_fini(adev, 0);
}
if (amdgpu_sriov_vf(adev))
@@ -5568,7 +5581,8 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
+static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_ring *ring, unsigned vmid)
{
u32 reg, pre_data, data;
@@ -5633,6 +5647,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
break;
default:
@@ -5671,6 +5686,7 @@ static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
if (!enable)
amdgpu_gfx_off_ctrl(adev, false);
@@ -5705,6 +5721,7 @@ static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -5831,25 +5848,13 @@ static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- switch (ring->me) {
- case 1:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
- break;
- case 2:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
- break;
- default:
- return;
- }
- reg_mem_engine = 0;
- } else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
- reg_mem_engine = 1; /* pfp */
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
+ return;
}
+ adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
adev->nbio.funcs->get_hdp_flush_req_offset(adev),
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
@@ -6664,7 +6669,7 @@ static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_ERROR("Illegal opcode in command stream \n");
+ DRM_ERROR("Illegal opcode in command stream\n");
gfx_v11_0_handle_priv_fault(adev, entry);
return 0;
}
@@ -6827,7 +6832,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
- r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false, 0);
if (r) {
dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
@@ -6842,7 +6847,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
return r;
}
- r = amdgpu_mes_map_legacy_queue(adev, ring);
+ r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
if (r) {
dev_err(adev->dev, "failed to remap kgq\n");
return r;
@@ -6990,7 +6995,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
- r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
if (r) {
dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
r = gfx_v11_0_reset_compute_pipe(ring);
@@ -7003,7 +7008,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
dev_err(adev->dev, "fail to init kcq\n");
return r;
}
- r = amdgpu_mes_map_legacy_queue(adev, ring);
+ r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
if (r) {
dev_err(adev->dev, "failed to remap kcq\n");
return r;
@@ -7477,7 +7482,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
if (!adev || !cu_info)
return -EINVAL;
- amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+ amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index d01d2712cf57..6cd16f016c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -356,7 +356,8 @@ static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
- amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action,
+ gpu_addr, seq, 0);
return;
}
@@ -459,8 +460,8 @@ static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 5);
if (r) {
- dev_err(adev->dev,
- "amdgpu: cp failed to lock ring %d (%d).\n",
+ drm_err(adev_to_drm(adev),
+ "cp failed to lock ring %d (%d).\n",
ring->idx, r);
return r;
}
@@ -517,7 +518,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
goto err1;
}
@@ -761,7 +762,7 @@ static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);
return 0;
}
@@ -909,10 +910,14 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
struct amdgpu_gfx_shadow_info *shadow_info)
{
+ /* for gfx */
shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+ /* for compute */
+ shadow_info->eop_size = GFX12_MEC_HPD_SIZE;
+ shadow_info->eop_alignment = 256;
}
static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
@@ -937,6 +942,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
.update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
.get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info,
+ .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};
static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
@@ -3470,7 +3476,7 @@ static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
}
if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
- r = amdgpu_mes_kiq_hw_init(adev);
+ r = amdgpu_mes_kiq_hw_init(adev, 0);
else
r = gfx_v12_0_kiq_resume(adev);
if (r)
@@ -3651,7 +3657,7 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
adev->gfx.is_poweron = true;
if (get_gb_addr_config(adev))
- DRM_WARN("Invalid gb_addr_config !\n");
+ drm_warn(adev_to_drm(adev), "Invalid gb_addr_config !\n");
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
gfx_v12_0_config_gfx_rs64(adev);
@@ -3759,7 +3765,7 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
if (amdgpu_gfx_disable_kcq(adev, 0))
DRM_ERROR("KCQ disable failed\n");
- amdgpu_mes_kiq_hw_fini(adev);
+ amdgpu_mes_kiq_hw_fini(adev, 0);
}
if (amdgpu_sriov_vf(adev)) {
@@ -3956,6 +3962,7 @@ static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
}
static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev,
+ int xcc_id,
struct amdgpu_ring *ring,
unsigned vmid)
{
@@ -4387,25 +4394,13 @@ static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- switch (ring->me) {
- case 1:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
- break;
- case 2:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
- break;
- default:
- return;
- }
- reg_mem_engine = 0;
- } else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
- reg_mem_engine = 1; /* pfp */
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
+ return;
}
+ adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
adev->nbio.funcs->get_hdp_flush_req_offset(adev),
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
@@ -5051,7 +5046,7 @@ static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_ERROR("Illegal opcode in command stream \n");
+ DRM_ERROR("Illegal opcode in command stream\n");
gfx_v12_0_handle_priv_fault(adev, entry);
return 0;
}
@@ -5312,7 +5307,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
- r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false, 0);
if (r) {
dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
r = gfx_v12_reset_gfx_pipe(ring);
@@ -5326,7 +5321,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
return r;
}
- r = amdgpu_mes_map_legacy_queue(adev, ring);
+ r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
if (r) {
dev_err(adev->dev, "failed to remap kgq\n");
return r;
@@ -5427,7 +5422,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
- r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
if (r) {
dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
r = gfx_v12_0_reset_compute_pipe(ring);
@@ -5440,7 +5435,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
dev_err(adev->dev, "failed to init kcq\n");
return r;
}
- r = amdgpu_mes_map_legacy_queue(adev, ring);
+ r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
if (r) {
dev_err(adev->dev, "failed to remap kcq\n");
return r;
@@ -5733,7 +5728,7 @@ static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
if (!adev || !cu_info)
return -EINVAL;
- amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+ amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
new file mode 100644
index 000000000000..86cc90a66296
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -0,0 +1,4067 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "amdgpu_atomfirmware.h"
+#include "imu_v12_1.h"
+#include "soc_v1_0.h"
+#include "gfx_v12_1_pkt.h"
+
+#include "gc/gc_12_1_0_offset.h"
+#include "gc/gc_12_1_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"
+
+#include "soc15.h"
+#include "clearstate_gfx12.h"
+#include "v12_structs.h"
+#include "gfx_v12_1.h"
+#include "mes_v12_1.h"
+
+/* File-local constants for the GFX 12.1 IP block. */
+#define GFX12_MEC_HPD_SIZE 2048
+#define NUM_SIMD_PER_CU_GFX12_1 4
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+/*
+ * Register values named after CP_HQD_* / CP_MQD_* registers.
+ * NOTE(review): presumably the reset defaults programmed into the MQD;
+ * confirm against the MQD init code later in this file.
+ */
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000000
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0ae06301
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00100000
+
+/*
+ * Firmware images advertised by this module; they match the
+ * "amdgpu/%s_mec.bin" and "amdgpu/%s_rlc.bin" patterns requested in
+ * gfx_v12_1_init_microcode().
+ */
+MODULE_FIRMWARE("amdgpu/gc_12_1_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_1_0_rlc.bin");
+
+/*
+ * DEFAULT_SH_MEM_CONFIG combines three SH_MEM_CONFIG fields: ADDRESS_MODE
+ * set to SH_MEM_ADDRESS_MODE_64, ALIGNMENT_MODE set to the "unaligned"
+ * value defined just below, and INITIAL_INST_PREFETCH set to 3.
+ */
+#define SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 0x00000001
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+/*
+ * Forward declarations of file-local helpers. Some of them (for example
+ * gfx_v12_1_ring_emit_wreg() and gfx_v12_1_ring_invalidate_tlbs()) are
+ * called by functions that appear before their definitions.
+ */
+static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev,
+ bool enable, int xcc_id);
+static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev);
+
+/*
+ * Emit a SET_RESOURCES packet on the KIQ ring.
+ *
+ * @kiq_ring: ring the packet is written to
+ * @queue_mask: 64-bit queue mask, emitted as two dwords (low half first)
+ *
+ * Eight dwords are written: the PM4 header followed by seven payload
+ * dwords. Everything except the queue mask is zero.
+ */
+static void gfx_v12_1_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+					uint64_t queue_mask)
+{
+	const uint32_t payload[] = {
+		/* vmid_mask:0 queue_type:0 (KIQ) */
+		PACKET3_SET_RESOURCES_VMID_MASK(0) |
+			PACKET3_SET_RESOURCES_QUEUE_TYPE(0),
+		lower_32_bits(queue_mask),	/* queue mask lo */
+		upper_32_bits(queue_mask),	/* queue mask hi */
+		0,				/* gws mask lo */
+		0,				/* gws mask hi */
+		0,				/* oac mask */
+		0,
+	};
+	unsigned int dw;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	for (dw = 0; dw < ARRAY_SIZE(payload); dw++)
+		amdgpu_ring_write(kiq_ring, payload[dw]);
+}
+
+/*
+ * Emit a MAP_QUEUES packet on the KIQ ring for @ring.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @ring: ring to map; only COMPUTE and MES ring types are expected,
+ *        any other type triggers WARN_ON() and is emitted with
+ *        me = 0 / engine_sel = 0
+ *
+ * Seven dwords are written: header, control word, doorbell offset,
+ * MQD address (lo/hi) and write-pointer address (lo/hi).
+ */
+static void gfx_v12_1_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+				     struct amdgpu_ring *ring)
+{
+	const uint64_t mqd_gpu_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+	const uint64_t wptr_gpu_addr = ring->wptr_gpu_addr;
+	uint32_t me_id = 0, engine = 0;
+	uint32_t map_ctl;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+		me_id = 1;
+		engine = 0;
+	} else if (ring->funcs->type == AMDGPU_RING_TYPE_MES) {
+		me_id = 2;
+		engine = 5;
+	} else {
+		WARN_ON(1);
+	}
+
+	/* Q_sel: 0, vmid: 0, num_Q: 1 */
+	map_ctl = PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+		  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+		  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+		  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+		  PACKET3_MAP_QUEUES_ME((me_id)) |
+		  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
+		  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+		  PACKET3_MAP_QUEUES_ENGINE_SEL(engine) |
+		  PACKET3_MAP_QUEUES_NUM_QUEUES(1); /* num_queues: must be 1 */
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	amdgpu_ring_write(kiq_ring, map_ctl);
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_gpu_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_gpu_addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_gpu_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_gpu_addr));
+}
+
+/*
+ * Emit an UNMAP_QUEUES packet on the KIQ ring for @ring, or hand the
+ * request to MES when MES is enabled and the KIQ ring of instance 0 is
+ * not marked ready.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @ring: ring to unmap / preempt
+ * @action: unmap action encoded into the packet
+ * @gpu_addr: address emitted only for PREEMPT_QUEUES_NO_UNMAP
+ * @seq: value emitted only for PREEMPT_QUEUES_NO_UNMAP
+ *
+ * NOTE(review): the readiness check looks at kiq[0] while the MES call
+ * is given kiq_ring->xcc_id; confirm that this is intended for
+ * multi-XCC parts.
+ */
+static void gfx_v12_1_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+				       struct amdgpu_ring *ring,
+				       enum amdgpu_unmap_queues_action action,
+				       u64 gpu_addr, u64 seq)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	const uint32_t eng_sel =
+		(ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4 : 0;
+	const bool preempt_only = (action == PREEMPT_QUEUES_NO_UNMAP);
+
+	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr,
+					      seq, kiq_ring->xcc_id);
+		return;
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	/* Q_sel: 0, num_Q: 1 */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_UNMAP_QUEUES_ACTION(action) |
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+	/* The three trailing dwords are zero unless this is a preemption. */
+	amdgpu_ring_write(kiq_ring, preempt_only ? lower_32_bits(gpu_addr) : 0);
+	amdgpu_ring_write(kiq_ring, preempt_only ? upper_32_bits(gpu_addr) : 0);
+	amdgpu_ring_write(kiq_ring, preempt_only ? seq : 0);
+}
+
+/*
+ * Emit a QUERY_STATUS packet on the KIQ ring for @ring.
+ *
+ * @kiq_ring: KIQ ring the packet is written to
+ * @ring: ring whose doorbell offset identifies the queue
+ * @addr: 64-bit address emitted as lo/hi dwords
+ * @seq: 64-bit value emitted as lo/hi dwords
+ *
+ * Seven dwords are written in total.
+ */
+static void gfx_v12_1_kiq_query_status(struct amdgpu_ring *kiq_ring,
+				       struct amdgpu_ring *ring,
+				       u64 addr, u64 seq)
+{
+	const uint32_t eng_sel =
+		(ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4 : 0;
+	const uint32_t query_ctl = PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+				   PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+				   PACKET3_QUERY_STATUS_COMMAND(2);
+	/* doorbell offset selects the queue, eng_sel the engine */
+	const uint32_t target =
+		PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+		PACKET3_QUERY_STATUS_ENG_SEL(eng_sel);
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+	amdgpu_ring_write(kiq_ring, query_ctl);
+	amdgpu_ring_write(kiq_ring, target);
+	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+/*
+ * KIQ flavour of the TLB invalidation emitter: forwards to
+ * gfx_v12_1_ring_invalidate_tlbs() on @kiq_ring with dst_sel fixed to 1.
+ */
+static void gfx_v12_1_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid,
+ uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v12_1_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+/*
+ * KIQ PM4 packet emitters for GFX 12.1, installed per XCC by
+ * gfx_v12_1_set_kiq_pm4_funcs().
+ *
+ * The *_size fields for set_resources, map_queues, unmap_queues and
+ * query_status equal the number of dwords the matching emitter in this
+ * file writes to the ring (8, 7, 6 and 7 respectively).
+ * NOTE(review): invalidate_tlbs_size = 2 could not be checked here
+ * because gfx_v12_1_ring_invalidate_tlbs() is defined further down.
+ */
+static const struct kiq_pm4_funcs gfx_v12_1_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v12_1_kiq_set_resources,
+ .kiq_map_queues = gfx_v12_1_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v12_1_kiq_unmap_queues,
+ .kiq_query_status = gfx_v12_1_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v12_1_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+/*
+ * Point the KIQ of every XCC counted in adev->gfx.xcc_mask at the
+ * GFX 12.1 PM4 function table.
+ */
+static void gfx_v12_1_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+	int xcc;
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+
+	for (xcc = 0; xcc < xcc_count; xcc++)
+		adev->gfx.kiq[xcc].pmf = &gfx_v12_1_kiq_pm4_funcs;
+}
+
+/*
+ * Emit a WAIT_REG_MEM packet (seven dwords) on @ring.
+ *
+ * @ring: ring the packet is written to
+ * @eng_sel: value for the ENGINE field
+ * @mem_space: 0 to poll a register, non-zero to poll memory
+ * @opt: value for the OPERATION field
+ * @addr0: first address dword; must be dword aligned when polling memory
+ * @addr1: second address dword
+ * @ref: reference value
+ * @mask: mask value
+ * @inv: poll interval
+ *
+ * The compare function is fixed to 3 ("equal"). Register offsets are
+ * passed through soc_v1_0_normalize_xcc_reg_offset() before being
+ * emitted; memory addresses are emitted unchanged.
+ */
+static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+				   int mem_space, int opt, uint32_t addr0,
+				   uint32_t addr1, uint32_t ref,
+				   uint32_t mask, uint32_t inv)
+{
+	const bool poll_register = (mem_space == 0);
+
+	if (poll_register) {
+		addr0 = soc_v1_0_normalize_xcc_reg_offset(addr0);
+		addr1 = soc_v1_0_normalize_xcc_reg_offset(addr1);
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	amdgpu_ring_write(ring,
+			  WAIT_REG_MEM_MEM_SPACE(mem_space) | /* memory (1) or register (0) */
+			  WAIT_REG_MEM_OPERATION(opt) |       /* wait */
+			  WAIT_REG_MEM_FUNCTION(3) |          /* equal */
+			  WAIT_REG_MEM_ENGINE(eng_sel));
+
+	if (!poll_register)
+		BUG_ON(addr0 & 0x3); /* Dword align */
+
+	amdgpu_ring_write(ring, addr0);
+	amdgpu_ring_write(ring, addr1);
+	amdgpu_ring_write(ring, ref);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+/*
+ * Basic ring liveness test: have the ring write 0xDEADBEEF into
+ * SCRATCH_REG0 and poll the register from the CPU.
+ *
+ * @ring: ring to test
+ *
+ * The CPU accesses the register through the offset of the physical GC
+ * instance for ring->xcc_id, while the packet carries the instance-0
+ * offset, which is local to the XCC.
+ *
+ * Returns 0 on success, the amdgpu_ring_alloc() error if the ring could
+ * not be reserved, or -ETIMEDOUT if the value did not show up within
+ * adev->usec_timeout polls.
+ */
+static int gfx_v12_1_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t scratch_reg0_offset, xcc_offset;
+	uint32_t tmp = 0;
+	unsigned int i;
+	int r;
+
+	/* Use register offset which is local to XCC in the packet */
+	xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+	scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
+	WREG32(scratch_reg0_offset, 0xCAFEDEAD);
+	tmp = RREG32(scratch_reg0_offset);
+
+	r = amdgpu_ring_alloc(ring, 5);
+	if (r) {
+		/*
+		 * Same helper and wording as the converted gfx_v12_0
+		 * message; the device prefix already names the driver.
+		 */
+		drm_err(adev_to_drm(adev),
+			"cp failed to lock ring %d (%d).\n",
+			ring->idx, r);
+		return r;
+	}
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+		gfx_v12_1_ring_emit_wreg(ring, xcc_offset, 0xDEADBEEF);
+	} else {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+		amdgpu_ring_write(ring, xcc_offset -
+				  PACKET3_SET_UCONFIG_REG_START);
+		amdgpu_ring_write(ring, 0xDEADBEEF);
+	}
+	amdgpu_ring_commit(ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch_reg0_offset);
+		if (tmp == 0xDEADBEEF)
+			break;
+		/* Emulation mode polls at millisecond granularity. */
+		if (amdgpu_emu_mode == 1)
+			msleep(1);
+		else
+			udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+	return r;
+}
+
+/*
+ * Indirect-buffer test: schedule an IB that writes 0xDEADBEEF to a
+ * writeback slot and check that the value arrives.
+ *
+ * @ring: ring to submit the IB on
+ * @timeout: timeout handed to dma_fence_wait_timeout()
+ *
+ * Returns 0 on success (or immediately for the KIQ ring when MES KIQ is
+ * enabled), -ETIMEDOUT if the fence did not signal in time, -EINVAL if
+ * the slot does not hold the expected value, or the error of the failing
+ * helper otherwise.
+ */
+static int gfx_v12_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ib ib;
+	struct dma_fence *f = NULL;
+	unsigned int index;
+	uint64_t gpu_addr;
+	volatile uint32_t *cpu_ptr;
+	long r;
+
+	/* MES KIQ fw hasn't indirect buffer support for now */
+	if (adev->enable_mes_kiq &&
+	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		return 0;
+
+	memset(&ib, 0, sizeof(ib));
+
+	r = amdgpu_device_wb_get(adev, &index);
+	if (r)
+		return r;
+
+	/* Seed the slot with a sentinel the IB is expected to overwrite. */
+	gpu_addr = adev->wb.gpu_addr + (index * 4);
+	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+	cpu_ptr = &adev->wb.wb[index];
+
+	r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r) {
+		/*
+		 * Same helper and wording as the converted gfx_v12_0
+		 * message; the device prefix already names the driver.
+		 */
+		drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
+		goto err1;
+	}
+
+	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ib.ptr[2] = lower_32_bits(gpu_addr);
+	ib.ptr[3] = upper_32_bits(gpu_addr);
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.length_dw = 5;
+
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (r)
+		goto err2;
+
+	r = dma_fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		r = -ETIMEDOUT;
+		goto err2;
+	} else if (r < 0) {
+		goto err2;
+	}
+
+	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+		r = 0;
+	else
+		r = -EINVAL;
+err2:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(f);
+err1:
+	amdgpu_device_wb_free(adev, index);
+	return r;
+}
+
+/*
+ * Release the RLC and MEC firmware images held in adev->gfx and free the
+ * RLC register list format buffer.
+ */
+static void gfx_v12_1_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+/*
+ * Request the "amdgpu/<prefix>_toc.bin" firmware and record its version,
+ * feature version, size and payload start in adev->psp.toc.
+ *
+ * @adev: amdgpu device
+ * @ucode_prefix: IP-version prefix used to build the firmware file name
+ *
+ * Returns 0 on success. On failure the firmware reference is released
+ * and the amdgpu_ucode_request() error is returned.
+ */
+static int gfx_v12_1_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+	const struct psp_firmware_header_v1_0 *hdr;
+	int ret;
+
+	ret = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_toc.bin", ucode_prefix);
+	if (ret) {
+		amdgpu_ucode_release(&adev->psp.toc_fw);
+		return ret;
+	}
+
+	hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+
+	adev->psp.toc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+	adev->psp.toc.feature_version = le32_to_cpu(hdr->sos.fw_version);
+	adev->psp.toc.size_bytes = le32_to_cpu(hdr->header.ucode_size_bytes);
+	/* The payload starts ucode_array_offset_bytes past the header. */
+	adev->psp.toc.start_addr = (uint8_t *)hdr +
+		le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+
+	return 0;
+}
+
+/*
+ * Request and parse the firmware images the GFX 12.1 block needs.
+ *
+ * The RLC image is loaded unless running as an SR-IOV VF. The MEC image
+ * is always loaded. The TOC image is loaded only for the RLC backdoor
+ * auto-load path. The IMU init_microcode hook is called last when one is
+ * set.
+ *
+ * Returns 0 on success or the first error encountered. On any error the
+ * RLC and MEC firmware references are released.
+ */
+static int gfx_v12_1_init_microcode(struct amdgpu_device *adev)
+{
+	char ucode_prefix[15];
+	int err;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	uint16_t version_major;
+	uint16_t version_minor;
+
+	DRM_DEBUG("\n");
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	if (!amdgpu_sriov_vf(adev)) {
+		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+					   AMDGPU_UCODE_REQUIRED,
+					   "amdgpu/%s_rlc.bin", ucode_prefix);
+		if (err)
+			goto out;
+		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+		if (err)
+			goto out;
+	}
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+				   AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_mec.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
+
+	/* only one MEC for gfx 12 */
+	adev->gfx.mec2_fw = NULL;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+		err = gfx_v12_1_init_toc_microcode(adev, ucode_prefix);
+		/*
+		 * Bail out here: otherwise the IMU hook below would
+		 * overwrite err and a TOC load failure would be lost.
+		 */
+		if (err)
+			goto out;
+	}
+
+	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
+		err = adev->gfx.imu.funcs->init_microcode(adev);
+		if (err)
+			dev_err(adev->dev, "Failed to load imu firmware!\n");
+	}
+
+out:
+	if (err) {
+		amdgpu_ucode_release(&adev->gfx.rlc_fw);
+		amdgpu_ucode_release(&adev->gfx.mec_fw);
+	}
+
+	return err;
+}
+
+ /*
+  * gfx_v12_1_get_csb_size - number of u32 entries needed for the clear-state
+  * buffer built from gfx12_cs_data
+  *
+  * One leading entry plus, for every extent of every SECT_CONTEXT section, two
+  * entries and reg_count values. Returns 0 as soon as a section that is not
+  * SECT_CONTEXT is encountered.
+  */
+ static u32 gfx_v12_1_get_csb_size(struct amdgpu_device *adev)
+ {
+ 	const struct cs_section_def *section;
+ 	const struct cs_extent_def *extent;
+ 	u32 total = 1;	/* slot 0 of the buffer */
+
+ 	for (section = gfx12_cs_data; section->section != NULL; section++) {
+ 		if (section->id != SECT_CONTEXT)
+ 			return 0;
+
+ 		for (extent = section->section; extent->extent != NULL; extent++)
+ 			total += 2 + extent->reg_count;
+ 	}
+
+ 	return total;
+ }
+
+ /*
+  * gfx_v12_1_get_csb_buffer - fill the clear-state buffer
+  * @adev: amdgpu device
+  * @buffer: destination; does nothing if NULL or if no cs_data is set
+  *
+  * Layout written: buffer[0] holds the number of extents ("clusters"); each
+  * extent follows as reg_count, reg_index and then reg_count register values.
+  * If a section that is not SECT_CONTEXT is found the function returns
+  * immediately, leaving buffer[0] unwritten.
+  */
+ static void gfx_v12_1_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+ {
+ 	const struct cs_section_def *section;
+ 	const struct cs_extent_def *extent;
+ 	u32 pos = 1;		/* slot 0 is filled in last */
+ 	u32 clusters = 0;
+ 	u32 reg;
+
+ 	if (adev->gfx.rlc.cs_data == NULL || buffer == NULL)
+ 		return;
+
+ 	for (section = adev->gfx.rlc.cs_data; section->section != NULL; section++) {
+ 		if (section->id != SECT_CONTEXT)
+ 			return;
+
+ 		for (extent = section->section; extent->extent != NULL; extent++) {
+ 			clusters++;
+ 			buffer[pos++] = extent->reg_count;
+ 			buffer[pos++] = extent->reg_index;
+
+ 			for (reg = 0; reg < extent->reg_count; reg++)
+ 				buffer[pos++] = cpu_to_le32(extent->extent[reg]);
+ 		}
+ 	}
+
+ 	buffer[0] = clusters;
+ }
+
+ /*
+  * gfx_v12_1_rlc_fini - free the RLC buffer objects
+  *
+  * Frees the clear-state BO and the CP jump-table BO through
+  * amdgpu_bo_free_kernel(), passing the GPU-address and CPU-pointer fields
+  * that belong to each.
+  */
+ static void gfx_v12_1_rlc_fini(struct amdgpu_device *adev)
+ {
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+ }
+
+ /*
+  * gfx_v12_1_init_rlcg_reg_access_ctrl - record RLCG register offsets per XCC
+  *
+  * For every XCC in adev->gfx.xcc_mask, stores the GRBM and RLC_VFI register
+  * offsets of that instance in its reg_access_ctrl slot, clears the cached
+  * GRBM control/index data, and finally marks RLCG register access as
+  * supported.
+  */
+ static void gfx_v12_1_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+ {
+ 	struct amdgpu_rlcg_reg_access_ctrl *ctrl;
+ 	int xcc, xcc_count, dev_inst;
+
+ 	xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+ 	for (xcc = 0; xcc < xcc_count; xcc++) {
+ 		/* Instance used both as table index and for offsets. */
+ 		dev_inst = GET_INST(GC, xcc);
+ 		ctrl = &adev->gfx.rlc.reg_access_ctrl[dev_inst];
+
+ 		ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, dev_inst, regGRBM_GFX_CNTL);
+ 		ctrl->grbm_idx = SOC15_REG_OFFSET(GC, dev_inst, regGRBM_GFX_INDEX);
+
+ 		ctrl->vfi_cmd = SOC15_REG_OFFSET(GC, dev_inst, regRLC_VFI_CMD);
+ 		ctrl->vfi_stat = SOC15_REG_OFFSET(GC, dev_inst, regRLC_VFI_STAT);
+ 		ctrl->vfi_addr = SOC15_REG_OFFSET(GC, dev_inst, regRLC_VFI_ADDR);
+ 		ctrl->vfi_data = SOC15_REG_OFFSET(GC, dev_inst, regRLC_VFI_DATA);
+ 		ctrl->vfi_grbm_cntl =
+ 			SOC15_REG_OFFSET(GC, dev_inst, regRLC_VFI_GRBM_GFX_CNTL);
+ 		ctrl->vfi_grbm_idx =
+ 			SOC15_REG_OFFSET(GC, dev_inst, regRLC_VFI_GRBM_GFX_INDEX);
+ 		ctrl->vfi_grbm_cntl_data = 0;
+ 		ctrl->vfi_grbm_idx_data = 0;
+ 	}
+
+ 	adev->gfx.rlc.rlcg_reg_access_supported = true;
+ }
+
+ /*
+  * gfx_v12_1_rlc_init - software setup of the RLC
+  *
+  * Points the RLC at gfx12_cs_data, creates the clear-state buffer through
+  * amdgpu_gfx_rlc_init_csb(), and programs SPM VMID 0xf on every XCC when the
+  * RLC backend provides update_spm_vmid().
+  *
+  * Returns 0 on success or the error from amdgpu_gfx_rlc_init_csb().
+  */
+ static int gfx_v12_1_rlc_init(struct amdgpu_device *adev)
+ {
+ 	int ret, xcc, xcc_count;
+
+ 	adev->gfx.rlc.cs_data = gfx12_cs_data;
+
+ 	if (adev->gfx.rlc.cs_data) {
+ 		/* init clear state block */
+ 		ret = amdgpu_gfx_rlc_init_csb(adev);
+ 		if (ret)
+ 			return ret;
+ 	}
+
+ 	/* init spm vmid with 0xf */
+ 	xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+ 	for (xcc = 0; xcc < xcc_count; xcc++) {
+ 		if (!adev->gfx.rlc.funcs->update_spm_vmid)
+ 			continue;
+
+ 		adev->gfx.rlc.funcs->update_spm_vmid(adev, xcc, NULL, 0xf);
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * gfx_v12_1_mec_fini - free the MEC buffer objects
+  *
+  * Frees the HPD EOP BO and the two MEC firmware BOs (code and data). No
+  * GPU-address or CPU-pointer fields are passed, so only the BO handles are
+  * handed to amdgpu_bo_free_kernel().
+  */
+ static void gfx_v12_1_mec_fini(struct amdgpu_device *adev)
+ {
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+ }
+
+ /*
+  * gfx_v12_1_mec_init - acquire compute queues and allocate the HPD EOP buffer
+  *
+  * Clears the per-XCC compute queue bitmaps, lets the common code pick the
+  * queues the driver owns, then allocates one zeroed GTT buffer of
+  * num_compute_rings * GFX12_MEC_HPD_SIZE * num_xcc bytes. No buffer is created
+  * when that size is zero.
+  *
+  * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+  */
+ static int gfx_v12_1_mec_init(struct amdgpu_device *adev)
+ {
+ 	size_t hpd_bytes;
+ 	u32 *hpd_cpu;
+ 	int ret, xcc, xcc_count;
+
+ 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ 	xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+ 	for (xcc = 0; xcc < xcc_count; xcc++)
+ 		bitmap_zero(adev->gfx.mec_bitmap[xcc].queue_bitmap,
+ 			    AMDGPU_MAX_COMPUTE_QUEUES);
+
+ 	/* take ownership of the relevant compute queues */
+ 	amdgpu_gfx_compute_queue_acquire(adev);
+
+ 	hpd_bytes = adev->gfx.num_compute_rings *
+ 		    GFX12_MEC_HPD_SIZE * xcc_count;
+ 	if (!hpd_bytes)
+ 		return 0;
+
+ 	ret = amdgpu_bo_create_reserved(adev, hpd_bytes, PAGE_SIZE,
+ 					AMDGPU_GEM_DOMAIN_GTT,
+ 					&adev->gfx.mec.hpd_eop_obj,
+ 					&adev->gfx.mec.hpd_eop_gpu_addr,
+ 					(void **)&hpd_cpu);
+ 	if (ret) {
+ 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", ret);
+ 		gfx_v12_1_mec_fini(adev);
+ 		return ret;
+ 	}
+
+ 	memset(hpd_cpu, 0, hpd_bytes);
+
+ 	/* The CPU mapping was only needed for the clear above. */
+ 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+
+ 	return 0;
+ }
+
+ /*
+  * wave_read_ind - read one indirect SQ register of a wave
+  *
+  * Writes the wave ID and register index into SQ_IND_INDEX of the given XCC
+  * and returns the value then read from SQ_IND_DATA.
+  */
+ static uint32_t wave_read_ind(struct amdgpu_device *adev,
+ 			      uint32_t xcc_id, uint32_t wave,
+ 			      uint32_t address)
+ {
+ 	uint32_t select = (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ 			  (address << SQ_IND_INDEX__INDEX__SHIFT);
+
+ 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, select);
+
+ 	return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
+ }
+
+ /*
+  * wave_read_regs - read a run of indirect SQ registers of a wave
+  *
+  * Programs SQ_IND_INDEX with the wave ID, starting register, work-item ID and
+  * the auto-increment bit, then reads SQ_IND_DATA @num times, storing the
+  * values consecutively in @out.
+  */
+ static void wave_read_regs(struct amdgpu_device *adev,
+ 			   uint32_t xcc_id, uint32_t wave,
+ 			   uint32_t thread, uint32_t regno,
+ 			   uint32_t num, uint32_t *out)
+ {
+ 	uint32_t select = (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ 			  (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ 			  (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ 			  (SQ_IND_INDEX__AUTO_INCR_MASK);
+ 	uint32_t n;
+
+ 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, select);
+
+ 	for (n = 0; n < num; n++)
+ 		out[n] = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
+ }
+
+ /*
+  * gfx_v12_1_read_wave_data - dump the state registers of one wave
+  * @dst: output array; entries are appended starting at index *no_fields
+  * @no_fields: in/out count of entries in @dst
+  *
+  * Appends the record type (4) followed by the wave registers listed in
+  * wave_regs[], in that order, each read through wave_read_ind().
+  */
+ static void gfx_v12_1_read_wave_data(struct amdgpu_device *adev,
+ 				     uint32_t xcc_id,
+ 				     uint32_t simd, uint32_t wave,
+ 				     uint32_t *dst, int *no_fields)
+ {
+ 	static const uint32_t wave_regs[] = {
+ 		ixSQ_WAVE_STATUS,
+ 		ixSQ_WAVE_PC_LO,
+ 		ixSQ_WAVE_PC_HI,
+ 		ixSQ_WAVE_EXEC_LO,
+ 		ixSQ_WAVE_EXEC_HI,
+ 		ixSQ_WAVE_HW_ID1,
+ 		ixSQ_WAVE_HW_ID2,
+ 		ixSQ_WAVE_GPR_ALLOC,
+ 		ixSQ_WAVE_LDS_ALLOC,
+ 		ixSQ_WAVE_IB_STS,
+ 		ixSQ_WAVE_IB_STS2,
+ 		ixSQ_WAVE_IB_DBG1,
+ 		ixSQ_WAVE_M0,
+ 		ixSQ_WAVE_MODE,
+ 		ixSQ_WAVE_STATE_PRIV,
+ 		ixSQ_WAVE_EXCP_FLAG_PRIV,
+ 		ixSQ_WAVE_EXCP_FLAG_USER,
+ 		ixSQ_WAVE_TRAP_CTRL,
+ 		ixSQ_WAVE_ACTIVE,
+ 		ixSQ_WAVE_VALID_AND_IDLE,
+ 		ixSQ_WAVE_DVGPR_ALLOC_LO,
+ 		ixSQ_WAVE_DVGPR_ALLOC_HI,
+ 		ixSQ_WAVE_SCHED_MODE,
+ 	};
+ 	unsigned int n;
+
+ 	/*
+ 	 * On gfx12 the SIMD_ID travels in the INSTANCE field of select_se_sh,
+ 	 * so callers are expected to pass zero here.
+ 	 */
+ 	WARN_ON(simd != 0);
+
+ 	/* type 4 wave data */
+ 	dst[(*no_fields)++] = 4;
+
+ 	for (n = 0; n < ARRAY_SIZE(wave_regs); n++)
+ 		dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, wave_regs[n]);
+ }
+
+ /*
+  * gfx_v12_1_read_wave_sgprs - read @size SGPRs of a wave into @dst
+  *
+  * Reads start at @start relative to SQIND_WAVE_SGPRS_OFFSET, with work-item 0.
+  * Warns if @simd is non-zero.
+  */
+ static void gfx_v12_1_read_wave_sgprs(struct amdgpu_device *adev,
+ 				      uint32_t xcc_id, uint32_t simd,
+ 				      uint32_t wave, uint32_t start,
+ 				      uint32_t size, uint32_t *dst)
+ {
+ 	uint32_t first_reg = start + SQIND_WAVE_SGPRS_OFFSET;
+
+ 	WARN_ON(simd != 0);
+
+ 	wave_read_regs(adev, xcc_id, wave, 0, first_reg, size, dst);
+ }
+
+ /*
+  * gfx_v12_1_read_wave_vgprs - read @size VGPRs of one thread of a wave
+  *
+  * Reads start at @start relative to SQIND_WAVE_VGPRS_OFFSET for work-item
+  * @thread. @simd is not used by this implementation.
+  */
+ static void gfx_v12_1_read_wave_vgprs(struct amdgpu_device *adev,
+ 				      uint32_t xcc_id, uint32_t simd,
+ 				      uint32_t wave, uint32_t thread,
+ 				      uint32_t start, uint32_t size,
+ 				      uint32_t *dst)
+ {
+ 	uint32_t first_reg = start + SQIND_WAVE_VGPRS_OFFSET;
+
+ 	wave_read_regs(adev, xcc_id, wave, thread, first_reg, size, dst);
+ }
+
+ /*
+  * gfx_v12_1_select_me_pipe_q - select ME/pipe/queue/VMID on one XCC
+  *
+  * Thin wrapper that forwards to soc_v1_0_grbm_select(), translating the
+  * logical @xcc_id with GET_INST().
+  */
+ static void gfx_v12_1_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+ {
+ soc_v1_0_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
+ }
+
+ /*
+  * gfx_v12_1_get_xccs_per_xcp - number of XCCs per partition
+  *
+  * Placeholder: currently always reports 1 and does not query the device.
+  */
+ static int gfx_v12_1_get_xccs_per_xcp(struct amdgpu_device *adev)
+ {
+ /* Fill this in when the interface is ready */
+ return 1;
+ }
+
+ /*
+  * gfx_v12_1_ih_to_xcc_inst - map an IH node ID to a logical XCC index
+  * @adev: amdgpu device
+  * @ih_node: node ID taken from the interrupt vector
+  *
+  * The device XCC instance is derived as
+  * (ih_node & 0x7) - 2 + (ih_node >> 3) * 4 and then matched against the
+  * device instance of every logical XCC.
+  *
+  * Returns the logical XCC index, or -EINVAL if no XCC matches.
+  */
+ static int gfx_v12_1_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
+ {
+ 	int logic_xcc;
+ 	int xcc = (ih_node & 0x7) - 2 + (ih_node >> 3) * 4;
+
+ 	for (logic_xcc = 0; logic_xcc < NUM_XCC(adev->gfx.xcc_mask); logic_xcc++) {
+ 		if (xcc == GET_INST(GC, logic_xcc))
+ 			return logic_xcc;
+ 	}
+
+ 	/* Terminate with '\n' so the message is emitted as a complete line. */
+ 	dev_err(adev->dev, "Couldn't find xcc mapping from IH node\n");
+ 	return -EINVAL;
+ }
+
+ /* GFX callback table for GC 12.1; every hook is implemented in this file. */
+ static const struct amdgpu_gfx_funcs gfx_v12_1_gfx_funcs = {
+ 	.get_gpu_clock_counter	= gfx_v12_1_get_gpu_clock_counter,
+ 	.select_se_sh		= gfx_v12_1_xcc_select_se_sh,
+ 	.read_wave_data		= gfx_v12_1_read_wave_data,
+ 	.read_wave_sgprs	= gfx_v12_1_read_wave_sgprs,
+ 	.read_wave_vgprs	= gfx_v12_1_read_wave_vgprs,
+ 	.select_me_pipe_q	= gfx_v12_1_select_me_pipe_q,
+ 	.update_perfmon_mgcg	= gfx_v12_1_update_perf_clk,
+ 	.get_xccs_per_xcp	= gfx_v12_1_get_xccs_per_xcp,
+ 	.ih_node_to_logical_xcc	= gfx_v12_1_ih_to_xcc_inst,
+ };
+
+ /*
+  * gfx_v12_1_gpu_early_init - fill in the static GFX configuration
+  * @adev: amdgpu device
+  *
+  * Sets the hardware-context count and the SC FIFO sizes for the detected GC
+  * IP version.
+  *
+  * Returns 0 on success, or -EINVAL for a GC IP version this file does not
+  * handle. The error is reported to the caller instead of calling BUG(), so an
+  * unexpected version fails driver init rather than halting the kernel.
+  */
+ static int gfx_v12_1_gpu_early_init(struct amdgpu_device *adev)
+ {
+ 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ 	case IP_VERSION(12, 1, 0):
+ 		adev->gfx.config.max_hw_contexts = 8;
+ 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ 		break;
+ 	default:
+ 		dev_err(adev->dev, "Unsupported GC IP version for gfx v12.1\n");
+ 		return -EINVAL;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * gfx_v12_1_compute_ring_init - set up one compute ring
+  * @adev: amdgpu device
+  * @ring_id: index of the ring within its XCC
+  * @xcc_id: logical XCC the ring belongs to
+  * @mec: MEC index (mec 0 is ME 1)
+  * @pipe: pipe within the MEC
+  * @queue: queue within the pipe
+  *
+  * Fills in the ring's identity, doorbell index, EOP buffer address, VM hub
+  * and name, derives its EOP interrupt type and pipe priority, and registers
+  * it with amdgpu_ring_init().
+  *
+  * Returns the result of amdgpu_ring_init().
+  */
+ static int gfx_v12_1_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ 				       int xcc_id, int mec, int pipe, int queue)
+ {
+ 	unsigned irq_type;
+ 	struct amdgpu_ring *ring;
+ 	unsigned int hw_prio;
+ 	uint32_t xcc_doorbell_start;
+
+ 	ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
+ 				       ring_id];
+
+ 	/* mec0 is me1 */
+ 	ring->xcc_id = xcc_id;
+ 	ring->me = mec + 1;
+ 	ring->pipe = pipe;
+ 	ring->queue = queue;
+
+ 	ring->ring_obj = NULL;
+ 	ring->use_doorbell = true;
+ 	xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
+ 			     xcc_id * adev->doorbell_index.xcc_doorbell_range;
+ 	ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
+ 	/* Each ring owns one GFX12_MEC_HPD_SIZE slice of the shared EOP buffer. */
+ 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
+ 			     (ring_id + xcc_id * adev->gfx.num_compute_rings) *
+ 			     GFX12_MEC_HPD_SIZE;
+ 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ 	/*
+ 	 * Bounded formatting so an unexpectedly large id cannot overrun the
+ 	 * name buffer. Relies on ring->name being a fixed-size char array.
+ 	 */
+ 	snprintf(ring->name, sizeof(ring->name), "comp_%d.%d.%d.%d",
+ 		 ring->xcc_id, ring->me, ring->pipe, ring->queue);
+
+ 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ 		+ ring->pipe;
+ 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ 		  AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ 	/* type-2 packets are deprecated on MEC, use type-3 instead */
+ 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ 				hw_prio, NULL);
+ }
+
+ /*
+  * Layout of every firmware image inside the RLC autoload buffer, indexed by
+  * SOC24_FIRMWARE_ID and filled in by gfx_v12_1_parse_rlc_toc(). offset and
+  * size are in bytes; num_inst is the number of per-instance copies. Entries
+  * that the TOC does not mention stay zero.
+  */
+ static struct {
+ SOC24_FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+ unsigned int size_x16;
+ unsigned int num_inst;
+ } rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
+
+ /* TOC offsets are scaled by this many dwords (times 4 for bytes). */
+ #define RLC_TOC_OFFSET_DWUNIT 8
+ /* TOC sizes are scaled by this many dwords when the entry sets size_x16. */
+ #define RLC_SIZE_MULTIPLE 1024
+ /* Minimum autoload buffer size in MiB (shifted left by 20 where used). */
+ #define RLC_TOC_UMF_SIZE_inM 23ULL
+ /* Value placed in bits 31:24 of the dword patched into the TOC image. */
+ #define RLC_TOC_FORMAT_API 165ULL
+
+ /* Instance counts selected by a TOC entry's vfflr_image_code (0..3). */
+ #define RLC_NUM_INS_CODE0 1
+ #define RLC_NUM_INS_CODE1 8
+ #define RLC_NUM_INS_CODE2 2
+ #define RLC_NUM_INS_CODE3 16
+
+ /*
+  * gfx_v12_1_parse_rlc_toc - decode the RLC table of contents
+  * @adev: amdgpu device
+  * @rlc_toc: start of an array of RLC_TABLE_OF_CONTENT_V2 entries
+  *
+  * Walks the entries until one with an id of SOC24_FIRMWARE_ID_INVALID or
+  * lower is found, recording byte offset, byte size and instance count of each
+  * firmware in rlc_autoload_info[]. Does nothing if @rlc_toc is NULL.
+  *
+  * The id comes from the firmware image, so entries whose id does not fit in
+  * rlc_autoload_info[] are skipped instead of being written out of bounds.
+  */
+ static void gfx_v12_1_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+ {
+ 	RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
+
+ 	if (!ucode)
+ 		return;
+
+ 	for (; ucode->id > SOC24_FIRMWARE_ID_INVALID; ucode++) {
+ 		if (ucode->id >= SOC24_FIRMWARE_ID_MAX) {
+ 			dev_warn(adev->dev,
+ 				 "Skipping RLC TOC entry with out-of-range id %u\n",
+ 				 (unsigned int)ucode->id);
+ 			continue;
+ 		}
+
+ 		rlc_autoload_info[ucode->id].id = ucode->id;
+ 		rlc_autoload_info[ucode->id].offset =
+ 			ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
+ 		/* size is in dwords, or in RLC_SIZE_MULTIPLE dwords if size_x16 */
+ 		rlc_autoload_info[ucode->id].size =
+ 			ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
+ 					  ucode->size * 4;
+ 		switch (ucode->vfflr_image_code) {
+ 		case 0:
+ 			rlc_autoload_info[ucode->id].num_inst =
+ 				RLC_NUM_INS_CODE0;
+ 			break;
+ 		case 1:
+ 			rlc_autoload_info[ucode->id].num_inst =
+ 				RLC_NUM_INS_CODE1;
+ 			break;
+ 		case 2:
+ 			rlc_autoload_info[ucode->id].num_inst =
+ 				RLC_NUM_INS_CODE2;
+ 			break;
+ 		case 3:
+ 			rlc_autoload_info[ucode->id].num_inst =
+ 				RLC_NUM_INS_CODE3;
+ 			break;
+ 		default:
+ 			dev_err(adev->dev,
+ 				"Invalid Instance number detected\n");
+ 			break;
+ 		}
+ 	}
+ }
+
+ /*
+  * gfx_v12_1_calc_toc_total_size - size in bytes of the RLC autoload buffer
+  *
+  * Parses the TOC at adev->psp.toc.start_addr (populating
+  * rlc_autoload_info[]) and sums the sizes of all firmware entries from
+  * SOC24_FIRMWARE_ID_RLC_G_UCODE onwards. If that sum is below the offset of
+  * the last entry, the end of the last entry is used instead. The result is
+  * never less than RLC_TOC_UMF_SIZE_inM MiB.
+  */
+ static uint32_t gfx_v12_1_calc_toc_total_size(struct amdgpu_device *adev)
+ {
+ 	uint32_t bytes = 0;
+ 	SOC24_FIRMWARE_ID fw;
+
+ 	gfx_v12_1_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+ 	for (fw = SOC24_FIRMWARE_ID_RLC_G_UCODE; fw < SOC24_FIRMWARE_ID_MAX; fw++)
+ 		bytes += rlc_autoload_info[fw].size;
+
+ 	/* In case the offset in rlc toc ucode is aligned */
+ 	if (bytes < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset) {
+ 		bytes = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
+ 			rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
+ 	}
+
+ 	/* Enforce the minimum buffer size. */
+ 	if (bytes < (RLC_TOC_UMF_SIZE_inM << 20))
+ 		bytes = RLC_TOC_UMF_SIZE_inM << 20;
+
+ 	return bytes;
+ }
+
+ /*
+  * gfx_v12_1_rlc_autoload_buffer_init - allocate the RLC autoload buffer
+  *
+  * Creates a reserved VRAM BO, 64 KiB aligned, sized by
+  * gfx_v12_1_calc_toc_total_size(), and stores its handle, GPU address and CPU
+  * pointer in adev->gfx.rlc.
+  *
+  * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+  */
+ static int gfx_v12_1_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+ {
+ 	uint32_t bo_size = gfx_v12_1_calc_toc_total_size(adev);
+ 	int ret;
+
+ 	ret = amdgpu_bo_create_reserved(adev, bo_size, 64 * 1024,
+ 					AMDGPU_GEM_DOMAIN_VRAM,
+ 					&adev->gfx.rlc.rlc_autoload_bo,
+ 					&adev->gfx.rlc.rlc_autoload_gpu_addr,
+ 					(void **)&adev->gfx.rlc.rlc_autoload_ptr);
+ 	if (!ret)
+ 		return 0;
+
+ 	dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", ret);
+ 	return ret;
+ }
+
+ /*
+  * gfx_v12_1_rlc_backdoor_autoload_copy_ucode - stage one firmware image
+  * @adev: amdgpu device
+  * @id: firmware ID selecting the slot in rlc_autoload_info[]
+  * @fw_data: source image
+  * @fw_size: source size in bytes; 0 means "use the per-instance slot size"
+  *
+  * The slot of @id is split evenly into num_inst per-instance regions. The
+  * image is copied into each region that is selected (all of them when the
+  * slot has RLC_NUM_INS_CODE0 instances, otherwise those whose index / 2 is
+  * set in adev->gfx.xcc_mask), truncated to the region size, and the rest of
+  * the region is zeroed.
+  *
+  * Nothing is copied for an out-of-range @id or for a slot with no instances.
+  */
+ static void gfx_v12_1_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ 							SOC24_FIRMWARE_ID id,
+ 							const void *fw_data,
+ 							uint32_t fw_size)
+ {
+ 	uint32_t toc_offset;
+ 	uint32_t toc_fw_size, toc_fw_inst_size;
+ 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+ 	int i, num_inst;
+
+ 	if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
+ 		return;
+
+ 	/*
+ 	 * Slots that the TOC never described, or whose vfflr_image_code was not
+ 	 * recognised, keep num_inst == 0 in the zero-initialised table. Bail
+ 	 * out rather than divide by zero below.
+ 	 */
+ 	num_inst = rlc_autoload_info[id].num_inst;
+ 	if (num_inst <= 0)
+ 		return;
+
+ 	toc_offset = rlc_autoload_info[id].offset;
+ 	toc_fw_size = rlc_autoload_info[id].size;
+ 	toc_fw_inst_size = toc_fw_size / num_inst;
+
+ 	if (fw_size == 0)
+ 		fw_size = toc_fw_inst_size;
+
+ 	if (fw_size > toc_fw_inst_size)
+ 		fw_size = toc_fw_inst_size;
+
+ 	for (i = 0; i < num_inst; i++) {
+ 		if ((num_inst == RLC_NUM_INS_CODE0) ||
+ 		    ((1 << (i / 2)) & adev->gfx.xcc_mask)) {
+ 			memcpy(ptr + toc_offset + i * toc_fw_inst_size, fw_data, fw_size);
+
+ 			if (fw_size < toc_fw_inst_size)
+ 				memset(ptr + toc_offset + fw_size + i * toc_fw_inst_size,
+ 				       0, toc_fw_inst_size - fw_size);
+ 		}
+ 	}
+ }
+
+ /*
+  * gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode - stage the TOC itself
+  *
+  * Patches the second-to-last dword of the TOC image with
+  * (RLC_TOC_FORMAT_API << 24) | 0x1 and then copies the image into the
+  * SOC24_FIRMWARE_ID_RLC_TOC slot of the autoload buffer.
+  *
+  * If the TOC image is absent or its recorded size is too small to contain
+  * the patched dword, an error is logged and nothing is written; otherwise
+  * the patch would land before the start of the image.
+  */
+ static void
+ gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
+ {
+ 	void *data;
+ 	uint32_t size;
+ 	uint32_t *toc_ptr;
+
+ 	data = adev->psp.toc.start_addr;
+ 	size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
+
+ 	if (!data || size < 2 * sizeof(uint32_t)) {
+ 		dev_err(adev->dev, "RLC TOC image missing or too small to patch\n");
+ 		return;
+ 	}
+
+ 	toc_ptr = (uint32_t *)data + size / 4 - 2;
+ 	*toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;
+
+ 	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
+ 						   data, size);
+ }
+
+ /*
+  * gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode - stage MEC and RLC images
+  *
+  * From the MEC firmware: the instruction image goes to the RS64_MEC slot and
+  * the data image to each of the four RS64_MEC_Px_STACK slots.
+  * From the RLC firmware: the main ucode goes to RLC_G_UCODE; for header
+  * major version 2, minor >= 1 adds the GPM and SRM save/restore lists and
+  * minor >= 2 adds the RLC IRAM and DRAM images.
+  */
+ static void
+ gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
+ {
+ 	static const SOC24_FIRMWARE_ID mec_stack_ids[] = {
+ 		SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
+ 		SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
+ 		SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
+ 		SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
+ 	};
+ 	const struct gfx_firmware_header_v2_0 *mec_hdr;
+ 	const struct rlc_firmware_header_v2_0 *rlc_v20;
+ 	const struct rlc_firmware_header_v2_1 *rlc_v21;
+ 	const struct rlc_firmware_header_v2_2 *rlc_v22;
+ 	const __le32 *payload;
+ 	uint32_t bytes;
+ 	uint16_t major, minor;
+ 	unsigned int n;
+
+ 	/* MEC instruction image */
+ 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ 		adev->gfx.mec_fw->data;
+ 	payload = (const __le32 *)(adev->gfx.mec_fw->data +
+ 		le32_to_cpu(mec_hdr->ucode_offset_bytes));
+ 	bytes = le32_to_cpu(mec_hdr->ucode_size_bytes);
+ 	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
+ 						   payload, bytes);
+
+ 	/* MEC data image: the same payload is staged for every pipe stack */
+ 	payload = (const __le32 *)(adev->gfx.mec_fw->data +
+ 		le32_to_cpu(mec_hdr->data_offset_bytes));
+ 	bytes = le32_to_cpu(mec_hdr->data_size_bytes);
+ 	for (n = 0; n < ARRAY_SIZE(mec_stack_ids); n++)
+ 		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, mec_stack_ids[n],
+ 							   payload, bytes);
+
+ 	/* RLC main ucode */
+ 	rlc_v20 = (const struct rlc_firmware_header_v2_0 *)
+ 		adev->gfx.rlc_fw->data;
+ 	payload = (const __le32 *)(adev->gfx.rlc_fw->data +
+ 		le32_to_cpu(rlc_v20->header.ucode_array_offset_bytes));
+ 	bytes = le32_to_cpu(rlc_v20->header.ucode_size_bytes);
+ 	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
+ 						   payload, bytes);
+
+ 	major = le16_to_cpu(rlc_v20->header.header_version_major);
+ 	minor = le16_to_cpu(rlc_v20->header.header_version_minor);
+
+ 	/* The extra sections below only exist in v2.x headers. */
+ 	if (major != 2)
+ 		return;
+
+ 	if (minor >= 1) {
+ 		rlc_v21 = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+
+ 		payload = (const __le32 *)(adev->gfx.rlc_fw->data +
+ 			le32_to_cpu(rlc_v21->save_restore_list_gpm_offset_bytes));
+ 		bytes = le32_to_cpu(rlc_v21->save_restore_list_gpm_size_bytes);
+ 		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
+ 							   payload, bytes);
+
+ 		payload = (const __le32 *)(adev->gfx.rlc_fw->data +
+ 			le32_to_cpu(rlc_v21->save_restore_list_srm_offset_bytes));
+ 		bytes = le32_to_cpu(rlc_v21->save_restore_list_srm_size_bytes);
+ 		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
+ 							   payload, bytes);
+ 	}
+
+ 	if (minor >= 2) {
+ 		rlc_v22 = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ 		payload = (const __le32 *)(adev->gfx.rlc_fw->data +
+ 			le32_to_cpu(rlc_v22->rlc_iram_ucode_offset_bytes));
+ 		bytes = le32_to_cpu(rlc_v22->rlc_iram_ucode_size_bytes);
+ 		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
+ 							   payload, bytes);
+
+ 		payload = (const __le32 *)(adev->gfx.rlc_fw->data +
+ 			le32_to_cpu(rlc_v22->rlc_dram_ucode_offset_bytes));
+ 		bytes = le32_to_cpu(rlc_v22->rlc_dram_ucode_size_bytes);
+ 		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
+ 							   payload, bytes);
+ 	}
+ }
+
+ /*
+  * gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode - stage the SDMA image
+  *
+  * Copies the ucode section of SDMA instance 0's firmware into the
+  * SDMA_UCODE_TH0 slot. Does nothing if that firmware has not been loaded.
+  */
+ static void
+ gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
+ {
+ 	const struct sdma_firmware_header_v3_0 *hdr;
+ 	const __le32 *payload;
+ 	uint32_t bytes;
+
+ 	if (!adev->sdma.instance[0].fw)
+ 		return;
+
+ 	hdr = (const struct sdma_firmware_header_v3_0 *)
+ 		adev->sdma.instance[0].fw->data;
+ 	payload = (const __le32 *)(adev->sdma.instance[0].fw->data +
+ 		le32_to_cpu(hdr->ucode_offset_bytes));
+ 	bytes = le32_to_cpu(hdr->ucode_size_bytes);
+
+ 	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
+ 						   payload, bytes);
+ }
+
+ /*
+  * gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode - stage the MES images
+  *
+  * For each of the two MES pipes, copies the ucode section to the
+  * RS64_MES_Px slot and the ucode-data section to the RS64_MES_Px_STACK slot.
+  */
+ static void
+ gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
+ {
+ 	static const int code_ids[2] = {
+ 		SOC24_FIRMWARE_ID_RS64_MES_P0,
+ 		SOC24_FIRMWARE_ID_RS64_MES_P1,
+ 	};
+ 	static const int stack_ids[2] = {
+ 		SOC24_FIRMWARE_ID_RS64_MES_P0_STACK,
+ 		SOC24_FIRMWARE_ID_RS64_MES_P1_STACK,
+ 	};
+ 	const struct mes_firmware_header_v1_0 *hdr;
+ 	const __le32 *payload;
+ 	unsigned bytes;
+ 	int pipe;
+
+ 	for (pipe = 0; pipe < 2; pipe++) {
+ 		hdr = (const struct mes_firmware_header_v1_0 *)
+ 			adev->mes.fw[pipe]->data;
+
+ 		/* instruction image */
+ 		payload = (const __le32 *)(adev->mes.fw[pipe]->data +
+ 			le32_to_cpu(hdr->mes_ucode_offset_bytes));
+ 		bytes = le32_to_cpu(hdr->mes_ucode_size_bytes);
+ 		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, code_ids[pipe],
+ 							   payload, bytes);
+
+ 		/* data image */
+ 		payload = (const __le32 *)(adev->mes.fw[pipe]->data +
+ 			le32_to_cpu(hdr->mes_ucode_data_offset_bytes));
+ 		bytes = le32_to_cpu(hdr->mes_ucode_data_size_bytes);
+ 		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, stack_ids[pipe],
+ 							   payload, bytes);
+ 	}
+ }
+
+ /*
+  * gfx_v12_1_rlc_backdoor_autoload_enable - kick off RLC backdoor autoload
+  *
+  * Stages the SDMA, GFX (MEC/RLC), MES and TOC images in the autoload buffer,
+  * programs the RLC bootloader address and size on every XCC, loads the IMU
+  * firmware if the IMU backend provides load_microcode(), and finally enables
+  * the RLC GPM threads and the RLC on every XCC.
+  *
+  * Always returns 0; the result of the IMU load_microcode() hook is not
+  * checked.
+  */
+ static int gfx_v12_1_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+ {
+ uint32_t rlc_g_offset, rlc_g_size;
+ uint64_t gpu_addr;
+ uint32_t data;
+ int i, num_xcc;
+
+ /* RLC autoload sequence 2: copy ucode */
+ gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(adev);
+ gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(adev);
+ gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(adev);
+ gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(adev);
+
+ rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
+ /* Address of the RLC_G image, expressed relative to vram_start. */
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ for (i = 0; i < num_xcc; i++) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGFX_IMU_RLC_BOOTLOADER_ADDR_HI,
+ upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGFX_IMU_RLC_BOOTLOADER_ADDR_LO,
+ lower_32_bits(gpu_addr));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGFX_IMU_RLC_BOOTLOADER_SIZE,
+ rlc_g_size);
+ }
+
+ if (adev->gfx.imu.funcs) {
+ /* RLC autoload sequence 3: load IMU fw */
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ }
+
+ /* unhalt rlc to start autoload */
+ for (i = 0; i < num_xcc; i++) {
+ data = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE);
+ data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
+ data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE, data);
+ WREG32_SOC15(GC, GET_INST(GC, i), regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
+ }
+
+ return 0;
+ }
+
+ /*
+  * gfx_v12_1_sw_init - software initialisation of the GFX v12.1 IP block
+  * @ip_block: the GFX IP block being initialised
+  *
+  * In order: picks the MEC topology for the GC IP version, caps the number of
+  * compute rings, registers the CP EOP / privileged-register /
+  * privileged-instruction interrupt sources, sets up the RLC and MEC objects,
+  * creates the compute rings, KIQ (unless MES provides it) and MQDs for every
+  * XCC, allocates the firmware-loading buffer matching the load type, fills
+  * in the static GFX configuration and registers the sysfs entries.
+  *
+  * Returns 0 on success or the first error encountered. Objects created by
+  * earlier steps are not released here on failure.
+  */
+ static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
+ {
+ int i, j, k, r, ring_id = 0;
+ unsigned num_compute_rings;
+ int xcc_id, num_xcc;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 1, 0):
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ default:
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 2;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ }
+
+ /* recalculate compute rings to use based on hardware configuration */
+ /* Upper bound is half of the queues of one MEC (pipes * queues / 2). */
+ num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe) / 2;
+ adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
+ GFX_12_1_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
+ GFX_12_1_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
+ GFX_12_1_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ r = gfx_v12_1_rlc_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v12_1_mec_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ /* Ring numbering restarts for every XCC. */
+ ring_id = 0;
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev,
+ xcc_id, i, k, j))
+ continue;
+
+ r = gfx_v12_1_compute_ring_init(adev, ring_id,
+ xcc_id, i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ /* The driver only owns a KIQ when MES does not provide one. */
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, xcc_id);
+ if (r) {
+ dev_err(adev->dev, "Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_1_compute_mqd), xcc_id);
+ if (r)
+ return r;
+ }
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v12_1_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = gfx_v12_1_init_cp_compute_microcode_bo(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_1_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+ }
+
+ /*
+  * gfx_v12_1_rlc_autoload_buffer_fini - free the RLC autoload buffer
+  *
+  * Releases the BO created by gfx_v12_1_rlc_autoload_buffer_init(), passing
+  * its GPU-address and CPU-pointer fields to amdgpu_bo_free_kernel().
+  */
+ static void gfx_v12_1_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+ {
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+ }
+
+ /*
+  * gfx_v12_1_sw_fini - software teardown of the GFX v12.1 IP block
+  * @ip_block: the GFX IP block being torn down
+  *
+  * Tears down every compute ring, then per XCC the MQDs and (when the driver
+  * owns it) the KIQ, then the RLC and MEC objects, the autoload buffer if
+  * backdoor autoload was in use, the firmware images and the sysfs entries.
+  *
+  * Always returns 0.
+  */
+ static int gfx_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
+ {
+ int i, num_xcc;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ /* compute_ring[] holds num_compute_rings entries per XCC, back to back. */
+ for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ for (i = 0; i < num_xcc; i++) {
+ amdgpu_gfx_mqd_sw_fini(adev, i);
+
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
+ amdgpu_gfx_kiq_fini(adev, i);
+ }
+ }
+
+ gfx_v12_1_rlc_fini(adev);
+ gfx_v12_1_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v12_1_rlc_autoload_buffer_fini(adev);
+
+ gfx_v12_1_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ return 0;
+ }
+
+ /*
+  * gfx_v12_1_xcc_select_se_sh - program GRBM_GFX_INDEX on one XCC
+  * @se_num: shader engine index, or 0xffffffff to broadcast to all
+  * @sh_num: shader array index, or 0xffffffff to broadcast to all
+  * @instance: instance index, or 0xffffffff to broadcast to all
+  * @xcc_id: logical XCC to program
+  *
+  * Builds the register value from scratch and writes it through
+  * WREG32_SOC15_RLC_SHADOW_EX().
+  */
+ static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ 				       u32 sh_num, u32 instance, int xcc_id)
+ {
+ 	u32 index = 0;
+
+ 	if (instance != 0xffffffff)
+ 		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ 				      instance);
+ 	else
+ 		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
+ 				      INSTANCE_BROADCAST_WRITES, 1);
+
+ 	if (se_num != 0xffffffff)
+ 		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, SE_INDEX, se_num);
+ 	else
+ 		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
+ 				      SE_BROADCAST_WRITES, 1);
+
+ 	if (sh_num != 0xffffffff)
+ 		index = REG_SET_FIELD(index, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+ 	else
+ 		index = REG_SET_FIELD(index, GRBM_GFX_INDEX,
+ 				      SA_BROADCAST_WRITES, 1);
+
+ 	WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, index);
+ }
+
+ /*
+  * gfx_v12_1_get_sa_active_bitmap - bitmap of usable shader arrays on one XCC
+  *
+  * Starts from a mask of max_sh_per_se * max_shader_engines bits and clears
+  * every SA flagged in the SA_DISABLE field of either CC_GC_SA_UNIT_DISABLE
+  * or GC_USER_SA_UNIT_DISABLE.
+  */
+ static u32 gfx_v12_1_get_sa_active_bitmap(struct amdgpu_device *adev,
+ 					  int xcc_id)
+ {
+ 	u32 hw_disabled, user_disabled, all_sa;
+
+ 	hw_disabled = REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ 						 regCC_GC_SA_UNIT_DISABLE),
+ 				    CC_GC_SA_UNIT_DISABLE, SA_DISABLE);
+ 	user_disabled = REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ 						   regGC_USER_SA_UNIT_DISABLE),
+ 				      GC_USER_SA_UNIT_DISABLE, SA_DISABLE);
+ 	all_sa = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+ 					   adev->gfx.config.max_shader_engines);
+
+ 	return all_sa & ~hw_disabled & ~user_disabled;
+ }
+
+ /*
+  * gfx_v12_1_get_rb_active_bitmap - bitmap of usable render backends on one XCC
+  *
+  * Starts from a mask of max_backends_per_se * max_shader_engines bits and
+  * clears every RB flagged in the BACKEND_DISABLE field of either
+  * CC_RB_BACKEND_DISABLE or GC_USER_RB_BACKEND_DISABLE.
+  */
+ static u32 gfx_v12_1_get_rb_active_bitmap(struct amdgpu_device *adev,
+ 					  int xcc_id)
+ {
+ 	u32 hw_disabled, user_disabled, all_rb;
+
+ 	hw_disabled = REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ 						 regCC_RB_BACKEND_DISABLE),
+ 				    CC_RB_BACKEND_DISABLE, BACKEND_DISABLE);
+ 	user_disabled = REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ 						   regGC_USER_RB_BACKEND_DISABLE),
+ 				      GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
+ 	all_rb = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+ 					   adev->gfx.config.max_shader_engines);
+
+ 	return all_rb & ~hw_disabled & ~user_disabled;
+ }
+
+ /*
+  * gfx_v12_1_setup_rb - compute the render-backend enable mask
+  *
+  * For every XCC, sets two RB bits for each active shader array and then ORs
+  * in the RB bitmap read from the disable registers. The result, accumulated
+  * over all XCCs, is stored in backend_enable_mask, and its population count
+  * in num_rbs.
+  *
+  * NOTE(review): the SA-derived bits are OR-ed with, not masked by, the
+  * hardware RB bitmap, so an RB disabled in hardware but belonging to an
+  * active SA stays set — confirm this is intended.
+  * NOTE(review): 0x3 assumes two RBs per SA regardless of the computed
+  * rb_bitmap_width_per_sa — confirm against the hardware configuration.
+  */
+ static void gfx_v12_1_setup_rb(struct amdgpu_device *adev)
+ {
+ u32 rb_bitmap_width_per_sa;
+ u32 max_sa;
+ u32 active_sa_bitmap;
+ u32 global_active_rb_bitmap;
+ u32 active_rb_bitmap = 0;
+ u32 i;
+ int xcc_id;
+
+ for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+ /* query sa bitmap from SA_UNIT_DISABLE registers */
+ active_sa_bitmap = gfx_v12_1_get_sa_active_bitmap(adev, xcc_id);
+ /* query rb bitmap from RB_BACKEND_DISABLE registers */
+ global_active_rb_bitmap = gfx_v12_1_get_rb_active_bitmap(adev, xcc_id);
+
+ /* generate active rb bitmap according to active sa bitmap */
+ max_sa = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ for (i = 0; i < max_sa; i++) {
+ if (active_sa_bitmap & (1 << i))
+ active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+ }
+
+ active_rb_bitmap |= global_active_rb_bitmap;
+ }
+
+ adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+ adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+ }
+
+ /*
+  * gfx_v12_1_xcc_init_compute_vmid - program the KFD VMIDs of one XCC
+  *
+  * For every VMID from first_kfd_vmid up to AMDGPU_NUM_VMID, selects the VMID
+  * through GRBM and writes SH_MEM_CONFIG and SH_MEM_BASES, sets TRAP_EN in
+  * SPI_GDBG_PER_VMID_CNTL and sets DISABLE_VGPR_DEALLOC in SQ_DEBUG. The GRBM
+  * selection is reset to VMID 0 before srbm_mutex is dropped.
+  */
+ static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev,
+ int xcc_id)
+ {
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x20000000'00000000 - 0x20000001'00000000 (4GB)
+ * Scratch: 0x10000000'00000000 - 0x10000001'00000000 (4GB)
+ */
+ sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 58));
+ sh_mem_bases = REG_SET_FIELD(sh_mem_bases, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+
+ /* srbm_mutex is held for the whole time the GRBM selection is changed. */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ /* CP and shaders */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL, data);
+
+ /* Disable VGPR deallocation instruction for each KFD vmid. */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG);
+ data = REG_SET_FIELD(data, SQ_DEBUG, DISABLE_VGPR_DEALLOC, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG, data);
+ }
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ /* Placeholder for TCP harvesting; currently does nothing. */
+ static void gfx_v12_1_tcp_harvest(struct amdgpu_device *adev)
+ {
+ /* TODO: harvest feature to be added later. */
+ }
+
+ /* Empty stub; called from gfx_v12_1_constants_init() but does nothing. */
+ static void gfx_v12_1_get_tcc_info(struct amdgpu_device *adev)
+ {
+ }
+
+ /*
+  * gfx_v12_1_xcc_constants_init - program per-VMID memory apertures on one XCC
+  *
+  * For every VMID managed by GFX hub 0, selects the VMID through GRBM and
+  * writes SH_MEM_CONFIG; all VMIDs except 0 also get SH_MEM_BASES. The GRBM
+  * selection is reset to VMID 0 afterwards, and the KFD VMIDs are then set up
+  * by gfx_v12_1_xcc_init_compute_vmid().
+  *
+  * The VMID count is taken from id_mgr[AMDGPU_GFXHUB(0)] regardless of
+  * @xcc_id.
+  */
+ static void gfx_v12_1_xcc_constants_init(struct amdgpu_device *adev,
+ int xcc_id)
+ {
+ u32 tmp;
+ int i;
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+ /* CP and shaders */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ /* VMID 0 keeps its existing SH_MEM_BASES. */
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 58));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, tmp);
+ }
+ }
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v12_1_xcc_init_compute_vmid(adev, xcc_id);
+ }
+
+/*
+ * Device-wide constant setup: RB, CU and TCC info first, then the per-XCC
+ * constants for each XCC counted by NUM_XCC(adev->gfx.xcc_mask).
+ */
+static void gfx_v12_1_constants_init(struct amdgpu_device *adev)
+{
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc;
+
+	gfx_v12_1_setup_rb(adev);
+	gfx_v12_1_get_cu_info(adev, &adev->gfx.cu_info);
+	gfx_v12_1_get_tcc_info(adev);
+	adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+	xcc = 0;
+	while (xcc < xcc_count) {
+		gfx_v12_1_xcc_constants_init(adev, xcc);
+		xcc++;
+	}
+}
+
+/*
+ * Set or clear the CNTX_BUSY, CNTX_EMPTY, CMP_BUSY and GFX_IDLE interrupt
+ * enables in CP_INT_CNTL_RING0 of one XCC.  Does nothing on an SR-IOV VF.
+ */
+static void gfx_v12_1_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+						    bool enable, int xcc_id)
+{
+	const u32 en = enable ? 1 : 0;
+	u32 cntl;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0);
+
+	/* All four enables follow the same on/off value. */
+	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, en);
+	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, en);
+	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, en);
+	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, en);
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, cntl);
+}
+
+/*
+ * Fill the clear-state buffer through the RLC get_csb_buffer callback and
+ * point RLC_CSIB_ADDR_HI/LO and RLC_CSIB_LENGTH of one XCC at it.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_1_xcc_init_csb(struct amdgpu_device *adev,
+				  int xcc_id)
+{
+	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_HI,
+		     upper_32_bits(adev->gfx.rlc.clear_state_gpu_addr));
+	/* The low address word is written with its two lowest bits cleared. */
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_LO,
+		     lower_32_bits(adev->gfx.rlc.clear_state_gpu_addr) & 0xfffffffc);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_LENGTH,
+		     adev->gfx.rlc.clear_state_size);
+
+	return 0;
+}
+
+/* Clear RLC_ENABLE_F32 in RLC_CNTL of one XCC. */
+static void gfx_v12_1_xcc_rlc_stop(struct amdgpu_device *adev,
+				   int xcc_id)
+{
+	u32 rlc_cntl;
+
+	rlc_cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL);
+	rlc_cntl = REG_SET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32, 0);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL, rlc_cntl);
+}
+
+/* Stop the RLC on every XCC counted by NUM_XCC(adev->gfx.xcc_mask). */
+static void gfx_v12_1_rlc_stop(struct amdgpu_device *adev)
+{
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc;
+
+	for (xcc = 0; xcc < xcc_count; xcc++)
+		gfx_v12_1_xcc_rlc_stop(adev, xcc);
+}
+
+/*
+ * Pulse GRBM_SOFT_RESET.SOFT_RESET_RLC on one XCC: set the bit, wait 50us,
+ * clear it, then wait another 50us.
+ */
+static void gfx_v12_1_xcc_rlc_reset(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ /* assert the RLC soft reset */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id),
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ /* release the RLC soft reset */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id),
+ GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+/* Soft-reset the RLC on every XCC counted by NUM_XCC(adev->gfx.xcc_mask). */
+static void gfx_v12_1_rlc_reset(struct amdgpu_device *adev)
+{
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc;
+
+	for (xcc = 0; xcc < xcc_count; xcc++)
+		gfx_v12_1_xcc_rlc_reset(adev, xcc);
+}
+
+/*
+ * Control the RLC <-> SMU handshake of one XCC through
+ * RLC_PG_CNTL.SMU_HANDSHAKE_DISABLE: @enable clears the disable bit,
+ * !@enable sets it.
+ */
+static void gfx_v12_1_xcc_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+						 bool enable, int xcc_id)
+{
+	uint32_t pg_cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL);
+
+	if (enable) {
+		pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+	} else {
+		/* RLC_PG_CNTL[23] = 0 (default)
+		 * RLC will wait for handshake acks with SMU
+		 * GFXOFF will be enabled
+		 * RLC_PG_CNTL[23] = 1
+		 * RLC will not issue any message to SMU
+		 * hence no handshake between SMU & RLC
+		 * GFXOFF will be disabled
+		 */
+		pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+	}
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, pg_cntl);
+}
+
+/*
+ * Start the RLC of one XCC by setting RLC_CNTL.RLC_ENABLE_F32, then wait
+ * 50us.  When PP_GFXOFF_MASK is not set in amdgpu_pp_feature_mask the
+ * RLC/SMU handshake is disabled first.
+ */
+static void gfx_v12_1_xcc_rlc_start(struct amdgpu_device *adev,
+				    int xcc_id)
+{
+	bool gfxoff_requested = (amdgpu_pp_feature_mask & PP_GFXOFF_MASK) != 0;
+
+	/* TODO: enable rlc & smu handshake until smu
+	 * and gfxoff feature works as expected */
+	if (!gfxoff_requested)
+		gfx_v12_1_xcc_rlc_smu_handshake_cntl(adev, false, xcc_id);
+
+	WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, RLC_ENABLE_F32, 1);
+	udelay(50);
+}
+
+/* Start the RLC on every XCC counted by NUM_XCC(adev->gfx.xcc_mask). */
+static void gfx_v12_1_rlc_start(struct amdgpu_device *adev)
+{
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc;
+
+	for (xcc = 0; xcc < xcc_count; xcc++)
+		gfx_v12_1_xcc_rlc_start(adev, xcc);
+}
+
+/*
+ * Enable the RLC Save Restore Machine of one XCC by setting AUTO_INCR_ADDR
+ * and SRM_ENABLE in RLC_SRM_CNTL.
+ */
+static void gfx_v12_1_xcc_rlc_enable_srm(struct amdgpu_device *adev,
+					 int xcc_id)
+{
+	uint32_t srm_cntl;
+
+	/* enable Save Restore Machine */
+	srm_cntl = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL));
+	srm_cntl |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK |
+		    RLC_SRM_CNTL__SRM_ENABLE_MASK;
+	WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL), srm_cntl);
+}
+
+/*
+ * Stream the RLCG ucode image from adev->gfx.rlc_fw into one XCC through
+ * RLC_GPM_UCODE_ADDR/DATA, one dword at a time, then write the firmware
+ * version to RLC_GPM_UCODE_ADDR.
+ */
+static void gfx_v12_1_xcc_load_rlcg_microcode(struct amdgpu_device *adev,
+					      int xcc_id)
+{
+	const struct rlc_firmware_header_v2_0 *hdr =
+		(const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+	const __le32 *ucode;
+	unsigned int dwords, idx;
+
+	ucode = (const __le32 *)(adev->gfx.rlc_fw->data +
+			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+	/* The header gives the size in bytes; the data port takes dwords. */
+	dwords = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR,
+		     RLCG_UCODE_LOADING_START_ADDRESS);
+
+	for (idx = 0; idx < dwords; idx++)
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+			     regRLC_GPM_UCODE_DATA,
+			     le32_to_cpup(&ucode[idx]));
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+		     regRLC_GPM_UCODE_ADDR,
+		     adev->gfx.rlc_fw_version);
+}
+
+/*
+ * Upload the RLC IRAM and DRAM ucode sections described by a v2.2 RLC
+ * firmware header into one XCC through the RLC_LX6_IRAM/DRAM ADDR and DATA
+ * registers, then set PDEBUG_ENABLE and clear BRESET in RLC_LX6_CNTL.
+ */
+static void gfx_v12_1_xcc_load_rlc_iram_dram_microcode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ const struct rlc_firmware_header_v2_2 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ /* IRAM section: header sizes are in bytes, the upload is per dword */
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_IRAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ /* in emulation mode, sleep 1ms after every 100 dwords */
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_LX6_IRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ /* DRAM section */
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_LX6_DRAM_ADDR, 0);
+ for (i = 0; i < fw_size; i++) {
+ /* in emulation mode, sleep 1ms after every 100 dwords */
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_LX6_DRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ /*
+ * NOTE(review): this writes the version to regRLC_LX6_IRAM_ADDR a second
+ * time after the DRAM upload, not to a DRAM address register - confirm
+ * that this is intended.
+ */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL, tmp);
+}
+
+/*
+ * Legacy (driver-side) RLC firmware load for one XCC.
+ *
+ * Returns -EINVAL when no RLC firmware is present or the header major
+ * version is not 2, 0 otherwise.  The IRAM/DRAM sections are only loaded
+ * when amdgpu_dpm == 1 and the header minor version is at least 2.
+ */
+static int gfx_v12_1_xcc_rlc_load_microcode(struct amdgpu_device *adev,
+					    int xcc_id)
+{
+	const struct rlc_firmware_header_v2_0 *hdr;
+	uint16_t major, minor;
+
+	if (!adev->gfx.rlc_fw)
+		return -EINVAL;
+
+	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+	amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+	major = le16_to_cpu(hdr->header.header_version_major);
+	minor = le16_to_cpu(hdr->header.header_version_minor);
+
+	/* Only v2.x headers are understood here. */
+	if (major != 2)
+		return -EINVAL;
+
+	gfx_v12_1_xcc_load_rlcg_microcode(adev, xcc_id);
+	if (amdgpu_dpm == 1 && minor >= 2)
+		gfx_v12_1_xcc_load_rlc_iram_dram_microcode(adev, xcc_id);
+
+	return 0;
+}
+
+/*
+ * Bring up the RLC of one XCC.
+ *
+ * With PSP firmware loading only the clear-state buffer is set up (plus the
+ * SRM on bare metal).  Otherwise, on bare metal, the RLC is stopped, CG and
+ * PG are disabled, the microcode is loaded by the driver when the load type
+ * is DIRECT, and the RLC is restarted.  An SR-IOV VF only sets up the
+ * clear-state buffer.
+ *
+ * Returns 0 on success or the error from the microcode load.
+ */
+static int gfx_v12_1_xcc_rlc_resume(struct amdgpu_device *adev,
+				    int xcc_id)
+{
+	int ret;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		gfx_v12_1_xcc_init_csb(adev, xcc_id);
+
+		/* enable RLC SRM */
+		if (!amdgpu_sriov_vf(adev))
+			gfx_v12_1_xcc_rlc_enable_srm(adev, xcc_id);
+
+		return 0;
+	}
+
+	if (amdgpu_sriov_vf(adev)) {
+		gfx_v12_1_xcc_init_csb(adev, xcc_id);
+		return 0;
+	}
+
+	gfx_v12_1_xcc_rlc_stop(adev, xcc_id);
+
+	/* disable CG */
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
+
+	/* disable PG */
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, 0);
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		/* legacy rlc firmware loading */
+		ret = gfx_v12_1_xcc_rlc_load_microcode(adev, xcc_id);
+		if (ret)
+			return ret;
+	}
+
+	gfx_v12_1_xcc_init_csb(adev, xcc_id);
+	gfx_v12_1_xcc_rlc_start(adev, xcc_id);
+
+	return 0;
+}
+
+/*
+ * Resume the RLC on every XCC counted by NUM_XCC(adev->gfx.xcc_mask).
+ * Stops at, and returns, the first per-XCC error; returns 0 otherwise.
+ */
+static int gfx_v12_1_rlc_resume(struct amdgpu_device *adev)
+{
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc, ret;
+
+	for (xcc = 0; xcc < xcc_count; xcc++) {
+		ret = gfx_v12_1_xcc_rlc_resume(adev, xcc);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Program the MEC RS64 program-counter start address (taken from the MEC
+ * firmware header) for pipes 0-3 of ME 1 on one XCC, then pulse the four
+ * MEC pipe reset bits in CP_MEC_RS64_CNTL (set, then clear).
+ *
+ * NOTE(review): soc_v1_0_grbm_select() is called here without taking
+ * adev->srbm_mutex, unlike gfx_v12_1_xcc_set_mec_ucode_start_addr() -
+ * confirm the caller serializes this.
+ */
+static void gfx_v12_1_xcc_config_gfx_rs64(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ uint32_t pipe_id, tmp;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+
+ /* config mec program start addr */
+ for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+ soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id));
+ /* low word: addr_lo >> 2, with the low two bits of addr_hi in bits 31:30 */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+
+ /* reset mec pipe */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp);
+
+ /* clear mec pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp);
+}
+
+/* Run the RS64 MEC configuration on every XCC in adev->gfx.xcc_mask. */
+static void gfx_v12_1_config_gfx_rs64(struct amdgpu_device *adev)
+{
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc;
+
+	for (xcc = 0; xcc < xcc_count; xcc++)
+		gfx_v12_1_xcc_config_gfx_rs64(adev, xcc);
+}
+
+/*
+ * Write the MEC ucode start address from the MEC firmware header into
+ * CP_MEC_RS64_PRGRM_CNTR_START/_HI for every pipe of ME 1 on one XCC.
+ * The GRBM selection is changed and restored under srbm_mutex.
+ */
+static void gfx_v12_1_xcc_set_mec_ucode_start_addr(struct amdgpu_device *adev,
+						   int xcc_id)
+{
+	const struct gfx_firmware_header_v2_0 *hdr =
+		(const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+	unsigned int pipe;
+
+	mutex_lock(&adev->srbm_mutex);
+	pipe = 0;
+	while (pipe < adev->gfx.mec.num_pipe_per_mec) {
+		soc_v1_0_grbm_select(adev, 1, pipe, 0, 0, GET_INST(GC, xcc_id));
+
+		/* Low word: addr_lo >> 2 with the low bits of addr_hi on top. */
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START,
+			     hdr->ucode_start_addr_lo >> 2 |
+			     hdr->ucode_start_addr_hi << 30);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+			     hdr->ucode_start_addr_hi >> 2);
+
+		pipe++;
+	}
+	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Poll one XCC until CP_STAT reads 0 and
+ * RLC_RLCS_BOOTLOAD_STATUS.BOOTLOAD_COMPLETE reads 1, for at most
+ * adev->usec_timeout iterations.
+ *
+ * Returns 0 on completion or -ETIMEDOUT when the poll budget is exhausted.
+ * With RLC backdoor autoload the MEC ucode start address is programmed
+ * after a successful wait.
+ */
+static int gfx_v12_1_xcc_wait_for_rlc_autoload_complete(struct amdgpu_device *adev,
+							int xcc_id)
+{
+	uint32_t cp_stat, bootload;
+	bool loaded = false;
+	int tries;
+
+	for (tries = 0; tries < adev->usec_timeout; tries++) {
+		cp_stat = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_STAT);
+		bootload = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+					regRLC_RLCS_BOOTLOAD_STATUS);
+
+		loaded = !cp_stat &&
+			 REG_GET_FIELD(bootload, RLC_RLCS_BOOTLOAD_STATUS,
+				       BOOTLOAD_COMPLETE) == 1;
+		if (loaded)
+			break;
+
+		udelay(1);
+		/* Emulation is far slower; back off more between polls. */
+		if (amdgpu_emu_mode)
+			msleep(10);
+	}
+
+	if (!loaded) {
+		dev_err(adev->dev,
+			"rlc autoload: xcc%d gc ucode autoload timeout\n", xcc_id);
+		return -ETIMEDOUT;
+	}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+		gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
+
+	return 0;
+}
+
+/**
+ * gfx_v12_1_wait_for_rlc_autoload_complete - wait for RLC autoload on all XCCs
+ * @adev: amdgpu device
+ *
+ * Waits on each XCC in turn.  The per-XCC result is propagated so that an
+ * autoload timeout is no longer reported as success.
+ *
+ * Returns 0 when every XCC completed, or the first error returned by
+ * gfx_v12_1_xcc_wait_for_rlc_autoload_complete() (-ETIMEDOUT).
+ */
+static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+	int xcc_id, r;
+
+	for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+		r = gfx_v12_1_xcc_wait_for_rlc_autoload_complete(adev, xcc_id);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable or halt the compute micro engine (MEC) of one XCC through
+ * CP_MEC_RS64_CNTL.  When enabling, the icache-invalidate, pipe reset and
+ * halt bits are cleared and the pipe active bits set; disabling does the
+ * opposite.  The KIQ ring's sched.ready flag is set to @enable, followed by
+ * a 50us delay.
+ */
+static void gfx_v12_1_xcc_cp_compute_enable(struct amdgpu_device *adev,
+					    bool enable, int xcc_id)
+{
+	const u32 hold = enable ? 0 : 1;	/* reset/halt/invalidate bits */
+	const u32 run = enable ? 1 : 0;		/* pipe active bits */
+	u32 cntl;
+
+	cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, hold);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, hold);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, hold);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, hold);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, hold);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, run);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, run);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, run);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, run);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_HALT, hold);
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, cntl);
+
+	adev->gfx.kiq[xcc_id].ring.sched.ready = enable;
+
+	udelay(50);
+}
+
+/*
+ * Create (if not already present) the VRAM buffer objects that hold the MEC
+ * firmware: one BO with the ucode section, and one BO with a copy of the
+ * data section per pipe per XCC, each copy placed at a 64KB-aligned stride.
+ *
+ * Returns 0 on success, -EINVAL when no MEC firmware is loaded, or the
+ * error from amdgpu_bo_create_reserved() (after calling
+ * gfx_v12_1_mec_fini()).
+ */
+static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ u32 fw_ucode_size, fw_data_size;
+ u32 *fw_ucode_ptr, *fw_data_ptr;
+ int i, r, xcc_id;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ /* locate the ucode and data sections inside the firmware image */
+ fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+ /* ucode BO: created once and reused on later calls */
+ if (adev->gfx.mec.mec_fw_obj == NULL) {
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw_ucode_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v12_1_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ }
+
+ /* data BO: one 64KB-aligned slot per pipe per XCC */
+ if (adev->gfx.mec.mec_fw_data_obj == NULL) {
+ r = amdgpu_bo_create_reserved(adev,
+ ALIGN(fw_data_size, 64 * 1024) *
+ adev->gfx.mec.num_pipe_per_mec * NUM_XCC(adev->gfx.xcc_mask),
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.mec_fw_data_obj,
+ &adev->gfx.mec.mec_fw_data_gpu_addr,
+ (void **)&fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
+ gfx_v12_1_mec_fini(adev);
+ return r;
+ }
+
+ for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ /* offset is in dwords because fw_data_ptr is a u32 pointer */
+ u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
+ ALIGN(fw_data_size, 64 * 1024) / 4;
+ memcpy(fw_data_ptr + offset, fw_data, fw_data_size);
+ }
+ }
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+ }
+
+ return 0;
+}
+
+/*
+ * Legacy (driver-side) MEC RS64 firmware load for one XCC.
+ *
+ * Halts the MEC, programs the instruction and data cache base controls,
+ * points every pipe at its data slot and at the shared ucode BO,
+ * invalidates the data and instruction caches and finally programs the
+ * ucode start address.
+ *
+ * The GRBM selection is restored to the default while srbm_mutex is still
+ * held, matching the other GRBM-select users in this file.
+ *
+ * Returns 0 on success, -EINVAL when no MEC firmware is loaded or when a
+ * cache invalidation does not complete within 50 ms.
+ */
+static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev,
+							int xcc_id)
+{
+	const struct gfx_firmware_header_v2_0 *mec_hdr;
+	u32 fw_data_size;
+	u32 tmp, i, usec_timeout = 50000; /* Wait for 50 ms */
+	u64 data_addr;
+
+	if (!adev->gfx.mec_fw)
+		return -EINVAL;
+
+	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+	gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
+
+	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
+
+	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp);
+
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+		soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id));
+
+		/* Each pipe of each XCC has its own 64KB-aligned data slot. */
+		data_addr = adev->gfx.mec.mec_fw_data_gpu_addr +
+			    (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
+			    ALIGN(fw_data_size, 64 * 1024);
+
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO,
+			     lower_32_bits(data_addr));
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI,
+			     upper_32_bits(data_addr));
+
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
+			     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
+			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+	}
+	/* Restore the default GRBM selection before releasing the lock. */
+	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Trigger an invalidation of the data cache */
+	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp);
+
+	/* Wait for invalidation complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+				       INVALIDATE_DCACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate data cache\n");
+		return -EINVAL;
+	}
+
+	/* Trigger an invalidation of the L1 instruction caches */
+	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp);
+
+	/* Wait for invalidation complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+				       INVALIDATE_CACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate instruction cache\n");
+		return -EINVAL;
+	}
+
+	gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
+
+	return 0;
+}
+
+/*
+ * Tell the RLC which queue is the KIQ on one XCC: the low byte of
+ * RLC_CP_SCHEDULERS is written with the ring's me/pipe/queue encoding
+ * first, then written again with bit 7 (0x80) set.
+ */
+static void gfx_v12_1_xcc_kiq_setting(struct amdgpu_ring *ring,
+				      int xcc_id)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t sched;
+
+	/* tell RLC which is KIQ queue */
+	sched = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
+	sched &= 0xffffff00;
+	sched |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, sched);
+
+	/* Second write carries the same queue id with bit 7 set. */
+	sched |= 0x80;
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, sched);
+}
+
+/*
+ * Program the CP doorbell ranges of one XCC: the gfx ring range is zeroed
+ * and the MEC range spans doorbell_index.kiq .. doorbell_index.userqueue_end,
+ * each shifted by xcc_id * doorbell_index.xcc_doorbell_range.
+ */
+static void gfx_v12_1_xcc_cp_set_doorbell_range(struct amdgpu_device *adev,
+						int xcc_id)
+{
+	/* disable gfx engine doorbell range */
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_LOWER, 0);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_UPPER, 0);
+
+	/* set compute engine doorbell range */
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER,
+		     ((adev->doorbell_index.kiq +
+		       xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+		      2) << 2);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER,
+		     ((adev->doorbell_index.userqueue_end +
+		       xcc_id * adev->doorbell_index.xcc_doorbell_range) *
+		      2) << 2);
+}
+
+/**
+ * gfx_v12_1_compute_mqd_init - fill a v12.1 compute MQD from queue properties
+ * @adev: amdgpu device (not used by the body)
+ * @m: memory holding the struct v12_1_compute_mqd to initialize
+ * @prop: queue properties (addresses, sizes, doorbell, priorities)
+ *
+ * Only writes the MQD image; no hardware register is touched here.
+ * The doorbell control word is computed once: with a doorbell it is the
+ * register default with DOORBELL_OFFSET/EN set and SOURCE/HIT cleared,
+ * without a doorbell it is 0.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_1_compute_mqd_init(struct amdgpu_device *adev, void *m,
+				      struct amdgpu_mqd_prop *prop)
+{
+	struct v12_1_compute_mqd *mqd = m;
+	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+	uint32_t tmp;
+
+	mqd->header = 0xC0310800;
+	mqd->compute_pipelinestat_enable = 0x00000001;
+	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+	mqd->compute_misc_reserved = 0x00000007;
+
+	/* the EOP base is stored in units of 256 bytes */
+	eop_base_addr = prop->eop_gpu_addr >> 8;
+	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+			(order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
+
+	mqd->cp_hqd_eop_control = tmp;
+
+	/* disable the queue if it's active */
+	mqd->cp_hqd_dequeue_request = 0;
+	mqd->cp_hqd_pq_rptr = 0;
+	mqd->cp_hqd_pq_wptr_lo = 0;
+	mqd->cp_hqd_pq_wptr_hi = 0;
+
+	/* set the pointer to the MQD */
+	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+	/* set MQD vmid to 0 */
+	tmp = regCP_MQD_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+	mqd->cp_mqd_control = tmp;
+
+	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up the HQD, this is similar to CP_RB0_CNTL */
+	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+			    (order_base_2(prop->queue_size / 4) - 1));
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+	mqd->cp_hqd_pq_control = tmp;
+
+	/* set the wb address whether it's enabled or not */
+	wb_gpu_addr = prop->rptr_gpu_addr;
+	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_rptr_report_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+	wb_gpu_addr = prop->wptr_gpu_addr;
+	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	tmp = 0;
+	/* enable the doorbell if requested */
+	if (prop->use_doorbell) {
+		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				DOORBELL_OFFSET, prop->doorbell_index);
+
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_EN, 1);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_SOURCE, 0);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_HIT, 0);
+	}
+
+	mqd->cp_hqd_pq_doorbell_control = tmp;
+
+	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
+
+	/* set the vmid for the queue */
+	mqd->cp_hqd_vmid = 0;
+
+	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x63);
+	mqd->cp_hqd_persistent_state = tmp;
+
+	/* set MIN_IB_AVAIL_SIZE */
+	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 1);
+	mqd->cp_hqd_ib_control = tmp;
+
+	/* set static priority for a compute queue/ring */
+	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+	/* fall back to the MQD structure size when no stride was given */
+	mqd->cp_mqd_stride_size = prop->mqd_stride_size ? prop->mqd_stride_size :
+		sizeof(struct v12_1_compute_mqd);
+
+	mqd->cp_hqd_active = prop->hqd_active;
+
+	return 0;
+}
+
+/*
+ * Copy the KIQ ring's MQD image (ring->mqd_ptr) into the HQD registers of
+ * one XCC and activate the queue.  If the HQD is found active it is first
+ * asked to dequeue and polled for up to adev->usec_timeout microseconds.
+ *
+ * gfx_v12_1_xcc_kiq_init_queue() calls this with the ring's me/pipe/queue
+ * selected through soc_v1_0_grbm_select() and srbm_mutex held.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_1_xcc_kiq_init_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+ /* poll until the queue goes inactive; a timeout is not reported */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* the doorbell control word is written a second time, same value */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+/*
+ * Initialize the KIQ of one XCC.
+ *
+ * Outside of a GPU reset the MQD is zeroed, rebuilt with
+ * amdgpu_ring_init_mqd(), written to the HQD registers and then saved to
+ * the KIQ backup copy.  During a GPU reset the MQD is restored from that
+ * backup, the ring buffer is cleared and the registers are reprogrammed.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_1_xcc_kiq_init_queue(struct amdgpu_ring *ring,
+					int xcc_id)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
+
+	gfx_v12_1_xcc_kiq_setting(ring, xcc_id);
+
+	if (!amdgpu_in_reset(adev)) {
+		memset((void *)mqd, 0, sizeof(*mqd));
+		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+			amdgpu_ring_clear_ring(ring);
+
+		mutex_lock(&adev->srbm_mutex);
+		soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+		amdgpu_ring_init_mqd(ring);
+		gfx_v12_1_xcc_kiq_init_register(ring, xcc_id);
+		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+		mutex_unlock(&adev->srbm_mutex);
+
+		/* Keep a pristine copy for later resets. */
+		if (adev->gfx.kiq[xcc_id].mqd_backup)
+			memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(*mqd));
+
+		return 0;
+	}
+
+	/* for GPU_RESET case */
+	/* reset MQD to a clean status */
+	if (adev->gfx.kiq[xcc_id].mqd_backup)
+		memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(*mqd));
+
+	/* reset ring buffer */
+	ring->wptr = 0;
+	amdgpu_ring_clear_ring(ring);
+
+	mutex_lock(&adev->srbm_mutex);
+	soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+	gfx_v12_1_xcc_kiq_init_register(ring, xcc_id);
+	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	mutex_unlock(&adev->srbm_mutex);
+
+	return 0;
+}
+
+/*
+ * Initialize the MQD of one kernel compute queue.
+ *
+ * During a GPU reset or while suspended, the MQD is restored from its
+ * backup and the ring buffer and write pointers are cleared.  Otherwise the
+ * MQD is zeroed, rebuilt with amdgpu_ring_init_mqd() and saved to the
+ * backup slot indexed by the ring's position in adev->gfx.compute_ring.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_1_xcc_kcq_init_queue(struct amdgpu_ring *ring,
+					int xcc_id)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+	if (amdgpu_in_reset(adev) || adev->in_suspend) {
+		/* restore MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+
+		/* reset ring buffer */
+		ring->wptr = 0;
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+		amdgpu_ring_clear_ring(ring);
+
+		return 0;
+	}
+
+	memset((void *)mqd, 0, sizeof(*mqd));
+
+	mutex_lock(&adev->srbm_mutex);
+	soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
+	amdgpu_ring_init_mqd(ring);
+	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Save the fresh MQD so it can be restored on reset/resume. */
+	if (adev->gfx.mec.mqd_backup[mqd_idx])
+		memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+
+	return 0;
+}
+
+/*
+ * Resume the KIQ of one XCC: reserve and map its MQD buffer object,
+ * initialize the queue, then unmap and unreserve again.  On success the
+ * ring's sched.ready flag is set.
+ *
+ * Returns 0 on success or the error from reserving/mapping the MQD BO.
+ */
+static int gfx_v12_1_xcc_kiq_resume(struct amdgpu_device *adev,
+				    int xcc_id)
+{
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[xcc_id].ring;
+	int ret;
+
+	ret = amdgpu_bo_reserve(kiq_ring->mqd_obj, false);
+	if (ret)
+		return ret;
+
+	ret = amdgpu_bo_kmap(kiq_ring->mqd_obj, (void **)&kiq_ring->mqd_ptr);
+	if (!ret) {
+		gfx_v12_1_xcc_kiq_init_queue(kiq_ring, xcc_id);
+		amdgpu_bo_kunmap(kiq_ring->mqd_obj);
+		kiq_ring->mqd_ptr = NULL;
+	}
+
+	/* The reservation is dropped whether or not the mapping worked. */
+	amdgpu_bo_unreserve(kiq_ring->mqd_obj);
+	if (ret)
+		return ret;
+
+	kiq_ring->sched.ready = true;
+	return 0;
+}
+
+/*
+ * Resume the kernel compute queues of one XCC: initialize the MQD of each
+ * of the XCC's compute rings, then enable the queues through
+ * amdgpu_gfx_enable_kcq().
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int gfx_v12_1_xcc_kcq_resume(struct amdgpu_device *adev,
+				    int xcc_id)
+{
+	struct amdgpu_ring *kcq;
+	int idx, ret;
+
+	if (!amdgpu_async_gfx_ring)
+		gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+		/* Rings are laid out XCC by XCC in compute_ring[]. */
+		kcq = &adev->gfx.compute_ring[idx + xcc_id * adev->gfx.num_compute_rings];
+
+		ret = amdgpu_bo_reserve(kcq->mqd_obj, false);
+		if (ret)
+			return ret;
+
+		ret = amdgpu_bo_kmap(kcq->mqd_obj, (void **)&kcq->mqd_ptr);
+		if (ret == 0) {
+			ret = gfx_v12_1_xcc_kcq_init_queue(kcq, xcc_id);
+			amdgpu_bo_kunmap(kcq->mqd_obj);
+			kcq->mqd_ptr = NULL;
+		}
+		amdgpu_bo_unreserve(kcq->mqd_obj);
+
+		if (ret)
+			return ret;
+	}
+
+	return amdgpu_gfx_enable_kcq(adev, xcc_id);
+}
+/*
+ * Bring up the compute command processor on every XCC set in @xcc_mask.
+ *
+ * For each instance, in this order: load the RS64 compute microcode when
+ * the firmware load type is DIRECT; enable the GUI idle interrupt when the
+ * load type is PSP; program the doorbell range; enable compute; initialise
+ * the KIQ (through amdgpu_mes_kiq_hw_init() when MES KIQ is enabled and a
+ * kiq_hw_init hook exists, otherwise through gfx_v12_1_xcc_kiq_resume());
+ * resume the KCQs; and finally run amdgpu_ring_test_helper() on each of
+ * the instance's compute rings.
+ *
+ * Returns 0 on success or the first negative error code; instances after
+ * the failing one are not touched.
+ */
+static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask)
+{
+ int r, i, xcc_id;
+ struct amdgpu_ring *ring;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ /* GFX CGCG and LS is set by default */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
+
+ gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id);
+
+ gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
+
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+ r = amdgpu_mes_kiq_hw_init(adev, xcc_id);
+ else
+ r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Set up the compute partition configuration and resume the CP on all
+ * XCCs.
+ *
+ * Under SR-IOV the current partition mode is queried and the XCP manager
+ * is initialised with num_xcc / xccs-per-xcp partitions; an unknown mode,
+ * a missing get_xccs_per_xcp hook, or a non-positive xccs-per-xcp value
+ * yields -EINVAL (the last check prevents a division by zero).
+ * On bare metal the partition mode is switched to amdgpu_user_partt_mode
+ * only when the current mode is unknown.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int gfx_v12_1_cp_resume(struct amdgpu_device *adev)
+{
+	int num_xcc, num_xcp, num_xcc_per_xcp;
+	uint16_t xcc_mask;
+	int r = 0;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	if (amdgpu_sriov_vf(adev)) {
+		enum amdgpu_gfx_partition mode;
+
+		mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+						       AMDGPU_XCP_FL_NONE);
+		if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+			return -EINVAL;
+		if (!adev->gfx.funcs || !adev->gfx.funcs->get_xccs_per_xcp)
+			return -EINVAL;
+
+		num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
+		/* used as a divisor below; reject values that cannot be valid */
+		if (num_xcc_per_xcp <= 0)
+			return -EINVAL;
+
+		adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp;
+		num_xcp = num_xcc / num_xcc_per_xcp;
+
+		r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
+	} else if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+						   AMDGPU_XCP_FL_NONE) ==
+		   AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) {
+		r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
+						     amdgpu_user_partt_mode);
+	}
+
+	if (r)
+		return r;
+
+	/* one bit per logical XCC, starting at bit 0 */
+	xcc_mask = GENMASK(num_xcc - 1, 0);
+
+	return gfx_v12_1_xcc_cp_resume(adev, xcc_mask);
+}
+
+/*
+ * Enable the GFX hub GART, set the default fault-enable policy and flush
+ * the TLB of every GFX hub present in adev->vmhubs_mask.
+ *
+ * Fault enabling defaults to off only when amdgpu_vm_fault_stop is
+ * AMDGPU_VM_FAULT_STOP_ALWAYS.
+ *
+ * Returns 0 on success or the error from gart_enable().
+ */
+static int gfx_v12_1_gfxhub_enable(struct amdgpu_device *adev)
+{
+	bool fault_default;
+	int hub, err;
+
+	err = adev->gfxhub.funcs->gart_enable(adev);
+	if (err)
+		return err;
+
+	fault_default = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+	adev->gfxhub.funcs->set_fault_enable_default(adev, fault_default);
+
+	/*
+	 * TODO investigate why TLB flush is needed,
+	 * are we missing a flush somewhere else?
+	 */
+	for_each_set_bit(hub, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+		if (!AMDGPU_IS_GFXHUB(hub))
+			continue;
+
+		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(hub), 0);
+	}
+
+	return 0;
+}
+
+/*
+ * Read GB_ADDR_CONFIG_READ from logical GC instance 0 and cache both the
+ * raw value and its decoded fields in adev->gfx.config.
+ *
+ * Most fields are stored as 1 << <field>; pipe_interleave_size is stored
+ * as 1 << (8 + <field>).  max_tile_pipes mirrors num_pipes.
+ *
+ * Returns 0 on success, -EINVAL when the register reads back as zero.
+ */
+static int get_gb_addr_config(struct amdgpu_device *adev)
+{
+	u32 cfg = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG_READ);
+
+	if (!cfg)
+		return -EINVAL;
+
+	adev->gfx.config.gb_addr_config_fields.num_pkrs =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG_READ, NUM_PKRS);
+
+	adev->gfx.config.gb_addr_config = cfg;
+
+	adev->gfx.config.gb_addr_config_fields.num_pipes =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG_READ, NUM_PIPES);
+
+	adev->gfx.config.max_tile_pipes =
+		adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+	adev->gfx.config.gb_addr_config_fields.max_compress_frags =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG_READ, MAX_COMPRESSED_FRAGS);
+	adev->gfx.config.gb_addr_config_fields.num_rb_per_se =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG_READ, NUM_RB_PER_SE);
+	adev->gfx.config.gb_addr_config_fields.num_se =
+		1 << REG_GET_FIELD(cfg, GB_ADDR_CONFIG_READ, NUM_SHADER_ENGINES);
+	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size =
+		1 << (8 + REG_GET_FIELD(cfg, GB_ADDR_CONFIG_READ,
+					PIPE_INTERLEAVE_SIZE));
+
+	return 0;
+}
+
+/*
+ * Set the GPA_OVERRIDE bit in both CPC_PSP_DEBUG and CPG_PSP_DEBUG of one
+ * XCC.  Called from hw_init on the direct (backdoor) firmware load path.
+ */
+static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev,
+					   int xcc_id)
+{
+	int inst = GET_INST(GC, xcc_id);
+	uint32_t dbg;
+
+	dbg = RREG32_SOC15(GC, inst, regCPC_PSP_DEBUG);
+	dbg |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+	WREG32_SOC15(GC, inst, regCPC_PSP_DEBUG, dbg);
+
+	dbg = RREG32_SOC15(GC, inst, regCPG_PSP_DEBUG);
+	dbg |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+	WREG32_SOC15(GC, inst, regCPG_PSP_DEBUG, dbg);
+}
+
+/*
+ * Program TCP_UTCL0_THRASHING_CTRL on one XCC: THRASHING_EN is set to 2
+ * and both retry-fragment threshold controls (UP_EN and DOWN_EN) are
+ * switched on.  All other bits of the register are preserved.
+ */
+static void gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(struct amdgpu_device *adev,
+						   int xcc_id)
+{
+	int inst = GET_INST(GC, xcc_id);
+	uint32_t ctrl;
+
+	ctrl = RREG32_SOC15(GC, inst, regTCP_UTCL0_THRASHING_CTRL);
+
+	ctrl = REG_SET_FIELD(ctrl, TCP_UTCL0_THRASHING_CTRL, THRASHING_EN, 0x2);
+	ctrl = REG_SET_FIELD(ctrl, TCP_UTCL0_THRASHING_CTRL,
+			     RETRY_FRAGMENT_THRESHOLD_UP_EN, 0x1);
+	ctrl = REG_SET_FIELD(ctrl, TCP_UTCL0_THRASHING_CTRL,
+			     RETRY_FRAGMENT_THRESHOLD_DOWN_EN, 0x1);
+
+	WREG32_SOC15(GC, inst, regTCP_UTCL0_THRASHING_CTRL, ctrl);
+}
+
+/*
+ * Enable atomics on one XCC by setting ATOMIC_REQUESTER_EN in
+ * TCP_UTCL0_CNTL1.
+ */
+static void gfx_v12_1_xcc_enable_atomics(struct amdgpu_device *adev,
+					 int xcc_id)
+{
+	int inst = GET_INST(GC, xcc_id);
+	uint32_t cntl1;
+
+	cntl1 = RREG32_SOC15(GC, inst, regTCP_UTCL0_CNTL1);
+	cntl1 = REG_SET_FIELD(cntl1, TCP_UTCL0_CNTL1, ATOMIC_REQUESTER_EN, 0x1);
+	WREG32_SOC15(GC, inst, regTCP_UTCL0_CNTL1, cntl1);
+}
+
+/*
+ * Write 0xf to GL1_DRAM_BURST_CTRL and GLARB_DRAM_BURST_CTRL of one XCC.
+ */
+static void gfx_v12_1_xcc_disable_burst(struct amdgpu_device *adev,
+					int xcc_id)
+{
+	int inst = GET_INST(GC, xcc_id);
+
+	WREG32_SOC15(GC, inst, regGL1_DRAM_BURST_CTRL, 0xf);
+	WREG32_SOC15(GC, inst, regGLARB_DRAM_BURST_CTRL, 0xf);
+}
+
+/*
+ * Set DISABLE_EARLY_WRITE_ACK in TCP_CNTL3 of one XCC.
+ */
+static void gfx_v12_1_xcc_disable_early_write_ack(struct amdgpu_device *adev,
+						  int xcc_id)
+{
+	int inst = GET_INST(GC, xcc_id);
+	uint32_t cntl3;
+
+	cntl3 = RREG32_SOC15(GC, inst, regTCP_CNTL3);
+	cntl3 = REG_SET_FIELD(cntl3, TCP_CNTL3, DISABLE_EARLY_WRITE_ACK, 0x1);
+	WREG32_SOC15(GC, inst, regTCP_CNTL3, cntl3);
+}
+
+/*
+ * Set TCP_SPILL_CACHE_DISABLE in TCP_CNTL of one XCC.
+ */
+static void gfx_v12_1_xcc_disable_tcp_spill_cache(struct amdgpu_device *adev,
+						  int xcc_id)
+{
+	int inst = GET_INST(GC, xcc_id);
+	uint32_t cntl;
+
+	cntl = RREG32_SOC15(GC, inst, regTCP_CNTL);
+	cntl = REG_SET_FIELD(cntl, TCP_CNTL, TCP_SPILL_CACHE_DISABLE, 0x1);
+	WREG32_SOC15(GC, inst, regTCP_CNTL, cntl);
+}
+
+/*
+ * Apply the per-XCC register defaults: burst control, atomics, TCP
+ * thrashing control, early write ack and TCP spill cache, in that order,
+ * for every XCC in adev->gfx.xcc_mask.
+ */
+static void gfx_v12_1_init_golden_registers(struct amdgpu_device *adev)
+{
+	int xcc;
+
+	for (xcc = 0; xcc < NUM_XCC(adev->gfx.xcc_mask); xcc++) {
+		gfx_v12_1_xcc_disable_burst(adev, xcc);
+		gfx_v12_1_xcc_enable_atomics(adev, xcc);
+		gfx_v12_1_xcc_setup_tcp_thrashing_ctrl(adev, xcc);
+		gfx_v12_1_xcc_disable_early_write_ack(adev, xcc);
+		gfx_v12_1_xcc_disable_tcp_spill_cache(adev, xcc);
+	}
+}
+/*
+ * Hardware init for the GFX 12.1 IP block.
+ *
+ * Sequence, as implemented below:
+ *  1. Firmware: with RLC backdoor autoload, enable the autoload; with
+ *     DIRECT loading, call the IMU load_microcode hook (if any; its result
+ *     is not checked here) and disable GPA mode on every XCC.
+ *  2. For RLC backdoor autoload and PSP loading, wait for RLC autoload to
+ *     complete.
+ *  3. Mark GFX as powered on and cache GB_ADDR_CONFIG (a zero readback only
+ *     produces a warning).
+ *  4. With PSP loading, configure the RS64 engines.
+ *  5. Enable the GFX hub, apply golden registers, run constants init and
+ *     the optional NBIO GC doorbell init.
+ *  6. Resume the RLC, redo TCP harvesting, then resume the CP.
+ *
+ * Returns 0 on success or the first negative error code.
+ */
+static int gfx_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, i, num_xcc;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ /* rlc autoload firmware */
+ r = gfx_v12_1_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ } else {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ if (adev->gfx.imu.funcs) {
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ }
+
+ for (i = 0; i < num_xcc; i++) {
+ /* disable gpa mode in backdoor loading */
+ gfx_v12_1_xcc_disable_gpa_mode(adev, i);
+ }
+ }
+ }
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = gfx_v12_1_wait_for_rlc_autoload_complete(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+ return r;
+ }
+ }
+
+ adev->gfx.is_poweron = true;
+
+ if (get_gb_addr_config(adev))
+ DRM_WARN("Invalid gb_addr_config !\n");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ gfx_v12_1_config_gfx_rs64(adev);
+
+ r = gfx_v12_1_gfxhub_enable(adev);
+ if (r)
+ return r;
+
+ gfx_v12_1_init_golden_registers(adev);
+
+ gfx_v12_1_constants_init(adev);
+
+ if (adev->nbio.funcs->gc_doorbell_init)
+ adev->nbio.funcs->gc_doorbell_init(adev);
+
+ r = gfx_v12_1_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * init golden registers and rlc resume may override some registers,
+ * reconfig them here
+ */
+ gfx_v12_1_tcp_harvest(adev);
+
+ r = gfx_v12_1_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+/*
+ * Shut down the compute side of one XCC.
+ *
+ * Unless hardware access is disabled, the KCQs are disabled (a failure is
+ * only logged) and the MES KIQ is torn down.  Under SR-IOV the low byte
+ * of RLC_CP_SCHEDULERS is cleared.  Finally compute and the GUI idle
+ * interrupt are disabled.
+ */
+static void gfx_v12_1_xcc_fini(struct amdgpu_device *adev,
+			       int xcc_id)
+{
+	uint32_t sched;
+
+	if (!adev->no_hw_access) {
+		if (amdgpu_gfx_disable_kcq(adev, xcc_id))
+			DRM_ERROR("KCQ disable failed\n");
+
+		amdgpu_mes_kiq_hw_fini(adev, xcc_id);
+	}
+
+	if (amdgpu_sriov_vf(adev)) {
+		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
+		sched = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+				     regRLC_CP_SCHEDULERS);
+		sched &= 0xffffff00;
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+			     regRLC_CP_SCHEDULERS, sched);
+	}
+
+	gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
+	gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
+}
+
+/*
+ * Hardware teardown for the GFX 12.1 IP block: release the privileged
+ * register/instruction interrupts, shut down every XCC, disable the GFX
+ * hub GART and mark GFX as powered off.  Always returns 0.
+ */
+static int gfx_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int xcc, xcc_count;
+
+	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+	xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	for (xcc = 0; xcc < xcc_count; xcc++)
+		gfx_v12_1_xcc_fini(adev, xcc);
+
+	adev->gfxhub.funcs->gart_disable(adev);
+	adev->gfx.is_poweron = false;
+
+	return 0;
+}
+
+/* Suspend is implemented as a plain hardware teardown. */
+static int gfx_v12_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int ret = gfx_v12_1_hw_fini(ip_block);
+
+	return ret;
+}
+
+/* Resume is implemented as a full hardware init. */
+static int gfx_v12_1_resume(struct amdgpu_ip_block *ip_block)
+{
+	int ret = gfx_v12_1_hw_init(ip_block);
+
+	return ret;
+}
+
+/*
+ * Report whether GFX is idle: true only when GRBM_STATUS.GUI_ACTIVE reads
+ * as zero on every XCC.
+ */
+static bool gfx_v12_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc;
+
+	for (xcc = 0; xcc < xcc_count; xcc++) {
+		u32 status = RREG32_SOC15(GC, GET_INST(GC, xcc), regGRBM_STATUS);
+
+		if (REG_GET_FIELD(status, GRBM_STATUS, GUI_ACTIVE))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Poll gfx_v12_1_is_idle() once per microsecond for up to
+ * adev->usec_timeout iterations.
+ *
+ * Returns 0 as soon as GFX is idle, -ETIMEDOUT otherwise.
+ */
+static int gfx_v12_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned int waited = 0;
+
+	while (waited < adev->usec_timeout) {
+		if (gfx_v12_1_is_idle(ip_block))
+			return 0;
+
+		udelay(1);
+		waited++;
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Return the GPU clock counter via the SMUIO hook, or 0 (with a warning)
+ * when no such hook is available.
+ */
+static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+	if (!adev->smuio.funcs || !adev->smuio.funcs->get_gpu_clock_counter) {
+		dev_warn(adev->dev, "query gpu clock counter is not supported\n");
+		return 0;
+	}
+
+	return adev->smuio.funcs->get_gpu_clock_counter(adev);
+}
+
+/*
+ * Early init: install the gfx function table, clamp the number of compute
+ * rings to AMDGPU_MAX_COMPUTE_RINGS, register the KIQ/ring/IRQ/RLC/MQD/IMU
+ * callbacks, set up RLCG register access control and load the microcode.
+ *
+ * Returns the result of gfx_v12_1_init_microcode().
+ */
+static int gfx_v12_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	adev->gfx.funcs = &gfx_v12_1_gfx_funcs;
+	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+					  AMDGPU_MAX_COMPUTE_RINGS);
+
+	/* callback tables */
+	gfx_v12_1_set_kiq_pm4_funcs(adev);
+	gfx_v12_1_set_ring_funcs(adev);
+	gfx_v12_1_set_irq_funcs(adev);
+	gfx_v12_1_set_rlc_funcs(adev);
+	gfx_v12_1_set_mqd_funcs(adev);
+	gfx_v12_1_set_imu_funcs(adev);
+
+	gfx_v12_1_init_rlcg_reg_access_ctrl(adev);
+
+	return gfx_v12_1_init_microcode(adev);
+}
+
+/*
+ * Late init: take a reference on the privileged-register and
+ * privileged-instruction interrupt sources.
+ *
+ * Returns 0 on success or the error from the first failing
+ * amdgpu_irq_get().
+ */
+static int gfx_v12_1_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err;
+
+	err = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+	if (err)
+		return err;
+
+	return amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+}
+
+/*
+ * Report whether the RLC is enabled, based on RLC_CNTL.RLC_ENABLE_F32 of
+ * logical GC instance 0.
+ */
+static bool gfx_v12_1_is_rlc_enabled(struct amdgpu_device *adev)
+{
+	uint32_t cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL);
+
+	return REG_GET_FIELD(cntl, RLC_CNTL, RLC_ENABLE_F32) != 0;
+}
+
+/*
+ * Request RLC safe mode on one XCC: write CMD plus MESSAGE = 1 to
+ * RLC_SAFE_MODE, then poll (1 us steps, up to adev->usec_timeout) until
+ * the CMD field reads back as zero.  A timeout is not reported.
+ */
+static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev,
+					int xcc_id)
+{
+	uint32_t request = RLC_SAFE_MODE__CMD_MASK |
+			   (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+	unsigned int waited;
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, request);
+
+	/* wait for RLC_SAFE_MODE */
+	for (waited = 0; waited < adev->usec_timeout; waited++) {
+		uint32_t cur = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+					    regRLC_SAFE_MODE);
+
+		if (!REG_GET_FIELD(cur, RLC_SAFE_MODE, CMD))
+			break;
+		udelay(1);
+	}
+}
+
+/*
+ * Leave RLC safe mode on one XCC by writing only the CMD bit (MESSAGE
+ * left at zero) to RLC_SAFE_MODE.
+ */
+static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev,
+					  int xcc_id)
+{
+	const uint32_t request = RLC_SAFE_MODE__CMD_MASK;
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, request);
+}
+
+/* Apply the perfmon clock setting to every XCC in adev->gfx.xcc_mask. */
+static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev,
+				      bool enable)
+{
+	int xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc;
+
+	for (xcc = 0; xcc < xcc_count; xcc++)
+		gfx_v12_1_xcc_update_perf_clk(adev, enable, xcc);
+}
+
+/*
+ * Program the SPM VMID field of RLC_SPM_MC_CNTL on one XCC.
+ *
+ * @adev:   device
+ * @xcc_id: logical XCC index
+ * @ring:   optional ring; when given (GFX or COMPUTE type) and the device
+ *          is a PP-one-VF SR-IOV guest, the same register write is also
+ *          emitted on the ring
+ * @vmid:   VMID to place into the RLC_SPM_VMID field
+ *
+ * In PP-one-VF mode the register is accessed with the NO_KIQ variants.
+ *
+ * The field is updated with REG_SET_FIELD(), i.e. the value is shifted
+ * into place first and masked afterwards.  The previous open-coded form
+ * masked @vmid with the in-place field mask before shifting, which is
+ * only correct while the field's shift is zero.
+ */
+static void gfx_v12_1_update_spm_vmid(struct amdgpu_device *adev,
+				      int xcc_id,
+				      struct amdgpu_ring *ring,
+				      unsigned vmid)
+{
+	bool pp_one_vf = amdgpu_sriov_is_pp_one_vf(adev);
+	u32 reg, data;
+
+	reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL);
+	if (pp_one_vf)
+		data = RREG32_NO_KIQ(reg);
+	else
+		data = RREG32(reg);
+
+	data = REG_SET_FIELD(data, RLC_SPM_MC_CNTL, RLC_SPM_VMID, vmid);
+
+	if (pp_one_vf)
+		WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data);
+	else
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data);
+
+	/* reuse the offset computed above instead of shadowing 'reg' */
+	if (ring && pp_one_vf &&
+	    (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
+	     ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))
+		amdgpu_ring_emit_wreg(ring, reg, data);
+}
+/*
+ * RLC callback table for GFX 12.1.  The safe-mode hooks point at the
+ * per-XCC helpers; the remaining entries point at the gfx_v12_1_rlc_* and
+ * CSB helpers defined elsewhere in this file.
+ */
+static const struct amdgpu_rlc_funcs gfx_v12_1_rlc_funcs = {
+ .is_rlc_enabled = gfx_v12_1_is_rlc_enabled,
+ .set_safe_mode = gfx_v12_1_xcc_set_safe_mode,
+ .unset_safe_mode = gfx_v12_1_xcc_unset_safe_mode,
+ .init = gfx_v12_1_rlc_init,
+ .get_csb_size = gfx_v12_1_get_csb_size,
+ .get_csb_buffer = gfx_v12_1_get_csb_buffer,
+ .resume = gfx_v12_1_rlc_resume,
+ .stop = gfx_v12_1_rlc_stop,
+ .reset = gfx_v12_1_rlc_reset,
+ .start = gfx_v12_1_rlc_start,
+ .update_spm_vmid = gfx_v12_1_update_spm_vmid,
+};
+/*
+ * Power-gating control placeholders.  Both bodies are empty TODOs and the
+ * whole section is compiled out by the #if 0 guard.
+ */
+#if 0
+static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO */
+}
+
+static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO */
+}
+#endif
+
+/*
+ * Power-gating state handler.  Does nothing under SR-IOV.  For GC 12.1.0
+ * it forwards the request to amdgpu_gfx_off_ctrl(), enabling GFXOFF when
+ * @state is AMD_PG_STATE_GATE.  Always returns 0.
+ */
+static int gfx_v12_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool gate = (state == AMD_PG_STATE_GATE);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0))
+		amdgpu_gfx_off_ctrl(adev, gate);
+
+	return 0;
+}
+/*
+ * Enable or disable coarse-grain clock gating (CGCG/CGLS) on one XCC.
+ *
+ * Returns immediately unless at least one of the GFX_CGCG, GFX_CGLS,
+ * GFX_3D_CGCG or GFX_3D_CGLS flags is set in adev->cg_flags.
+ *
+ * Enable path:
+ *  - clear the CGCG / CGLS / GFX3D override bits in
+ *    RLC_CGTT_MGCG_OVERRIDE that correspond to the supported flags;
+ *  - in RLC_CGCG_CGLS_CTRL, program the CGCG idle threshold (0x36) and
+ *    CGLS delay (0xF) and set the matching enable bits;
+ *  - set POLL_FREQUENCY (0x100) and IDLE_POLL_COUNT (0x90) in
+ *    CP_RB_WPTR_POLL_CNTL;
+ *  - turn on the context busy/empty, compute busy and gfx idle interrupt
+ *    enables in CP_INT_CNTL.
+ * Disable path: clear CGCG_EN / CGLS_EN in RLC_CGCG_CGLS_CTRL.
+ *
+ * Apart from CP_INT_CNTL, registers are only written when their value
+ * actually changes.
+ */
+static void gfx_v12_1_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
+ adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
+
+ data &= ~CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK;
+ data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
+ data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL, data);
+ } else {
+ /* Program RLC_CGCG_CGLS_CTRL */
+ def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
+ }
+}
+
+/*
+ * Enable or disable medium-grain clock gating on one XCC by clearing
+ * (enable) or setting (disable) the GRBM, RLC and GFXIP MGCG override bits
+ * in RLC_CGTT_MGCG_OVERRIDE.
+ *
+ * Only acts when AMD_CG_SUPPORT_GFX_MGCG is set; MGLS alone changes
+ * nothing.  The register is written only when its value changes.
+ */
+static void gfx_v12_1_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+							    bool enable, int xcc_id)
+{
+	const uint32_t override_bits =
+		RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+		RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+		RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK;
+	uint32_t before, after;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG))
+		return;
+
+	before = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
+
+	/* It is disabled by HW by default */
+	if (enable)
+		after = before & ~override_bits;
+	else
+		after = before | override_bits;
+
+	if (after != before)
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, after);
+}
+
+/*
+ * Enable or disable repeater fine-grain clock gating on one XCC by
+ * clearing (enable) or setting (disable) the GFXIP and RLC repeater FGCG
+ * override bits in RLC_CGTT_MGCG_OVERRIDE.
+ *
+ * No-op unless AMD_CG_SUPPORT_REPEATER_FGCG is set; the register is only
+ * written when its value changes.
+ */
+static void gfx_v12_1_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
+					       bool enable, int xcc_id)
+{
+	const uint32_t override_bits =
+		RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
+		RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
+	int inst;
+	uint32_t before, after;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+		return;
+
+	inst = GET_INST(GC, xcc_id);
+	before = RREG32_SOC15(GC, inst, regRLC_CGTT_MGCG_OVERRIDE);
+	after = enable ? (before & ~override_bits) : (before | override_bits);
+
+	if (after != before)
+		WREG32_SOC15(GC, inst, regRLC_CGTT_MGCG_OVERRIDE, after);
+}
+
+/*
+ * Enable or disable SRAM fine-grain clock gating on one XCC by clearing
+ * (enable) or setting (disable) GFXIP_FGCG_OVERRIDE in
+ * RLC_CGTT_MGCG_OVERRIDE.
+ *
+ * No-op unless AMD_CG_SUPPORT_GFX_FGCG is set; the register is only
+ * written when its value changes.
+ */
+static void gfx_v12_1_xcc_update_sram_fgcg(struct amdgpu_device *adev,
+					   bool enable, int xcc_id)
+{
+	const uint32_t override_bit =
+		RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+	int inst;
+	uint32_t before, after;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+		return;
+
+	inst = GET_INST(GC, xcc_id);
+	before = RREG32_SOC15(GC, inst, regRLC_CGTT_MGCG_OVERRIDE);
+	after = enable ? (before & ~override_bit) : (before | override_bit);
+
+	if (after != before)
+		WREG32_SOC15(GC, inst, regRLC_CGTT_MGCG_OVERRIDE, after);
+}
+
+/*
+ * Update the perfmon clock state bit of one XCC: PERFMON_CLOCK_STATE in
+ * RLC_CGTT_MGCG_OVERRIDE is cleared when @enable is true and set
+ * otherwise.
+ *
+ * No-op unless AMD_CG_SUPPORT_GFX_PERF_CLK is set; the register is only
+ * written when its value changes.
+ */
+static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev,
+					  bool enable, int xcc_id)
+{
+	const uint32_t state_bit =
+		RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+	int inst;
+	uint32_t before, after;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+		return;
+
+	inst = GET_INST(GC, xcc_id);
+	before = RREG32_SOC15(GC, inst, regRLC_CGTT_MGCG_OVERRIDE);
+	after = enable ? (before & ~state_bit) : (before | state_bit);
+
+	if (after != before)
+		WREG32_SOC15(GC, inst, regRLC_CGTT_MGCG_OVERRIDE, after);
+}
+
+/*
+ * Apply all GFX clock-gating settings to one XCC while the RLC is held in
+ * safe mode: coarse grain, medium grain, repeater FGCG, SRAM FGCG and
+ * perfmon clock, in that order.  The GUI idle interrupt follows @enable
+ * when any of MGCG/CGLS/CGCG/3D_CGCG/3D_CGLS is supported.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_1_xcc_update_gfx_clock_gating(struct amdgpu_device *adev,
+						 bool enable, int xcc_id)
+{
+	const u32 idle_irq_flags = AMD_CG_SUPPORT_GFX_MGCG |
+				   AMD_CG_SUPPORT_GFX_CGLS |
+				   AMD_CG_SUPPORT_GFX_CGCG |
+				   AMD_CG_SUPPORT_GFX_3D_CGCG |
+				   AMD_CG_SUPPORT_GFX_3D_CGLS;
+
+	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+
+	gfx_v12_1_xcc_update_coarse_grain_clock_gating(adev, enable, xcc_id);
+	gfx_v12_1_xcc_update_medium_grain_clock_gating(adev, enable, xcc_id);
+	gfx_v12_1_xcc_update_repeater_fgcg(adev, enable, xcc_id);
+	gfx_v12_1_xcc_update_sram_fgcg(adev, enable, xcc_id);
+	gfx_v12_1_xcc_update_perf_clk(adev, enable, xcc_id);
+
+	if (adev->cg_flags & idle_irq_flags)
+		gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, enable, xcc_id);
+
+	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+
+	return 0;
+}
+
+/*
+ * Clock-gating state handler.  Does nothing under SR-IOV.  For GC 12.1.0
+ * the requested state is applied to every XCC.
+ *
+ * The GC version is obtained through amdgpu_ip_version(), the same
+ * accessor gfx_v12_1_set_powergating_state() uses, instead of reading
+ * adev->ip_versions[][] directly.  Errors from the per-XCC helper are
+ * propagated.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int gfx_v12_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool gate = (state == AMD_CG_STATE_GATE);
+	int i, num_xcc, r;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(12, 1, 0):
+		for (i = 0; i < num_xcc; i++) {
+			r = gfx_v12_1_xcc_update_gfx_clock_gating(adev, gate, i);
+			if (r)
+				return r;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Report the currently active clock-gating features by OR-ing the
+ * matching AMD_CG_SUPPORT_* bits into *@flags.
+ *
+ * MGCG, repeater FGCG, FGCG and perf clock are reported when their bit in
+ * RLC_CGTT_MGCG_OVERRIDE is clear; CGCG and CGLS are reported when their
+ * enable bit in RLC_CGCG_CGLS_CTRL is set.  Only logical GC instance 0 is
+ * read.
+ */
+static void gfx_v12_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int override, cgcg_ctrl;
+
+	override = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE);
+
+	/* AMD_CG_SUPPORT_GFX_MGCG */
+	if (!(override & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
+	/* AMD_CG_SUPPORT_REPEATER_FGCG */
+	if (!(override & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
+		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+	/* AMD_CG_SUPPORT_GFX_FGCG */
+	if (!(override & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
+	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
+	if (!(override & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
+		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+	cgcg_ctrl = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL);
+
+	/* AMD_CG_SUPPORT_GFX_CGCG */
+	if (cgcg_ctrl & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
+	/* AMD_CG_SUPPORT_GFX_CGLS */
+	if (cgcg_ctrl & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
+}
+
+/* Return the compute ring's read pointer from its CPU-visible copy. */
+static u64 gfx_v12_1_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+	uint32_t *rptr = (uint32_t *)ring->rptr_cpu_addr;
+
+	/* gfx12 hardware is 32bit rptr */
+	return *rptr;
+}
+
+/*
+ * Return the compute ring's write pointer from its CPU-visible copy.
+ * Rings that do not use a doorbell hit BUG().
+ */
+static u64 gfx_v12_1_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+	if (!ring->use_doorbell)
+		BUG();
+
+	/* XXX check if swapping is necessary on BE */
+	return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+}
+
+/*
+ * Publish the compute ring's write pointer: store it in the CPU-visible
+ * copy and ring the 64-bit doorbell.  Rings without a doorbell hit BUG().
+ */
+static void gfx_v12_1_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* only DOORBELL method supported on gfx12 now */
+	if (!ring->use_doorbell)
+		BUG();
+
+	/* XXX check if swapping is necessary on BE */
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
+/*
+ * Emit an INDIRECT_BUFFER packet for @ib on a compute ring.
+ *
+ * @ring:  compute ring to write to
+ * @job:   job the IB belongs to; supplies the VMID placed in the control
+ *         dword
+ * @ib:    indirect buffer (gpu_addr must be dword aligned)
+ * @flags: unused here
+ *
+ * When AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID is set, a SET_CONFIG_REG
+ * packet for GDS_COMPUTE_MAX_WAVE_ID is emitted first.  That packet is
+ * built with a count of 1, i.e. two payload dwords (register offset and
+ * value); the value dword was previously missing, which made the CP
+ * consume the following INDIRECT_BUFFER header as register data.
+ *
+ * NOTE(review): with the value dword this function emits up to 7 dwords
+ * (3 + 4) - confirm the ring funcs' emit_ib_size budgets for that.
+ */
+static void gfx_v12_1_ring_emit_ib_compute(struct amdgpu_ring *ring,
+					   struct amdgpu_job *job,
+					   struct amdgpu_ib *ib,
+					   uint32_t flags)
+{
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+	/* Currently, there is a high possibility to get wave ID mismatch
+	 * between ME and GDS, leading to a hw deadlock, because ME generates
+	 * different wave IDs than the GDS expects. This situation happens
+	 * randomly when at least 5 compute pipes use GDS ordered append.
+	 * The wave IDs generated by ME are also wrong after suspend/resume.
+	 * Those are probably bugs somewhere else in the kernel driver.
+	 *
+	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+	 * GDS to 0 for this ring (me/pipe).
+	 */
+	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+		/* value dword that completes the SET_CONFIG_REG packet */
+		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+	amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+			  (2 << 0) |
+#endif
+			  lower_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, control);
+}
+
+/*
+ * Emit a RELEASE_MEM packet that flushes caches and writes the fence
+ * value @seq to @addr.
+ *
+ * AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write (DATA_SEL 2 instead
+ * of 1) and requires @addr to be 8-byte aligned; otherwise 4-byte
+ * alignment is required.  AMDGPU_FENCE_FLAG_INT selects INT_SEL 2 instead
+ * of 0.  Misaligned addresses hit BUG_ON().
+ */
+static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+				      u64 seq, unsigned flags)
+{
+	const bool want_64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+	const bool want_irq = flags & AMDGPU_FENCE_FLAG_INT;
+	u32 event_cntl, data_cntl;
+
+	event_cntl = PACKET3_RELEASE_MEM_GCR_SEQ(1) |
+		     PACKET3_RELEASE_MEM_GCR_GLV_WB |
+		     PACKET3_RELEASE_MEM_GCR_GL2_WB |
+		     PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(2) |
+		     PACKET3_RELEASE_MEM_TEMPORAL(3) |
+		     PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+		     PACKET3_RELEASE_MEM_EVENT_INDEX(5);
+	data_cntl = PACKET3_RELEASE_MEM_DATA_SEL(want_64bit ? 2 : 1) |
+		    PACKET3_RELEASE_MEM_INT_SEL(want_irq ? 2 : 0);
+
+	/* RELEASE_MEM - flush caches, send int */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+	amdgpu_ring_write(ring, event_cntl);
+	amdgpu_ring_write(ring, data_cntl);
+
+	/*
+	 * the address should be Qword aligned if 64bit write, Dword
+	 * aligned if only send 32bit data low (discard data high)
+	 */
+	BUG_ON(addr & (want_64bit ? 0x7 : 0x3));
+
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+	amdgpu_ring_write(ring, upper_32_bits(seq));
+	amdgpu_ring_write(ring, 0);
+}
+
+/*
+ * Emit a wait on the ring's own fence location until it matches the last
+ * synced sequence number (ring->fence_drv.sync_seq).
+ */
+static void gfx_v12_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint64_t fence_addr = ring->fence_drv.gpu_addr;
+	uint32_t wait_seq = ring->fence_drv.sync_seq;
+	int on_gfx = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+	gfx_v12_1_wait_reg_mem(ring, on_gfx, 1, 0,
+			       lower_32_bits(fence_addr),
+			       upper_32_bits(fence_addr),
+			       wait_seq, 0xffffffff, 4);
+}
+
+/*
+ * Emit an INVALIDATE_TLBS packet carrying @dst_sel, @all_hub, @pasid and
+ * @flush_type in its single payload dword.
+ */
+static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+					   uint16_t pasid, uint32_t flush_type,
+					   bool all_hub, uint8_t dst_sel)
+{
+	uint32_t payload;
+
+	payload = PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+		  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+		  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+		  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(ring, payload);
+}
+
+/*
+ * Emit a GPU TLB flush for @vmid / @pd_addr.  On GFX-type rings a
+ * PFP_SYNC_ME packet follows; other ring types get nothing extra.
+ */
+static void gfx_v12_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
+					 unsigned vmid, uint64_t pd_addr)
+{
+	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+	/* compute doesn't have PFP */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+		return;
+
+	/* sync PFP to ME, otherwise we might get invalid PFP reads */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+	amdgpu_ring_write(ring, 0x0);
+}
+
+/*
+ * Emit a KIQ fence: a WRITE_DATA packet stores the low 32 bits of @seq at
+ * @addr.  With AMDGPU_FENCE_FLAG_INT a second WRITE_DATA writes 0x20000000
+ * to CPC_INT_STATUS to raise the interrupt.  64-bit fences are not
+ * supported and hit BUG_ON().
+ */
+static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+					  u64 seq, unsigned int flags)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const bool want_irq = flags & AMDGPU_FENCE_FLAG_INT;
+
+	/* we only allocate 32bit for each seq wb address */
+	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+	/* write fence seq to the "addr" */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(0) |
+				WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+
+	if (!want_irq)
+		return;
+
+	/* set register to trigger INT */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(0) |
+				WRITE_DATA_DST_SEL(0) | WR_CONFIRM);
+	amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+}
+
+/*
+ * Emit a COPY_DATA packet that copies register @reg (after XCC offset
+ * normalisation) into the writeback buffer at dword index @reg_val_offs,
+ * with write confirmation.
+ */
+static void gfx_v12_1_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+				     uint32_t reg_val_offs)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t dst = adev->wb.gpu_addr + reg_val_offs * 4;
+	uint32_t copy_cntl = 0 |		/* src: register*/
+			     (5 << 8) |		/* dst: memory */
+			     (1 << 20);		/* write confirm */
+
+	reg = soc_v1_0_normalize_xcc_reg_offset(reg);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+	amdgpu_ring_write(ring, copy_cntl);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, lower_32_bits(dst));
+	amdgpu_ring_write(ring, upper_32_bits(dst));
+}
+
+/*
+ * Emit a WRITE_DATA packet that writes @val to register @reg (after XCC
+ * offset normalisation).  KIQ rings use the "no inc addr" control bit;
+ * every other ring type uses WR_CONFIRM.
+ */
+static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring,
+				     uint32_t reg,
+				     uint32_t val)
+{
+	uint32_t write_cntl;
+
+	reg = soc_v1_0_normalize_xcc_reg_offset(reg);
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		write_cntl = (1 << 16); /* no inc addr */
+	else
+		write_cntl = WR_CONFIRM;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, write_cntl);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a wait until (register @reg & @mask) matches @val, using
+ * gfx_v12_1_wait_reg_mem() with a final argument of 0x20.
+ */
+static void gfx_v12_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+					 uint32_t val, uint32_t mask)
+{
+	const uint32_t wait_arg = 0x20;
+
+	gfx_v12_1_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, wait_arg);
+}
+
+/*
+ * Emit a combined register write/wait on @reg0 / @reg1 with reference
+ * @ref and @mask through gfx_v12_1_wait_reg_mem().  The engine-select
+ * argument is 1 only for GFX-type rings.
+ */
+static void gfx_v12_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	int on_gfx = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	const uint32_t wait_arg = 0x20;
+
+	gfx_v12_1_wait_reg_mem(ring, on_gfx, 0, 1, reg0, reg1,
+			       ref, mask, wait_arg);
+}
+
+/*
+ * Enable or disable the EOP-related interrupts (TIME_STAMP and GENERIC0)
+ * of one MEC1 pipe on one XCC.
+ *
+ * amdgpu controls only the first MEC, so only @me == 1 with @pipe 0..3 is
+ * accepted; anything else is logged at debug level and ignored.  The
+ * interrupts of all other pipes are set by amdkfd.  States other than
+ * ENABLE/DISABLE leave the register untouched.
+ */
+static void gfx_v12_1_xcc_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+							  int me, int pipe,
+							  enum amdgpu_interrupt_state state,
+							  int xcc_id)
+{
+	u32 cntl_reg, cntl, enable;
+
+	if (me != 1) {
+		DRM_DEBUG("invalid me %d\n", me);
+		return;
+	}
+
+	switch (pipe) {
+	case 0:
+		cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+					    regCP_ME1_PIPE0_INT_CNTL);
+		break;
+	case 1:
+		cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+					    regCP_ME1_PIPE1_INT_CNTL);
+		break;
+	case 2:
+		cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+					    regCP_ME1_PIPE2_INT_CNTL);
+		break;
+	case 3:
+		cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+					    regCP_ME1_PIPE3_INT_CNTL);
+		break;
+	default:
+		DRM_DEBUG("invalid pipe %d\n", pipe);
+		return;
+	}
+
+	switch (state) {
+	case AMDGPU_IRQ_STATE_DISABLE:
+		enable = 0;
+		break;
+	case AMDGPU_IRQ_STATE_ENABLE:
+		enable = 1;
+		break;
+	default:
+		return;
+	}
+
+	/* all four pipe registers are handled with the PIPE0 field layout */
+	cntl = RREG32_XCC(cntl_reg, xcc_id);
+	cntl = REG_SET_FIELD(cntl, CP_ME1_PIPE0_INT_CNTL,
+			     TIME_STAMP_INT_ENABLE, enable);
+	cntl = REG_SET_FIELD(cntl, CP_ME1_PIPE0_INT_CNTL,
+			     GENERIC0_INT_ENABLE, enable);
+	WREG32_XCC(cntl_reg, cntl, xcc_id);
+}
+
+/*
+ * .set callback of the EOP interrupt source: applies @state to the MEC1
+ * pipe selected by @type on every XCC in adev->gfx.xcc_mask.  Types other
+ * than the four MEC1 pipe EOP types are ignored.  Always returns 0.
+ */
+static int gfx_v12_1_set_eop_interrupt_state(struct amdgpu_device *adev,
+					     struct amdgpu_irq_src *src,
+					     unsigned type,
+					     enum amdgpu_interrupt_state state)
+{
+	int pipe, xcc, num_xcc;
+
+	/* Translate the interrupt type into a MEC1 pipe index once. */
+	switch (type) {
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+		pipe = 0;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+		pipe = 1;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+		pipe = 2;
+		break;
+	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+		pipe = 3;
+		break;
+	default:
+		return 0;
+	}
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (xcc = 0; xcc < num_xcc; xcc++)
+		gfx_v12_1_xcc_set_compute_eop_interrupt_state(adev, 1, pipe,
+							      state, xcc);
+
+	return 0;
+}
+
+/*
+ * .process callback of the EOP interrupt source.
+ *
+ * When MES is enabled and src_data[0] carries AMDGPU_FENCE_MES_QUEUE_FLAG,
+ * the queue is looked up by id under mes.queue_id_lock and its ring's fences
+ * are processed.  Otherwise ring_id is decoded (pipe = bits 1:0,
+ * me = bits 3:2, queue = bits 6:4) and fences are processed on the matching
+ * ring of the XCC derived from entry->node_id.
+ *
+ * Returns -EINVAL when the node id does not map to an XCC, 0 otherwise.
+ */
+static int gfx_v12_1_eop_irq(struct amdgpu_device *adev,
+			     struct amdgpu_irq_src *source,
+			     struct amdgpu_iv_entry *entry)
+{
+	uint32_t mes_queue_id = entry->src_data[0];
+	struct amdgpu_ring *ring;
+	u8 me_id, pipe_id, queue_id;
+	int xcc_id, i;
+
+	DRM_DEBUG("IH: CP EOP\n");
+
+	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+		struct amdgpu_mes_queue *queue;
+
+		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+		spin_lock(&adev->mes.queue_id_lock);
+		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+		if (queue) {
+			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
+			amdgpu_fence_process(queue->ring);
+		}
+		spin_unlock(&adev->mes.queue_id_lock);
+
+		return 0;
+	}
+
+	pipe_id = entry->ring_id & 0x3;
+	me_id = (entry->ring_id >> 2) & 0x3;
+	queue_id = (entry->ring_id >> 4) & 0x7;
+
+	xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id);
+	if (xcc_id == -EINVAL)
+		return -EINVAL;
+
+	if (me_id == 0) {
+		amdgpu_fence_process(&adev->gfx.gfx_ring[pipe_id == 0 ? 0 : 1]);
+	} else if (me_id == 1 || me_id == 2) {
+		/*
+		 * Per-queue interrupt is supported for MEC starting from VI,
+		 * but it can only be enabled/disabled per pipe, so pick out
+		 * the exact queue among this XCC's compute rings here.
+		 */
+		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+			ring = &adev->gfx.compute_ring
+					[i + xcc_id * adev->gfx.num_compute_rings];
+			if (ring->me != me_id || ring->pipe != pipe_id ||
+			    ring->queue != queue_id)
+				continue;
+			amdgpu_fence_process(ring);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * .set callback of the privileged-register fault source: writes
+ * PRIV_REG_INT_ENABLE in CP_INT_CNTL_RING0 of every XCC (1 for ENABLE,
+ * 0 for DISABLE).  Any other @state is ignored.  Always returns 0.
+ */
+static int gfx_v12_1_set_priv_reg_fault_state(struct amdgpu_device *adev,
+					      struct amdgpu_irq_src *source,
+					      unsigned type,
+					      enum amdgpu_interrupt_state state)
+{
+	int xcc, enable;
+	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+
+	for (xcc = 0; xcc < num_xcc; xcc++)
+		WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc), CP_INT_CNTL_RING0,
+				      PRIV_REG_INT_ENABLE, enable);
+
+	return 0;
+}
+
+/*
+ * .set callback of the privileged-instruction fault source: writes
+ * PRIV_INSTR_INT_ENABLE in CP_INT_CNTL_RING0 of every XCC (1 for ENABLE,
+ * 0 for DISABLE).  Any other @state is ignored.  Always returns 0.
+ */
+static int gfx_v12_1_set_priv_inst_fault_state(struct amdgpu_device *adev,
+					       struct amdgpu_irq_src *source,
+					       unsigned type,
+					       enum amdgpu_interrupt_state state)
+{
+	int xcc, enable;
+	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+
+	for (xcc = 0; xcc < num_xcc; xcc++)
+		WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc), CP_INT_CNTL_RING0,
+				      PRIV_INSTR_INT_ENABLE, enable);
+
+	return 0;
+}
+
+/*
+ * Common tail of the privileged register/instruction fault handlers:
+ * decodes entry->ring_id (pipe = bits 1:0, me = bits 3:2, queue = bits 6:4),
+ * maps entry->node_id to an XCC and calls drm_sched_fault() on the
+ * scheduler of every ring that matches.
+ *
+ * Returns silently when the node id does not map to an XCC.
+ */
+static void gfx_v12_1_handle_priv_fault(struct amdgpu_device *adev,
+					struct amdgpu_iv_entry *entry)
+{
+	u8 me_id, pipe_id, queue_id;
+	struct amdgpu_ring *ring;
+	int i, xcc_id;
+
+	me_id = (entry->ring_id & 0x0c) >> 2;
+	pipe_id = (entry->ring_id & 0x03) >> 0;
+	queue_id = (entry->ring_id & 0x70) >> 4;
+	xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id);
+
+	if (xcc_id == -EINVAL)
+		return;
+
+	switch (me_id) {
+	case 0:
+		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+			ring = &adev->gfx.gfx_ring[i];
+			/* we only enabled 1 gfx queue per pipe for now */
+			if (ring->me == me_id && ring->pipe == pipe_id)
+				drm_sched_fault(&ring->sched);
+		}
+		break;
+	case 1:
+	case 2:
+		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+			ring = &adev->gfx.compute_ring
+					[i +
+					 xcc_id * adev->gfx.num_compute_rings];
+			if (ring->me == me_id && ring->pipe == pipe_id &&
+			    ring->queue == queue_id)
+				drm_sched_fault(&ring->sched);
+		}
+		break;
+	default:
+		/*
+		 * me_id is two bits taken from the interrupt vector, so the
+		 * value 3 is reachable.  Warn instead of BUG(): an unexpected
+		 * value must not take the whole machine down from interrupt
+		 * context.
+		 */
+		WARN_ONCE(1, "gfx_v12_1: unexpected me_id %d in priv fault\n",
+			  me_id);
+		break;
+	}
+}
+
+/*
+ * .process callback for an illegal register access in a command stream:
+ * logs the event and forwards the IV entry to
+ * gfx_v12_1_handle_priv_fault().  Always returns 0.
+ */
+static int gfx_v12_1_priv_reg_irq(struct amdgpu_device *adev,
+				  struct amdgpu_irq_src *source,
+				  struct amdgpu_iv_entry *entry)
+{
+	/* Device-scoped helper so the message names the GPU that faulted. */
+	drm_err(adev_to_drm(adev), "Illegal register access in command stream\n");
+	gfx_v12_1_handle_priv_fault(adev, entry);
+	return 0;
+}
+
+/*
+ * .process callback for an illegal instruction in a command stream:
+ * logs the event and forwards the IV entry to
+ * gfx_v12_1_handle_priv_fault().  Always returns 0.
+ */
+static int gfx_v12_1_priv_inst_irq(struct amdgpu_device *adev,
+				   struct amdgpu_irq_src *source,
+				   struct amdgpu_iv_entry *entry)
+{
+	/* Device-scoped helper so the message names the GPU that faulted. */
+	drm_err(adev_to_drm(adev), "Illegal instruction in command stream\n");
+	gfx_v12_1_handle_priv_fault(adev, entry);
+	return 0;
+}
+
+/*
+ * Emit an ACQUIRE_MEM packet (header plus seven payload dwords) that makes
+ * one or more surfaces valid for use by the subsequent operations.
+ */
+static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring)
+{
+	unsigned int gcr_cntl;
+
+	/* GL2 scope value 2, GL2 write-back + invalidate, GLV/GLK/GLI invalidate */
+	gcr_cntl = PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_SCOPE(2);
+	gcr_cntl |= PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+	gcr_cntl |= PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1);
+	gcr_cntl |= PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1);
+	gcr_cntl |= PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1);
+	gcr_cntl |= PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+	amdgpu_ring_write(ring, 0);		/* CP_COHER_CNTL */
+	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
+	amdgpu_ring_write(ring, 0xffffff);	/* CP_COHER_SIZE_HI */
+	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
+	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
+	amdgpu_ring_write(ring, 0x0000000A);	/* POLL_INTERVAL */
+	amdgpu_ring_write(ring, gcr_cntl);	/* GCR_CNTL */
+}
+
+/*
+ * IP-block callback table for GFX v12.1 (init/fini, suspend/resume, idle
+ * checks and clock/power gating); referenced by gfx_v12_1_ip_block.
+ */
+static const struct amd_ip_funcs gfx_v12_1_ip_funcs = {
+ .name = "gfx_v12_1",
+ .early_init = gfx_v12_1_early_init,
+ .late_init = gfx_v12_1_late_init,
+ .sw_init = gfx_v12_1_sw_init,
+ .sw_fini = gfx_v12_1_sw_fini,
+ .hw_init = gfx_v12_1_hw_init,
+ .hw_fini = gfx_v12_1_hw_fini,
+ .suspend = gfx_v12_1_suspend,
+ .resume = gfx_v12_1_resume,
+ .is_idle = gfx_v12_1_is_idle,
+ .wait_for_idle = gfx_v12_1_wait_for_idle,
+ .set_clockgating_state = gfx_v12_1_set_clockgating_state,
+ .set_powergating_state = gfx_v12_1_set_powergating_state,
+ .get_clockgating_state = gfx_v12_1_get_clockgating_state,
+};
+
+/*
+ * Ring callbacks installed on every compute ring by
+ * gfx_v12_1_set_ring_funcs().  emit_frame_size is the sum of the per-packet
+ * budgets annotated on each term below.
+ */
+static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_1_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_1_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_1_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_1_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_1_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v12_1_ring_emit_fence x3 for user fence, vm fence */
+ 8, /* gfx_v12_1_emit_mem_sync */
+ .emit_ib_size = 7, /* gfx_v12_1_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_1_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_1_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_1_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_1_ring_emit_vm_flush,
+ .test_ring = gfx_v12_1_ring_test_ring,
+ .test_ib = gfx_v12_1_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v12_1_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_1_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_1_emit_mem_sync,
+};
+
+/*
+ * Ring callbacks installed on each XCC's KIQ ring by
+ * gfx_v12_1_set_ring_funcs().  Compared with the compute table it uses the
+ * KIQ fence emitter, adds emit_rreg, and sets no pipeline-sync, VM-flush or
+ * mem-sync callbacks.
+ * NOTE(review): emit_frame_size still budgets for pipeline sync and VM
+ * flush although this table installs neither callback -- confirm intended.
+ */
+static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_1_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_1_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_1_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_1_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_1_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v12_1_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v12_1_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_1_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_1_ring_emit_fence_kiq,
+ .test_ring = gfx_v12_1_ring_test_ring,
+ .test_ib = gfx_v12_1_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v12_1_ring_emit_rreg,
+ .emit_wreg = gfx_v12_1_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_1_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait,
+};
+
+/*
+ * Install the ring callback tables: the KIQ table on each XCC's KIQ ring
+ * and the compute table on all num_xcc * num_compute_rings compute rings.
+ */
+static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev)
+{
+	int idx, total_rings;
+	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	for (idx = 0; idx < num_xcc; idx++)
+		adev->gfx.kiq[idx].ring.funcs = &gfx_v12_1_ring_funcs_kiq;
+
+	/* Compute rings are laid out back to back, one group per XCC. */
+	total_rings = num_xcc * adev->gfx.num_compute_rings;
+	for (idx = 0; idx < total_rings; idx++)
+		adev->gfx.compute_ring[idx].funcs =
+			&gfx_v12_1_ring_funcs_compute;
+}
+
+/* EOP interrupt source: state setter and handler. */
+static const struct amdgpu_irq_src_funcs gfx_v12_1_eop_irq_funcs = {
+ .set = gfx_v12_1_set_eop_interrupt_state,
+ .process = gfx_v12_1_eop_irq,
+};
+
+/* Privileged-register fault interrupt source: state setter and handler. */
+static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_reg_irq_funcs = {
+ .set = gfx_v12_1_set_priv_reg_fault_state,
+ .process = gfx_v12_1_priv_reg_irq,
+};
+
+/* Privileged-instruction fault interrupt source: state setter and handler. */
+static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_inst_irq_funcs = {
+ .set = gfx_v12_1_set_priv_inst_fault_state,
+ .process = gfx_v12_1_priv_inst_irq,
+};
+
+/*
+ * Hook the three GFX interrupt sources (EOP, privileged register fault,
+ * privileged instruction fault) up to their callback tables and set how
+ * many types each source has.
+ */
+static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+	/* callback tables */
+	adev->gfx.eop_irq.funcs = &gfx_v12_1_eop_irq_funcs;
+	adev->gfx.priv_reg_irq.funcs = &gfx_v12_1_priv_reg_irq_funcs;
+	adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs;
+
+	/* number of types per source */
+	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+	adev->gfx.priv_reg_irq.num_types = 1;
+	adev->gfx.priv_inst_irq.num_types = 1;
+}
+
+/*
+ * Select the IMU mode (MISSION_MODE on APUs, DEBUG_MODE otherwise) and,
+ * unless running as an SR-IOV VF, install the IMU callback table.
+ */
+static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev)
+{
+	adev->gfx.imu.mode = (adev->flags & AMD_IS_APU) ? MISSION_MODE :
+							   DEBUG_MODE;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	adev->gfx.imu.funcs = &gfx_v12_1_imu_funcs;
+}
+
+/* Install the GFX v12.1 RLC callback table on @adev. */
+static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v12_1_rlc_funcs;
+}
+
+/* Register the compute MQD init callback and MQD size for GFX v12.1. */
+static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev)
+{
+	/* compute engine MQD: init hook, then descriptor size */
+	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = gfx_v12_1_compute_mqd_init;
+	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+		sizeof(struct v12_1_compute_mqd);
+}
+
+/*
+ * Write @bitmap into the INACTIVE_WGPS field of
+ * GC_USER_SHADER_ARRAY_CONFIG on the given XCC.  A zero bitmap is a no-op
+ * (no register write).
+ */
+static void gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+							 u32 bitmap, int xcc_id)
+{
+	u32 wgp_field;
+
+	if (bitmap == 0)
+		return;
+
+	wgp_field = (bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT) &
+		    GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG,
+		     wgp_field);
+}
+
+/*
+ * Return the active bitmap for the current selection on @xcc_id: the
+ * complement of the INACTIVE_WGPS bits found in either
+ * CC_GC_SHADER_ARRAY_CONFIG or GC_USER_SHADER_ARRAY_CONFIG, limited to
+ * max_cu_per_sh bits.
+ */
+static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev,
+						 int xcc_id)
+{
+	u32 cc_cfg, user_cfg, inactive;
+
+	/* Two separate statements keep the register read order fixed. */
+	cc_cfg = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+			      regCC_GC_SHADER_ARRAY_CONFIG);
+	user_cfg = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
+				regGC_USER_SHADER_ARRAY_CONFIG);
+
+	inactive = ((cc_cfg | user_cfg) &
+		    CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK) >>
+		   CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+	return ~inactive &
+	       amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
+}
+
+/*
+ * Fill @cu_info with the per-XCC / per-SE / per-SA active bitmaps and the
+ * total number of active bits, applying the user disable masks on the way.
+ *
+ * Shader arrays whose bit is clear in gfx_v12_1_get_sa_active_bitmap() are
+ * skipped and their cu_info->bitmap entry is left untouched.  GRBM
+ * selection is done under grbm_idx_mutex and reset to broadcast after each
+ * XCC.
+ *
+ * Returns 0 on success, -EINVAL for NULL arguments or when the device
+ * reports more than 2 shader engines or more than 2 SAs per SE (the size of
+ * the local disable_masks table).
+ */
+static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
+				 struct amdgpu_cu_info *cu_info)
+{
+	unsigned int disable_masks[2 * 2];
+	int xcc_id, se, sh, cu, total_active = 0;
+	u32 sa_idx, cu_bitmap, cu_bit;
+
+	if (!adev || !cu_info)
+		return -EINVAL;
+
+	if (adev->gfx.config.max_shader_engines > 2 ||
+	    adev->gfx.config.max_sh_per_se > 2) {
+		dev_err(adev->dev,
+			"Max SE (%d) and Max SA per SE (%d) is greater than expected\n",
+			adev->gfx.config.max_shader_engines,
+			adev->gfx.config.max_sh_per_se);
+		return -EINVAL;
+	}
+
+	amdgpu_gfx_parse_disable_cu(adev, disable_masks,
+				    adev->gfx.config.max_shader_engines,
+				    adev->gfx.config.max_sh_per_se);
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+		for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
+			for (sh = 0; sh < adev->gfx.config.max_sh_per_se; sh++) {
+				/* flat SA index, also the disable_masks slot */
+				sa_idx = se * adev->gfx.config.max_sh_per_se + sh;
+				if (((gfx_v12_1_get_sa_active_bitmap(adev, xcc_id) >> sa_idx) & 1) == 0)
+					continue;
+
+				gfx_v12_1_xcc_select_se_sh(adev, se, sh, 0xffffffff, xcc_id);
+				gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(
+					adev, disable_masks[sa_idx], xcc_id);
+				cu_bitmap = gfx_v12_1_get_cu_active_bitmap_per_sh(adev, xcc_id);
+
+				cu_info->bitmap[xcc_id][se][sh] = cu_bitmap;
+
+				/* count the set bits among the low max_cu_per_sh */
+				cu_bit = 1;
+				for (cu = 0; cu < adev->gfx.config.max_cu_per_sh; cu++) {
+					if (cu_bitmap & cu_bit)
+						total_active++;
+					cu_bit <<= 1;
+				}
+			}
+		}
+		/* back to broadcast before moving to the next XCC */
+		gfx_v12_1_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id);
+	}
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	cu_info->number = total_active;
+	cu_info->simd_per_cu = NUM_SIMD_PER_CU_GFX12_1;
+	cu_info->lds_size = 320;
+
+	return 0;
+}
+
+/* IP block descriptor for GFX 12.1.0, backed by gfx_v12_1_ip_funcs. */
+const struct amdgpu_ip_block_version gfx_v12_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 12,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &gfx_v12_1_ip_funcs,
+};
+
+/*
+ * xcp resume callback: for every instance set in @inst_mask run the
+ * constants init, then (unless running as an SR-IOV VF) the RLC resume, and
+ * finally resume the CP for the whole mask.
+ *
+ * @handle is the struct amdgpu_device pointer.  Returns the first non-zero
+ * RLC resume error, otherwise the CP resume result.
+ */
+static int gfx_v12_1_xcp_resume(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = handle;
+	uint32_t pending;
+	int inst, ret;
+
+	/* TODO : Initialize golden regs */
+	/* gfx_v12_1_init_golden_registers(adev); */
+
+	/* for_each_inst() modifies the mask it is given, so iterate a copy */
+	pending = inst_mask;
+	for_each_inst(inst, pending)
+		gfx_v12_1_xcc_constants_init(adev, inst);
+
+	if (!amdgpu_sriov_vf(adev)) {
+		pending = inst_mask;
+		for_each_inst(inst, pending) {
+			ret = gfx_v12_1_xcc_rlc_resume(adev, inst);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return gfx_v12_1_xcc_cp_resume(adev, inst_mask);
+}
+
+/*
+ * xcp suspend callback: calls gfx_v12_1_xcc_fini() for every instance set
+ * in @inst_mask.  @handle is the struct amdgpu_device pointer.  Always
+ * returns 0.
+ */
+static int gfx_v12_1_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = handle;
+	int inst;
+
+	for_each_inst(inst, inst_mask)
+		gfx_v12_1_xcc_fini(adev, inst);
+
+	return 0;
+}
+
+/* xcp suspend/resume callbacks for GFX v12.1; declared in gfx_v12_1.h. */
+struct amdgpu_xcp_ip_funcs gfx_v12_1_xcp_funcs = {
+ .suspend = &gfx_v12_1_xcp_suspend,
+ .resume = &gfx_v12_1_xcp_resume
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.h
new file mode 100644
index 000000000000..684d93a1e2f8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V12_1_H__
+#define __GFX_V12_1_H__
+
+extern const struct amdgpu_ip_block_version gfx_v12_1_ip_block;
+
+extern struct amdgpu_xcp_ip_funcs gfx_v12_1_xcp_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
new file mode 100644
index 000000000000..9a5c82c8db53
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1_pkt.h
@@ -0,0 +1,475 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef GFX_V12_1_PKT_H
+#define GFX_V12_1_PKT_H
+
+/*
+ * PM4 definitions
+ */
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define CE_PARTITION_BASE 3
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_INDIRECT_BUFFER_END 0x17
+#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_PRIV 0x32
+#define PACKET3_INDIRECT_BUFFER_CNST 0x33
+#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_PREAMBLE 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ /* 0 - register
+ * 1 - memory (sync - via GRBM)
+ * 2 - gl2
+ * 3 - gds
+ * 4 - reserved
+ * 5 - memory (async - direct)
+ */
+#define WR_ONE_ADDR (1 << 16)
+#define WR_CONFIRM (1 << 20)
+#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ /* 0 - me
+ * 1 - pfp
+ * 2 - ce
+ */
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
+# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
+# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A
+#define PACKET3_COPY_DW 0x3B
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ /* 0 - reg
+ * 1 - mem
+ */
+#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+ /* 0 - me
+ * 1 - pfp
+ */
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define INDIRECT_BUFFER_VALID (1 << 23)
+#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
+#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_COND_INDIRECT_BUFFER 0x3F
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_CP_DMA 0x41
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_SURFACE_SYNC 0x43
+#define PACKET3_ME_INITIALIZE 0x44
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ */
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define PACKET3_EVENT_WRITE_EOS 0x48
+#define PACKET3_RELEASE_MEM 0x49
+#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
+#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
+#define PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(x) ((x) << 12)
+#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
+#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
+#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE(x) ((x) << 17)
+#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
+#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
+#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
+#define PACKET3_RELEASE_MEM_GCR_SEQ(x) ((x) << 22)
+#define PACKET3_RELEASE_MEM_GCR_GLV_WB (1 << 24)
+#define PACKET3_RELEASE_MEM_TEMPORAL(x) ((x) << 25)
+ /* 0 - temporal__release_mem__rt
+ * 1 - temporal__release_mem__nt
+ * 2 - temporal__release_mem__ht
+ * 3 - temporal__release_mem__lu
+ */
+#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28)
+
+#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit GPU counter value
+ * 4 - send 64bit sys counter value
+ */
+#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
+ /* 0 - none
+ * 1 - interrupt only (DATA_SEL = 0)
+ * 2 - interrupt when data write is confirmed
+ */
+#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
+ /* 0 - MC
+ * 1 - TC/L2
+ */
+
+
+
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_DMA_DATA 0x50
+/* 1. header
+ * 2. CONTROL
+ * 3. SRC_ADDR_LO or DATA [31:0]
+ * 4. SRC_ADDR_HI [31:0]
+ * 5. DST_ADDR_LO [31:0]
+ * 6. DST_ADDR_HI [7:0]
+ * 7. COMMAND [31:26] | BYTE_COUNT [25:0]
+ */
+/* CONTROL */
+# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
+ /* 0 - DST_ADDR using DAS
+ * 1 - GDS
+ * 3 - DST_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR using SAS
+ * 1 - GDS
+ * 2 - DATA
+ * 3 - SRC_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
+# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
+# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_CONTEXT_REG_RMW 0x51
+#define PACKET3_GFX_CNTX_UPDATE 0x52
+#define PACKET3_BLK_CNTX_UPDATE 0x53
+#define PACKET3_INCR_UPDT_STATE 0x55
+#define PACKET3_ACQUIRE_MEM 0x58
+/* 1. HEADER
+ * 2. COHER_CNTL [30:0]
+ * 2.1 ENGINE_SEL [31:31]
+ * 3. COHER_SIZE [31:0]
+ * 4. COHER_SIZE_HI [7:0]
+ * 5. COHER_BASE_LO [31:0]
+ * 6. COHER_BASE_HI [23:0]
+ * 7. POLL_INTERVAL [15:0]
+ * 8. GCR_CNTL [18:0]
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0)
+ /*
+ * 0:NOP
+ * 1:ALL
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2)
+ /*
+ * 0:ALL
+ * 1:reserved
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_SCOPE(x) ((x) << 4)
+ /*
+ * 0:Device scope
+ * 1:System scope
+ * 2:Force INV/WB all
+ * 3:Reserved
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_WB(x) ((x) << 6)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11)
+ /*
+ * 0:ALL
+ * 1:VOL
+ * 2:RANGE
+ * 3:FIRST_LAST
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15)
+#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16)
+ /*
+ * 0: PARALLEL
+ * 1: FORWARD
+ * 2: REVERSE
+ */
+#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
+#define PACKET3_REWIND 0x59
+#define PACKET3_INTERRUPT 0x5A
+#define PACKET3_GEN_PDEPTE 0x5B
+#define PACKET3_INDIRECT_BUFFER_PASID 0x5C
+#define PACKET3_PRIME_UTCL2 0x5D
+#define PACKET3_LOAD_UCONFIG_REG 0x5E
+#define PACKET3_LOAD_SH_REG 0x5F
+#define PACKET3_LOAD_CONFIG_REG 0x60
+#define PACKET3_LOAD_CONTEXT_REG 0x61
+#define PACKET3_LOAD_COMPUTE_STATE 0x62
+#define PACKET3_LOAD_SH_REG_INDEX 0x63
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
+#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A
+#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71
+#define PACKET3_SET_SH_REG_DI 0x72
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_SH_REG_DI_MULTI 0x74
+#define PACKET3_GFX_PIPE_LOCK 0x75
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_SET_QUEUE_REG 0x78
+#define PACKET3_SET_UCONFIG_REG 0x79
+#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
+#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
+#define PACKET3_FORWARD_HEADER 0x7C
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SWITCH_BUFFER 0x8B
+#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C
+#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C
+#define PACKET3_DISPATCH_DRAW 0x8D
+#define PACKET3_DISPATCH_DRAW_ACE 0x8D
+#define PACKET3_GET_LOD_STATS 0x8E
+#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F
+#define PACKET3_FRAME_CONTROL 0x90
+# define FRAME_TMZ (1 << 0)
+# define FRAME_CMD(x) ((x) << 28)
+ /*
+ * x=0: tmz_begin
+ * x=1: tmz_end
+ */
+#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91
+#define PACKET3_WAIT_REG_MEM64 0x93
+#define PACKET3_COND_PREEMPT 0x94
+#define PACKET3_HDP_FLUSH 0x95
+#define PACKET3_COPY_DATA_RB 0x96
+#define PACKET3_INVALIDATE_TLBS 0x98
+#define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
+#define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
+#define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+#define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
+
+#define PACKET3_AQL_PACKET 0x99
+#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
+#define PACKET3_SET_SH_REG_INDEX 0x9B
+#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C
+#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D
+#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E
+#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F
+#define PACKET3_SET_RESOURCES 0xA0
+/* 1. header
+ * 2. CONTROL
+ * 3. QUEUE_MASK_LO [31:0]
+ * 4. QUEUE_MASK_HI [31:0]
+ * 5. GWS_MASK_LO [31:0]
+ * 6. GWS_MASK_HI [31:0]
+ * 7. OAC_MASK [15:0]
+ * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
+ */
+# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
+# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
+# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
+#define PACKET3_MAP_PROCESS 0xA1
+#define PACKET3_MAP_QUEUES 0xA2
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. MQD_ADDR_LO [31:0]
+ * 5. MQD_ADDR_HI [31:0]
+ * 6. WPTR_ADDR_LO [31:0]
+ * 7. WPTR_ADDR_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
+# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
+# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
+# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
+# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
+# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
+# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2 */
+# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
+# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
+#define PACKET3_UNMAP_QUEUES 0xA3
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. CONTROL3
+ * 5. CONTROL4
+ * 6. CONTROL5
+ */
+/* CONTROL */
+# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
+ /* 0 - PREEMPT_QUEUES
+ * 1 - RESET_QUEUES
+ * 2 - DISABLE_PROCESS_QUEUES
+ * 3 - PREEMPT_QUEUES_NO_UNMAP
+ */
+# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2a */
+# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
+/* CONTROL3a */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
+/* CONTROL3b */
+# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
+/* CONTROL4 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
+/* CONTROL5 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
+#define PACKET3_QUERY_STATUS 0xA4
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. ADDR_LO [31:0]
+ * 5. ADDR_HI [31:0]
+ * 6. DATA_LO [31:0]
+ * 7. DATA_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
+# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
+# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
+/* CONTROL2a */
+# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
+# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+#define PACKET3_RUN_LIST 0xA5
+#define PACKET3_MAP_PROCESS_VM 0xA6
+/* GFX11 */
+#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
+# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
+# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 80565392313f..73223d97a87f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -2010,7 +2010,7 @@ static int gfx_v6_0_cp_gfx_start(struct amdgpu_device *adev)
r = amdgpu_ring_alloc(ring, 7 + 4);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
return r;
}
amdgpu_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
@@ -2031,7 +2031,7 @@ static int gfx_v6_0_cp_gfx_start(struct amdgpu_device *adev)
r = amdgpu_ring_alloc(ring, gfx_v6_0_get_csb_size(adev) + 10);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
return r;
}
@@ -3002,7 +3002,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id
static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
- DRM_INFO("Not implemented\n");
+ drm_info(adev_to_drm(adev), "Not implemented\n");
}
static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
@@ -3555,7 +3555,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
memset(cu_info, 0, sizeof(*cu_info));
- amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+ amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4, 2);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 2b7aba22ecc1..2b691452775b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2068,23 +2068,15 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
u32 ref_and_mask;
- int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
+ int usepfp;
+ struct amdgpu_device *adev = ring->adev;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- switch (ring->me) {
- case 1:
- ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
- break;
- case 2:
- ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
- break;
- default:
- return;
- }
- } else {
- ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
+ return;
}
+ adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &usepfp);
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
WAIT_REG_MEM_FUNCTION(3) | /* == */
@@ -2473,7 +2465,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
return r;
}
@@ -3245,7 +3237,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);
return 0;
}
@@ -3471,7 +3463,8 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
+static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
@@ -4074,12 +4067,49 @@ static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
cik_srbm_select(adev, me, pipe, q, vm);
}
+/**
+ * gfx_v7_0_get_hdp_flush_mask - get the reference and mask for HDP flush
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ref_and_mask: pointer to store the reference and mask
+ * @reg_mem_engine: pointer to store the register memory engine
+ *
+ * Calculates the reference and mask for HDP flush based on the ring type and me.
+ *
+ * When all three pointers are valid, both outputs are always written: for a
+ * compute/KIQ ring whose me is neither 1 nor 2 they are left at 0 instead of
+ * being left untouched, so a caller that passes uninitialized locals never
+ * reads indeterminate values.
+ */
+static void gfx_v7_0_get_hdp_flush_mask(struct amdgpu_ring *ring,
+		uint32_t *ref_and_mask, uint32_t *reg_mem_engine)
+{
+	if (!ring || !ref_and_mask || !reg_mem_engine) {
+		DRM_INFO("%s:invalid params\n", __func__);
+		return;
+	}
+
+	/*
+	 * Start from a defined state. gfx_v7_0_ring_emit_hdp_flush() hands in
+	 * uninitialized stack variables and uses them unconditionally after
+	 * this call, so no path below may leave them unwritten.
+	 */
+	*ref_and_mask = 0;
+	*reg_mem_engine = 0;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+		switch (ring->me) {
+		case 1:
+			*ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
+			break;
+		case 2:
+			*ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
+			break;
+		default:
+			/* unknown me: keep the zeroed outputs */
+			break;
+		}
+	} else {
+		*ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
+		*reg_mem_engine = 1;
+	}
+}
+
+
static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v7_0_select_se_sh,
.read_wave_data = &gfx_v7_0_read_wave_data,
.read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
- .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
+ .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q,
+ .get_hdp_flush_mask = &gfx_v7_0_get_hdp_flush_mask,
};
static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
@@ -5033,7 +5063,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
memset(cu_info, 0, sizeof(*cu_info));
- amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+ amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4, 2);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1c87375e1dd5..a6b4c8f41dc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1048,7 +1048,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->gfx.ce_feature_version >= 46 &&
adev->gfx.pfp_feature_version >= 46) {
adev->virt.chained_ib_support = true;
- DRM_INFO("Chained IB support enabled!\n");
+ drm_info(adev_to_drm(adev), "Chained IB support enabled!\n");
} else
adev->virt.chained_ib_support = false;
@@ -1274,7 +1274,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);
return 0;
}
@@ -1509,7 +1509,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
r = amdgpu_ib_get(adev, NULL, total_size,
AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%d).\n", r);
return r;
}
@@ -1604,14 +1604,14 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* shedule the ib on the ring */
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r) {
- DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+ drm_err(adev_to_drm(adev), "ib submit failed (%d).\n", r);
goto fail;
}
/* wait for the GPU to finish processing the IB */
r = dma_fence_wait(f, false);
if (r) {
- DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+ drm_err(adev_to_drm(adev), "fence wait failed (%d).\n", r);
goto fail;
}
@@ -3820,7 +3820,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
gfx_v8_0_select_se_sh(adev, 0xffffffff,
0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
+ drm_info(adev_to_drm(adev), "Timeout wait for RLC serdes %u,%u\n",
i, j);
return;
}
@@ -4143,7 +4143,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
return r;
}
@@ -5211,13 +5211,49 @@ static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id
start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
+/**
+ * gfx_v8_0_get_hdp_flush_mask - get the reference and mask for HDP flush
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ref_and_mask: pointer to store the reference and mask
+ * @reg_mem_engine: pointer to store the register memory engine
+ *
+ * Calculates the reference and mask for HDP flush based on the ring type and me.
+ *
+ * When all three pointers are valid, both outputs are always written: for a
+ * compute/KIQ ring whose me is neither 1 nor 2 they are left at 0 instead of
+ * being left untouched, so a caller that passes uninitialized locals never
+ * reads indeterminate values.
+ */
+static void gfx_v8_0_get_hdp_flush_mask(struct amdgpu_ring *ring,
+		uint32_t *ref_and_mask, uint32_t *reg_mem_engine)
+{
+	if (!ring || !ref_and_mask || !reg_mem_engine) {
+		DRM_INFO("%s:invalid params\n", __func__);
+		return;
+	}
+
+	/*
+	 * Start from a defined state. gfx_v8_0_ring_emit_hdp_flush() hands in
+	 * uninitialized stack variables and uses them unconditionally after
+	 * this call, so no path below may leave them unwritten.
+	 */
+	*ref_and_mask = 0;
+	*reg_mem_engine = 0;
+
+	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
+	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
+		switch (ring->me) {
+		case 1:
+			*ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
+			break;
+		case 2:
+			*ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
+			break;
+		default:
+			/* unknown me: keep the zeroed outputs */
+			break;
+		}
+	} else {
+		*ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
+		*reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
+	}
+}
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v8_0_select_se_sh,
.read_wave_data = &gfx_v8_0_read_wave_data,
.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
- .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
+ .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q,
+ .get_hdp_flush_mask = &gfx_v8_0_get_hdp_flush_mask,
};
static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
@@ -5541,7 +5577,8 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
}
}
-static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
+static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
@@ -5999,25 +6036,14 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
u32 ref_and_mask, reg_mem_engine;
+ struct amdgpu_device *adev = ring->adev;
- if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
- (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
- switch (ring->me) {
- case 1:
- ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
- break;
- case 2:
- ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
- break;
- default:
- return;
- }
- reg_mem_engine = 0;
- } else {
- ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
- reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
+ return;
}
+ adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
WAIT_REG_MEM_FUNCTION(3) | /* == */
@@ -6643,7 +6669,7 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
switch (enc) {
case 0:
- DRM_INFO("SQ general purpose intr detected:"
+ drm_info(adev_to_drm(adev), "SQ general purpose intr detected:"
"se_id %d, immed_overflow %d, host_reg_overflow %d,"
"host_cmd_overflow %d, cmd_timestamp %d,"
"reg_timestamp %d, thread_trace_buff_full %d,"
@@ -6685,7 +6711,7 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
else
sprintf(type, "EDC/ECC error");
- DRM_INFO(
+ drm_info(adev_to_drm(adev),
"SQ %s detected: "
"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
"trap %s, sq_ed_info.source %s.\n",
@@ -7058,7 +7084,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
else
ao_cu_num = adev->gfx.config.max_cu_per_sh;
- amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+ amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4, 2);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0148d7ff34d9..7e9d753f4a80 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1301,7 +1301,8 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
(adev->gfx.mec_feature_version < 46) ||
(adev->gfx.pfp_fw_version < 0x000000b7) ||
(adev->gfx.pfp_feature_version < 46)))
- DRM_WARN_ONCE("CP firmware version too old, please update!");
+ drm_warn_once(adev_to_drm(adev),
+ "CP firmware version too old, please update!");
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
@@ -2004,6 +2005,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+ .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};
const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
@@ -2039,7 +2041,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
- DRM_INFO("fix gfx.config for vega12\n");
+ drm_info(adev_to_drm(adev), "fix gfx.config for vega12\n");
break;
case IP_VERSION(9, 4, 0):
adev->gfx.ras = &gfx_v9_0_ras;
@@ -2409,8 +2411,10 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
- if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
+ if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset) {
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
if (r) {
@@ -2727,7 +2731,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
amdgpu_gfx_select_se_sh(adev, 0xffffffff,
0xffffffff, 0xffffffff, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
+ drm_info(adev_to_drm(adev), "Timeout wait for RLC serdes %u,%u\n",
i, j);
return;
}
@@ -3150,7 +3154,7 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
/* RLC_GPM_GENERAL_6 : RLC Ucode version */
rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
if(rlc_ucode_ver == 0x108) {
- DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
+ drm_info(adev_to_drm(adev), "Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i\n",
rlc_ucode_ver, adev->gfx.rlc_fw_version);
/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
* default is 0x9C4 to create a 100us interval */
@@ -3333,12 +3337,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
*/
if (adev->flags & AMD_IS_APU &&
adev->in_s3 && !pm_resume_via_firmware()) {
- DRM_INFO("Will skip the CSB packet resubmit\n");
+ drm_info(adev_to_drm(adev), "Will skip the CSB packet resubmit\n");
return 0;
}
r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
return r;
}
@@ -4581,7 +4585,7 @@ static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
r = amdgpu_ring_alloc(ring, 7);
if (r) {
- DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
+ drm_err(adev_to_drm(adev), "GDS workarounds failed to lock ring %s (%d).\n",
ring->name, r);
return r;
}
@@ -4670,7 +4674,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
r = amdgpu_ib_get(adev, NULL, total_size,
AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%d).\n", r);
return r;
}
@@ -4771,14 +4775,14 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* shedule the ib on the ring */
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r) {
- DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+ drm_err(adev_to_drm(adev), "ib schedule failed (%d).\n", r);
goto fail;
}
/* wait for the GPU to finish processing the IB */
r = dma_fence_wait(f, false);
if (r) {
- DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+ drm_err(adev_to_drm(adev), "fence wait failed (%d).\n", r);
goto fail;
}
@@ -5171,7 +5175,8 @@ static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
}
-static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
+static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_ring *ring, unsigned int vmid)
{
amdgpu_gfx_off_ctrl(adev, false);
@@ -5379,25 +5384,13 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- switch (ring->me) {
- case 1:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
- break;
- case 2:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
- break;
- default:
- return;
- }
- reg_mem_engine = 0;
- } else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
- reg_mem_engine = 1; /* pfp */
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
+ return;
}
+ adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
adev->nbio.funcs->get_hdp_flush_req_offset(adev),
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
@@ -5581,15 +5574,42 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, 0);
}
-static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_event_write(struct amdgpu_ring *ring,
+ uint32_t event_type,
+ uint32_t event_index)
{
- int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
- uint32_t seq = ring->fence_drv.sync_seq;
- uint64_t addr = ring->fence_drv.gpu_addr;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+ amdgpu_ring_write(ring, EVENT_TYPE(event_type) |
+ EVENT_INDEX(event_index));
+}
- gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
- lower_32_bits(addr), upper_32_bits(addr),
- seq, 0xffffffff, 4);
+static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int cp_coher_cntl =
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
+ PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
+
+ /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+}
+
+static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ gfx_v9_0_ring_emit_event_write(ring, VS_PARTIAL_FLUSH, 4);
+ gfx_v9_0_ring_emit_event_write(ring, PS_PARTIAL_FLUSH, 4);
+ }
+ gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4);
+ gfx_v9_0_emit_mem_sync(ring);
}
static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
@@ -5744,7 +5764,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout) {
r = -EINVAL;
- DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
+ drm_warn(adev_to_drm(adev), "ring %d timeout to preempt ib\n", ring->idx);
}
/*reset the CP_VMID_PREEMPT after trailing fence*/
@@ -7080,25 +7100,6 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
gfx_v9_0_query_utc_edc_status(adev, err_data);
}
-static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
-{
- const unsigned int cp_coher_cntl =
- PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
- PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
- PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
- PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
- PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
-
- /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
- amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
- amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
- amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
- amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
- amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
- amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
- amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
-}
-
static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
uint32_t pipe, bool enable)
{
@@ -7173,6 +7174,91 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
amdgpu_ring_insert_nop(ring, num_nop - 1);
}
+/*
+ * Emit a WRITE_DATA packet (header plus four dwords) on @ring that carries
+ * @reg as the destination and @val as the payload. The control dword is
+ * (1 << 16) on a KIQ ring and WR_CONFIRM on every other ring type.
+ */
+static void gfx_v9_0_ring_emit_wreg_me(struct amdgpu_ring *ring,
+				       uint32_t reg,
+				       uint32_t val)
+{
+	uint32_t control;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		control = (1 << 16); /* no inc addr */
+	else
+		control = WR_CONFIRM;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, control);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);
+}
+
+
+/*
+ * gfx_v9_0_reset_kgq - reset callback installed as .reset for the gfx ring
+ *
+ * @ring: ring to reset
+ * @vmid: VMID whose bit (1 << vmid) is placed in CP_VMID_RESET.RESET_REQUEST
+ * @timedout_fence: fence handed to the ring reset begin/end helpers
+ *
+ * Sequence:
+ *  1. Under the KIQ ring lock, write the reset request to mmCP_VMID_RESET
+ *     through KIQ ring 0 and test that ring.
+ *  2. On @ring itself: emit a trailing fence and wait on it, clear
+ *     mmCP_VMID_RESET, emit an ENABLE_LEGACY_PIPELINE event, then emit and
+ *     wait on a second trailing fence.
+ *  3. Test @ring and finish with amdgpu_ring_reset_helper_end().
+ *
+ * The "- N" suffixes in the step comments match the terms of the
+ * amdgpu_ring_alloc() sizes (presumably dwords per packet).
+ *
+ * Returns -ENOMEM when either ring allocation fails, the error from
+ * amdgpu_ring_test_ring() when a ring test fails, otherwise the result of
+ * amdgpu_ring_reset_helper_end().
+ */
+static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ u32 tmp;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, 5)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* send the reset - 5 */
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ gfx_v9_0_ring_emit_wreg(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ if (amdgpu_ring_alloc(ring, 8 + 7 + 5 + 2 + 8 + 7))
+ return -ENOMEM;
+ /* emit the fence to finish the reset - 8 */
+ ring->trail_seq++;
+ gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC);
+ /* wait for the fence - 7 */
+ gfx_v9_0_wait_reg_mem(ring, 0, 1, 0,
+ lower_32_bits(ring->trail_fence_gpu_addr),
+ upper_32_bits(ring->trail_fence_gpu_addr),
+ ring->trail_seq, 0xffffffff, 4);
+ /* clear mmCP_VMID_RESET - 5 */
+ gfx_v9_0_ring_emit_wreg_me(ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
+ /* event write ENABLE_LEGACY_PIPELINE - 2 */
+ gfx_v9_0_ring_emit_event_write(ring, ENABLE_LEGACY_PIPELINE, 0);
+ /* emit a regular fence - 8 */
+ ring->trail_seq++;
+ gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC);
+ /* wait for the fence - 7 */
+ gfx_v9_0_wait_reg_mem(ring, 1, 1, 0,
+ lower_32_bits(ring->trail_fence_gpu_addr),
+ upper_32_bits(ring->trail_fence_gpu_addr),
+ ring->trail_seq, 0xffffffff, 4);
+ amdgpu_ring_commit(ring);
+ /* wait for the commands to complete */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+
static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
unsigned int vmid,
struct amdgpu_fence *timedout_fence)
@@ -7413,7 +7499,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
.emit_frame_size = /* totally 242 maximum if 16 IBs */
5 + /* COND_EXEC */
- 7 + /* PIPELINE_SYNC */
+ 13 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* VM_FLUSH */
@@ -7451,9 +7537,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .reset = gfx_v9_0_reset_kgq,
.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
@@ -7469,7 +7555,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
.emit_frame_size = /* totally 242 maximum if 16 IBs */
5 + /* COND_EXEC */
- 7 + /* PIPELINE_SYNC */
+ 13 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* VM_FLUSH */
@@ -7530,7 +7616,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
20 + /* gfx_v9_0_ring_emit_gds_switch */
7 + /* gfx_v9_0_ring_emit_hdp_flush */
5 + /* hdp invalidate */
- 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
+ 9 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
@@ -7552,7 +7638,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
.emit_wave_limit = gfx_v9_0_emit_wave_limit,
.reset = gfx_v9_0_reset_kcq,
@@ -7573,7 +7658,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
20 + /* gfx_v9_0_ring_emit_gds_switch */
7 + /* gfx_v9_0_ring_emit_hdp_flush */
5 + /* hdp invalidate */
- 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
@@ -7772,7 +7856,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
adev->gfx.config.max_sh_per_se > 16)
return -EINVAL;
- amdgpu_gfx_parse_disable_cu(disable_masks,
+ amdgpu_gfx_parse_disable_cu(adev, disable_masks,
adev->gfx.config.max_shader_engines,
adev->gfx.config.max_sh_per_se);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index cbb74ffc4792..ad4d442e7345 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -848,6 +848,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
.ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
.get_xccs_per_xcp = &gfx_v9_4_3_get_xccs_per_xcp,
+ .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};
static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
@@ -1455,7 +1456,7 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
{
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);
return 0;
}
@@ -1481,7 +1482,7 @@ static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev,
0xffffffff,
0xffffffff, xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
- DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
+ drm_info(adev_to_drm(adev), "Timeout wait for RLC serdes %u,%u\n",
i, j);
return;
}
@@ -1582,7 +1583,7 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev)
rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6);
if (rlc_ucode_ver == 0x108) {
dev_info(adev->dev,
- "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
+ "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i\n",
rlc_ucode_ver, adev->gfx.rlc_fw_version);
/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
* default is 0x9C4 to create a 100us interval */
@@ -1666,12 +1667,12 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring,
- unsigned vmid)
+static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev,
+ int inst, struct amdgpu_ring *ring, unsigned int vmid)
{
u32 reg, pre_data, data;
- reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL);
+ reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regRLC_SPM_MC_CNTL);
if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
pre_data = RREG32_NO_KIQ(reg);
else
@@ -1682,9 +1683,9 @@ static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu
if (pre_data != data) {
if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
- WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, inst), regRLC_SPM_MC_CNTL, data);
} else
- WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ WREG32_SOC15(GC, GET_INST(GC, inst), regRLC_SPM_MC_CNTL, data);
}
}
@@ -2818,25 +2819,13 @@ static void gfx_v9_4_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- switch (ring->me) {
- case 1:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
- break;
- case 2:
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
- break;
- default:
- return;
- }
- reg_mem_engine = 0;
- } else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
- reg_mem_engine = 1; /* pfp */
+ if (!adev->gfx.funcs->get_hdp_flush_mask) {
+ dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
+ return;
}
+ adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1,
adev->nbio.funcs->get_hdp_flush_req_offset(adev),
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
@@ -4914,7 +4903,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
adev->gfx.config.max_sh_per_se > 16)
return -EINVAL;
- amdgpu_gfx_parse_disable_cu(disable_masks,
+ amdgpu_gfx_parse_disable_cu(adev, disable_masks,
adev->gfx.config.max_shader_engines,
adev->gfx.config.max_sh_per_se);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
new file mode 100644
index 000000000000..4aa004ee2c4d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.c
@@ -0,0 +1,893 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "gfxhub_v12_1.h"
+
+#include "gc/gc_12_1_0_offset.h"
+#include "gc/gc_12_1_0_sh_mask.h"
+#include "soc_v1_0_enum.h"
+
+#include "soc15_common.h"
+
+#define regGCVM_L2_CNTL3_DEFAULT 0x80120007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+#define regGRBM_GFX_INDEX_DEFAULT 0xe0000000
+
+/**
+ * gfxhub_v12_1_get_fb_location - read the framebuffer base address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Reads the FB location base registers through GET_INST(GC, 0) and returns
+ * the FB_BASE_LO32 field shifted left by 24, OR-ed with the FB_BASE_HI1
+ * field placed at bit 56.
+ */
+static u64 gfxhub_v12_1_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(GC, GET_INST(GC, 0),
+ regGCMC_VM_FB_LOCATION_BASE_LO32);
+ base &= GCMC_VM_FB_LOCATION_BASE_LO32__FB_BASE_LO32_MASK;
+ base <<= 24;
+
+ base |= ((u64)(GCMC_VM_FB_LOCATION_BASE_HI32__FB_BASE_HI1_MASK &
+ RREG32_SOC15(GC, GET_INST(GC, 0),
+ regGCMC_VM_FB_LOCATION_BASE_HI32)) << 56);
+ return base;
+}
+
+/**
+ * gfxhub_v12_1_get_mc_fb_offset - read the MC framebuffer offset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the GCMC_VM_FB_OFFSET register value, read through
+ * GET_INST(GC, 0), shifted left by 24.
+ *
+ * The register value is widened to 64 bits *before* the shift; shifting the
+ * 32-bit read result first would discard every register bit above bit 7.
+ */
+static u64 gfxhub_v12_1_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+	return (u64)RREG32_SOC15(GC, GET_INST(GC, 0),
+				 regGCMC_VM_FB_OFFSET) << 24;
+}
+/**
+ * gfxhub_v12_1_xcc_setup_vm_pt_regs - program a VMID's page table base
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: VM context index; multiplied by hub->ctx_addr_distance to reach
+ *        that context's registers starting from the CONTEXT0 ones
+ * @page_table_base: 64-bit value written as LO32/HI32 halves
+ * @xcc_mask: bitmask of instances to program
+ *
+ * For every instance set in @xcc_mask, writes @page_table_base into the
+ * PAGE_TABLE_BASE_ADDR_LO32/HI32 register pair of context @vmid.
+ */
+static void gfxhub_v12_1_xcc_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+ }
+}
+
+/*
+ * Program the page table base of @vmid on all XCCs. The instance mask is the
+ * contiguous low NUM_XCC(adev->gfx.xcc_mask) bits.
+ */
+static void gfxhub_v12_1_setup_vm_pt_regs(struct amdgpu_device *adev,
+					  uint32_t vmid,
+					  uint64_t page_table_base)
+{
+	const uint32_t all_xccs = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+
+	gfxhub_v12_1_xcc_setup_vm_pt_regs(adev, vmid, page_table_base, all_xccs);
+}
+
+/*
+ * Point VMID0 of every instance in @xcc_mask at the GART page table (PDB0
+ * when adev->gmc.pdb0_bo exists, the GART BO otherwise) and program the
+ * address range that table covers.
+ */
+static void gfxhub_v12_1_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
+						     uint32_t xcc_mask)
+{
+	uint64_t root, first, last;
+	int inst;
+
+	root = adev->gmc.pdb0_bo ? amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo) :
+				   amdgpu_gmc_pd_addr(adev->gart.bo);
+	gfxhub_v12_1_xcc_setup_vm_pt_regs(adev, 0, root, xcc_mask);
+
+	for_each_inst(inst, xcc_mask) {
+		/*
+		 * If GART is used for FB translation, the vmid0 page table
+		 * covers both vram and system memory (gart), so the range
+		 * starts at fb_start rather than gart_start. The end of the
+		 * range is gart_end in both cases.
+		 */
+		first = adev->gmc.pdb0_bo ? adev->gmc.fb_start :
+					    adev->gmc.gart_start;
+		last = adev->gmc.gart_end;
+
+		WREG32_SOC15(GC, GET_INST(GC, inst),
+			     regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+			     (u32)(first >> 12));
+		WREG32_SOC15(GC, GET_INST(GC, inst),
+			     regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+			     (u32)(first >> 44));
+
+		WREG32_SOC15(GC, GET_INST(GC, inst),
+			     regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+			     (u32)(last >> 12));
+		WREG32_SOC15(GC, GET_INST(GC, inst),
+			     regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+			     (u32)(last >> 44));
+	}
+}
+/*
+ * gfxhub_v12_1_xcc_init_system_aperture_regs - program AGP/system apertures
+ *
+ * For every instance in @xcc_mask:
+ *  - programs the AGP BAR (base 0, bottom/top from agp_start/agp_end >> 24);
+ *  - unless running as an SR-IOV VF, programs the system aperture bounds
+ *    (min/max of the FB and AGP ranges, >> 18), the default page address
+ *    derived from adev->mem_scratch.gpu_addr, the protection fault default
+ *    address derived from adev->dummy_page_addr, and sets
+ *    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY and ENABLE_RETRY_FAULT_INTERRUPT
+ *    in GCVM_L2_PROTECTION_FAULT_CNTL2;
+ *  - when adev->gmc.pdb0_bo is set, overwrites the FB, AGP and system
+ *    aperture registers with base/low values above their top/high values.
+ */
+static void gfxhub_v12_1_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint64_t value;
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BASE_LO32, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BASE_HI32, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BOT_LO32,
+ lower_32_bits(adev->gmc.agp_start >> 24));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BOT_HI32,
+ upper_32_bits(adev->gmc.agp_start >> 24));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_TOP_LO32,
+ lower_32_bits(adev->gmc.agp_end >> 24));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_TOP_HI32,
+ upper_32_bits(adev->gmc.agp_end >> 24));
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
+ lower_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
+ upper_32_bits(min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32,
+ lower_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32,
+ upper_32_bits(max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18));
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ENABLE_RETRY_FAULT_INTERRUPT, 0x1);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+ }
+
+ /* When VRAM is squeezed into the GART aperture, the FB and AGP
+ * apertures are not used. Disable them.
+ */
+ if (adev->gmc.pdb0_bo) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_FB_LOCATION_TOP_LO32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_FB_LOCATION_TOP_HI32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_FB_LOCATION_BASE_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_FB_LOCATION_BASE_HI32, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_TOP_LO32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_TOP_HI32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BOT_LO32, 0xFFFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_AGP_BOT_HI32, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
+ 0x7F);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0);
+ }
+ }
+}
+/*
+ * gfxhub_v12_1_xcc_init_tlb_regs - configure the L1 TLB control register
+ *
+ * For every instance in @xcc_mask, read-modify-writes
+ * GCMC_VM_MX_L1_TLB_CNTL: sets ENABLE_L1_TLB and
+ * ENABLE_ADVANCED_DRIVER_MODEL, sets SYSTEM_ACCESS_MODE to 3, clears
+ * SYSTEM_APERTURE_UNMAPPED_ACCESS and ECO_BITS, and selects MTYPE_UC.
+ * The write goes through WREG32_SOC15_RLC.
+ */
+static void gfxhub_v12_1_xcc_init_tlb_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp,
+ GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp,
+ GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp,
+ GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp,
+ GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp,
+ GCMC_VM_MX_L1_TLB_CNTL,
+ ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp,
+ GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+}
+/*
+ * gfxhub_v12_1_xcc_init_cache_regs - configure the VM L2 cache registers
+ *
+ * For every instance in @xcc_mask:
+ *  - GCVM_L2_CNTL: read-modify-write enabling the L2 cache and default page
+ *    out to system memory, with fragment processing left disabled;
+ *  - GCVM_L2_CNTL2: read-modify-write requesting invalidation of all L1 TLBs
+ *    and of the L2 cache;
+ *  - GCVM_L2_CNTL3/4/5: written from the *_DEFAULT constants of this file,
+ *    with BANK_SELECT and L2_CACHE_BIGK_FRAGMENT_SIZE chosen according to
+ *    adev->gmc.translate_further.
+ */
+static void gfxhub_v12_1_xcc_init_cache_regs(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_L2_CACHE, 1);
+ /* TODO: set ENABLE_L2_FRAGMENT_PROCESSING to 1? */
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i), regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2,
+ INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2,
+ INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i),
+ regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5,
+ L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, i), regGCVM_L2_CNTL5, tmp);
+ }
+}
+/*
+ * gfxhub_v12_1_xcc_enable_system_domain - enable VM context 0
+ *
+ * For every instance in @xcc_mask, read-modify-writes GCVM_CONTEXT0_CNTL:
+ * sets ENABLE_CONTEXT, takes PAGE_TABLE_DEPTH and PAGE_TABLE_BLOCK_SIZE from
+ * adev->gmc.vmid0_page_table_depth / vmid0_page_table_block_size, and clears
+ * RETRY_PERMISSION_OR_INVALID_PAGE_FAULT.
+ */
+static void gfxhub_v12_1_xcc_enable_system_domain(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ PAGE_TABLE_DEPTH,
+ adev->gmc.vmid0_page_table_depth);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->gmc.vmid0_page_table_block_size);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_CONTEXT0_CNTL, tmp);
+ }
+}
+/*
+ * gfxhub_v12_1_xcc_disable_identity_aperture - disable the identity aperture
+ *
+ * For every instance in @xcc_mask, programs the CONTEXT1 identity aperture
+ * with a low address (HI32 0x1FFF, LO32 0xFFFFFFFF) above its high address
+ * (0), and sets the identity physical offset to 0.
+ */
+static void gfxhub_v12_1_xcc_disable_identity_aperture(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0XFFFFFFFF);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x00001FFF);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+ }
+}
+/*
+ * gfxhub_v12_1_xcc_setup_vmid_config - configure the non-zero VM contexts
+ *
+ * For every instance in @xcc_mask, walks 15 contexts starting at CONTEXT1
+ * (offsets i * hub->ctx_distance, i = 0..14). Each context is enabled with
+ * the page table depth from adev->vm_manager.num_level, the block size
+ * adev->vm_manager.block_size - 9, all *_PROTECTION_FAULT_ENABLE_DEFAULT
+ * bits set and RETRY_PERMISSION_OR_INVALID_PAGE_FAULT set. The page table
+ * range of each context is programmed as [0, adev->vm_manager.max_pfn - 1].
+ *
+ * The control value computed for the last context is cached in
+ * hub->vm_cntx_cntl.
+ */
+static void gfxhub_v12_1_xcc_setup_vmid_config(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int num_level, block_size;
+ uint32_t tmp;
+ int i, j;
+
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ block_size -= 9;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_DEPTH, num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE, block_size);
+ /* Send no-retry XNACK on fault to suppress VM fault storm */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ 1);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+ }
+}
+/*
+ * gfxhub_v12_1_xcc_program_invalidation - set invalidation address ranges
+ *
+ * For every instance in @xcc_mask, programs the ADDR_RANGE_LO32/HI32
+ * registers of 18 invalidation engines (ENG0 plus
+ * i * hub->eng_addr_distance) with 0xFFFFFFFF / 0x3FFF.
+ */
+static void gfxhub_v12_1_xcc_program_invalidation(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ unsigned int i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xFFFFFFFF);
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x3FFF);
+ }
+ }
+}
+/*
+ * gfxhub_v12_1_xcc_gart_enable - bring up GART on the given instances
+ *
+ * Under SR-IOV VF, first programs the FB location base/top registers from
+ * adev->gmc.vram_start / vram_end (>> 24). Then runs the init helpers in
+ * order: GART aperture, system aperture, TLB, cache (skipped for VF), system
+ * domain, identity aperture disable (skipped for VF), VMID config and
+ * invalidation ranges.
+ *
+ * Always returns 0.
+ */
+static int gfxhub_v12_1_xcc_gart_enable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ uint32_t i;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* GCMC_VM_FB_LOCATION_BASE/TOP are VF copy registers.
+ * VBIOS post does not program them during the boot-up phase,
+ * so the driver needs to program them from the guest side. */
+ for_each_inst(i, xcc_mask) {
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_FB_LOCATION_BASE_LO32,
+ lower_32_bits(adev->gmc.vram_start >> 24));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_FB_LOCATION_BASE_HI32,
+ upper_32_bits(adev->gmc.vram_start >> 24));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_FB_LOCATION_TOP_LO32,
+ lower_32_bits(adev->gmc.vram_end >> 24));
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCMC_VM_FB_LOCATION_TOP_HI32,
+ upper_32_bits(adev->gmc.vram_end >> 24));
+ }
+ }
+ /* GART Enable. */
+ gfxhub_v12_1_xcc_init_gart_aperture_regs(adev, xcc_mask);
+ gfxhub_v12_1_xcc_init_system_aperture_regs(adev, xcc_mask);
+ gfxhub_v12_1_xcc_init_tlb_regs(adev, xcc_mask);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v12_1_xcc_init_cache_regs(adev, xcc_mask);
+
+ gfxhub_v12_1_xcc_enable_system_domain(adev, xcc_mask);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v12_1_xcc_disable_identity_aperture(adev, xcc_mask);
+ gfxhub_v12_1_xcc_setup_vmid_config(adev, xcc_mask);
+ gfxhub_v12_1_xcc_program_invalidation(adev, xcc_mask);
+
+ return 0;
+}
+
+/*
+ * Enable GART on all XCCs; the instance mask is the contiguous low
+ * NUM_XCC(adev->gfx.xcc_mask) bits. Returns the per-XCC helper's result.
+ */
+static int gfxhub_v12_1_gart_enable(struct amdgpu_device *adev)
+{
+	const uint32_t all_xccs = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+
+	return gfxhub_v12_1_xcc_gart_enable(adev, all_xccs);
+}
+/*
+ * gfxhub_v12_1_xcc_gart_disable - tear down GART on the given instances
+ *
+ * For every instance in @xcc_mask: writes 0 to the CNTL register of all 16
+ * VM contexts, clears ENABLE_L1_TLB and ENABLE_ADVANCED_DRIVER_MODEL in
+ * GCMC_VM_MX_L1_TLB_CNTL, and, unless running as an SR-IOV VF, clears
+ * ENABLE_L2_CACHE in GCVM_L2_CNTL and writes 0 to GCVM_L2_CNTL3.
+ */
+static void gfxhub_v12_1_xcc_gart_disable(struct amdgpu_device *adev,
+ uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp;
+ u32 i, j;
+
+ for_each_inst(j, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
+ regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j),
+ regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, j),
+ regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, GET_INST(GC, j), regGCVM_L2_CNTL, tmp);
+ WREG32_SOC15(GC, GET_INST(GC, j), regGCVM_L2_CNTL3, 0);
+ }
+ }
+}
+
+/*
+ * Disable GART on all XCCs; the instance mask is the contiguous low
+ * NUM_XCC(adev->gfx.xcc_mask) bits.
+ */
+static void gfxhub_v12_1_gart_disable(struct amdgpu_device *adev)
+{
+	const uint32_t all_xccs = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+
+	gfxhub_v12_1_xcc_gart_disable(adev, all_xccs);
+}
+/*
+ * gfxhub_v12_1_xcc_set_fault_enable_default - set L2 protection fault defaults
+ *
+ * For every instance in @xcc_mask, read-modify-writes
+ * GCVM_L2_PROTECTION_FAULT_CNTL_LO32 so that every
+ * *_PROTECTION_FAULT_ENABLE_DEFAULT field and
+ * OTHER_CLIENT_ID_NO_RETRY_FAULT_INTERRUPT equal @value, and
+ * CLIENT_ID_NO_RETRY_FAULT_INTERRUPT is 0xFFFF when @value is true, else 0.
+ *
+ * When @value is false, CRASH_ON_NO_RETRY_FAULT (LO32) and
+ * CRASH_ON_RETRY_FAULT (HI32) are additionally set. The HI32 register is
+ * read and written back in both cases.
+ */
+static void gfxhub_v12_1_xcc_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value, uint32_t xcc_mask)
+{
+ u32 tmp;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL_LO32);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ PDE3_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ CLIENT_ID_NO_RETRY_FAULT_INTERRUPT, value ? 0xFFFF:0);
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ OTHER_CLIENT_ID_NO_RETRY_FAULT_INTERRUPT, value);
+ if (!value)
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL_LO32, tmp);
+
+ tmp = RREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL_HI32);
+ if (!value)
+ tmp = REG_SET_FIELD(tmp,
+ GCVM_L2_PROTECTION_FAULT_CNTL_HI32,
+ CRASH_ON_RETRY_FAULT, 1);
+ WREG32_SOC15(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL_HI32, tmp);
+ }
+}
+
+/**
+ * gfxhub_v12_1_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Applies the setting to all XCCs; the instance mask is the contiguous low
+ * NUM_XCC(adev->gfx.xcc_mask) bits.
+ */
+static void gfxhub_v12_1_set_fault_enable_default(struct amdgpu_device *adev,
+						  bool value)
+{
+	const uint32_t all_xccs = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+
+	gfxhub_v12_1_xcc_set_fault_enable_default(adev, value, all_xccs);
+}
+/*
+ * gfxhub_v12_1_get_invalidate_req - build a GCVM_INVALIDATE_ENG0_REQ value
+ *
+ * @vmid: VMID to invalidate; selects bit (1 << vmid) of
+ *        PER_VMID_INVALIDATE_REQ
+ * @flush_type: value placed in the FLUSH_TYPE field
+ *
+ * Returns a request word that invalidates the L2 PTEs, L2 PDE0-PDE3 and L1
+ * PTEs of @vmid, with CLEAR_PROTECTION_FAULT_STATUS_ADDR left at 0.
+ */
+static uint32_t gfxhub_v12_1_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid */
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ INVALIDATE_L2_PDE3, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+/*
+ * UTCL2 client names, indexed by the CID field of the L2 protection fault
+ * status. Used by gfxhub_v12_1_print_l2_protection_fault_status(), which
+ * prints "unknown" for a CID beyond the end of this table.
+ */
+static const char *gfxhub_v12_1_client_ids[] = {
+ "CB",
+ "DB",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG/PC/SC",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "Reserved",
+ "Reserved",
+ "WGS",
+ "DSM",
+ "PA"
+};
+
+/*
+ * gfxhub_v12_1_print_l2_protection_fault_status - log a decoded fault status
+ *
+ * Logs, via dev_err(), the raw GCVM_L2_PROTECTION_FAULT_STATUS_LO32 value
+ * @status, the faulting UTCL2 client (name looked up in
+ * gfxhub_v12_1_client_ids, "unknown" when the CID is out of range) and the
+ * MORE_FAULTS, WALKER_ERROR, PERMISSION_FAULTS, MAPPING_ERROR and RW fields.
+ *
+ * TODO: the L2 protection fault status has grown to 64 bits; some critical
+ * fields such as FED have moved to STATUS_HI32. */
+static void gfxhub_v12_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32,
+ CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_v12_1_client_ids) ?
+ "unknown" : gfxhub_v12_1_client_ids[cid], cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, RW));
+}
+/*
+ * Per-hub callbacks; installed into hub->vmhub_funcs for each instance by
+ * gfxhub_v12_1_xcc_init().
+ */
+static const struct amdgpu_vmhub_funcs gfxhub_v12_1_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v12_1_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v12_1_get_invalidate_req,
+};
+/*
+ * gfxhub_v12_1_xcc_init - fill in the vmhub descriptors of the given instances
+ *
+ * For every instance in @xcc_mask, records in adev->vmhub[AMDGPU_GFXHUB(i)]
+ * the register offsets of the context 0 page table base, invalidation engine
+ * 0 (SEM/REQ/ACK), CONTEXT0_CNTL and the L2 protection fault status/control
+ * (LO32 halves), the register strides between consecutive contexts and
+ * invalidation engines, the mask of VM fault interrupt enable bits, and the
+ * vmhub callback table.
+ */
+static void gfxhub_v12_1_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, xcc_mask) {
+ hub = &adev->vmhub[AMDGPU_GFXHUB(i)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_CONTEXT0_CNTL);
+ /* TODO: add a new member to accommodate the additional fault status/cntl reg */
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_STATUS_LO32);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, GET_INST(GC, i),
+ regGCVM_L2_PROTECTION_FAULT_CNTL_LO32);
+ hub->ctx_distance =
+ regGCVM_CONTEXT1_CNTL -
+ regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance =
+ regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance =
+ regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance =
+ regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault =
+ GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v12_1_vmhub_funcs;
+ }
+}
+
+/*
+ * Initialise the vmhub descriptors of all XCCs; the instance mask is the
+ * contiguous low NUM_XCC(adev->gfx.xcc_mask) bits.
+ */
+static void gfxhub_v12_1_init(struct amdgpu_device *adev)
+{
+	const uint32_t all_xccs = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
+
+	gfxhub_v12_1_xcc_init(adev, all_xccs);
+}
+
+/**
+ * gfxhub_v12_1_get_xgmi_info - read the XGMI topology from the LFB registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Reads GCMC_VM_XGMI_LFB_CNTL and GCMC_VM_XGMI_LFB_SIZE through
+ * GET_INST(GC, 0). When PF_MAX_REGION is non-zero, or the device is
+ * connected to the CPU over XGMI, fills in adev->gmc.xgmi with the number of
+ * physical nodes (PF_MAX_REGION + 1), this device's physical node id
+ * (PF_LFB_REGION) and the node segment size (PF_LFB_SIZE << 24).
+ *
+ * Returns 0 on success, -EINVAL when the node count exceeds 8 or the node id
+ * exceeds 7.
+ */
+static int gfxhub_v12_1_get_xgmi_info(struct amdgpu_device *adev)
+{
+	u32 max_num_physical_nodes;
+	u32 max_physical_node_id;
+	u32 xgmi_lfb_cntl;
+	u32 max_region;
+	u64 seg_size;
+
+	xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0),
+				     regGCMC_VM_XGMI_LFB_CNTL);
+	/*
+	 * Widen the field to 64 bits before shifting: REG_GET_FIELD() does
+	 * not yield a 64-bit type on 32-bit builds, so shifting first could
+	 * drop the upper bits of the segment size.
+	 */
+	seg_size = (u64)REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, 0),
+						   regGCMC_VM_XGMI_LFB_SIZE),
+				      GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+	max_region = REG_GET_FIELD(xgmi_lfb_cntl,
+				   GCMC_VM_XGMI_LFB_CNTL,
+				   PF_MAX_REGION);
+
+	max_num_physical_nodes = 8;
+	max_physical_node_id = 7;
+
+	/* PF_MAX_REGION=0 means xgmi is disabled */
+	if (max_region || adev->gmc.xgmi.connected_to_cpu) {
+		adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+
+		if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
+			return -EINVAL;
+
+		adev->gmc.xgmi.physical_node_id =
+			REG_GET_FIELD(xgmi_lfb_cntl,
+				      GCMC_VM_XGMI_LFB_CNTL,
+				      PF_LFB_REGION);
+
+		if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
+			return -EINVAL;
+
+		adev->gmc.xgmi.node_segment_size = seg_size;
+	}
+
+	return 0;
+}
+/*
+ * GFX hub callback table for v12.1; exported through gfxhub_v12_1.h. Every
+ * entry operates on all XCCs except get_fb_location, get_mc_fb_offset and
+ * get_xgmi_info, which read registers through GET_INST(GC, 0) only.
+ */
+const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs = {
+ .get_fb_location = gfxhub_v12_1_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v12_1_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v12_1_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v12_1_gart_enable,
+ .gart_disable = gfxhub_v12_1_gart_disable,
+ .set_fault_enable_default = gfxhub_v12_1_set_fault_enable_default,
+ .init = gfxhub_v12_1_init,
+ .get_xgmi_info = gfxhub_v12_1_get_xgmi_info,
+};
+
+/*
+ * Partition resume hook: re-applies the fault-enable defaults on the
+ * instances in @inst_mask (disabled only when amdgpu_vm_fault_stop is
+ * AMDGPU_VM_FAULT_STOP_ALWAYS) and, unless running as an SR-IOV VF,
+ * re-enables GART on them.
+ *
+ * @handle is the amdgpu_device pointer. Returns 0 for a VF, otherwise the
+ * result of gfxhub_v12_1_xcc_gart_enable().
+ */
+static int gfxhub_v12_1_xcp_resume(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+	gfxhub_v12_1_xcc_set_fault_enable_default(adev, value, inst_mask);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	return gfxhub_v12_1_xcc_gart_enable(adev, inst_mask);
+}
+
+/*
+ * Partition suspend hook: disables GART on the instances in @inst_mask,
+ * except when running as an SR-IOV VF. @handle is the amdgpu_device pointer.
+ * Always returns 0.
+ */
+static int gfxhub_v12_1_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	gfxhub_v12_1_xcc_gart_disable(adev, inst_mask);
+	return 0;
+}
+/*
+ * Per-partition (XCP) suspend/resume callbacks for the v12.1 GFX hub;
+ * exported through gfxhub_v12_1.h.
+ */
+struct amdgpu_xcp_ip_funcs gfxhub_v12_1_xcp_funcs = {
+ .suspend = &gfxhub_v12_1_xcp_suspend,
+ .resume = &gfxhub_v12_1_xcp_resume
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.h
new file mode 100644
index 000000000000..b3f8f0aa0024
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_1.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V12_1_H__
+#define __GFXHUB_V12_1_H__
+/* GFX hub callback table, defined in gfxhub_v12_1.c */
+extern const struct amdgpu_gfxhub_funcs gfxhub_v12_1_funcs;
+/* Per-partition suspend/resume callbacks, defined in gfxhub_v12_1.c */
+extern struct amdgpu_xcp_ip_funcs gfxhub_v12_1_xcp_funcs;
+
+#endif /* __GFXHUB_V12_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index deb95fab02df..aceb8447feac 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -649,7 +649,7 @@ static void gfxhub_v2_1_halt(struct amdgpu_device *adev)
}
if (!time)
- DRM_WARN("failed to wait for GRBM(EA) idle\n");
+ drm_warn(adev_to_drm(adev), "failed to wait for GRBM(EA) idle\n");
}
const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ce6e04242c52..2568eeaae945 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -115,27 +115,10 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
if (retry_fault) {
+ int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
+ write_fault);
/* Returning 1 here also prevents sending the IV to the KFD */
-
- /* Process it onyl if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
- entry->timestamp))
- return 1;
-
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
- entry->timestamp, write_fault))
+ if (ret == 1)
return 1;
}
@@ -867,7 +850,7 @@ static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ drm_warn(adev_to_drm(adev), "No suitable DMA available.\n");
return r;
}
@@ -974,7 +957,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
if (!adev->in_s0ix)
gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
- DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index ba59ee8e398a..6349e239a367 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -114,27 +114,10 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
if (retry_fault) {
+ int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
+ write_fault);
/* Returning 1 here also prevents sending the IV to the KFD */
-
- /* Process it only if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
- entry->timestamp))
- return 1;
-
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
- entry->timestamp, write_fault))
+ if (ret == 1)
return 1;
}
@@ -601,6 +584,7 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
case IP_VERSION(3, 3, 0):
case IP_VERSION(3, 3, 1):
case IP_VERSION(3, 3, 2):
+ case IP_VERSION(3, 4, 0):
adev->mmhub.funcs = &mmhub_v3_3_funcs;
break;
default:
@@ -619,6 +603,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs;
break;
default:
@@ -795,6 +780,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
+ case IP_VERSION(11, 5, 4):
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
@@ -838,7 +824,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ drm_warn(adev_to_drm(adev), "No suitable DMA available.\n");
return r;
}
@@ -939,7 +925,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
adev->mmhub.funcs->set_fault_enable_default(adev, value);
gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
- DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index 7a9d6894e321..b9671fc39e2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -28,6 +28,7 @@
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v12_0.h"
+#include "gmc_v12_1.h"
#include "athub/athub_4_1_0_sh_mask.h"
#include "athub/athub_4_1_0_offset.h"
#include "oss/osssys_7_0_0_offset.h"
@@ -38,7 +39,9 @@
#include "soc15_common.h"
#include "nbif_v6_3_1.h"
#include "gfxhub_v12_0.h"
+#include "gfxhub_v12_1.h"
#include "mmhub_v4_1_0.h"
+#include "mmhub_v4_2_0.h"
#include "athub_v4_1_0.h"
#include "umc_v8_14.h"
@@ -107,27 +110,10 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
if (retry_fault) {
+ int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
+ write_fault);
/* Returning 1 here also prevents sending the IV to the KFD */
-
- /* Process it only if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
- entry->timestamp))
- return 1;
-
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
- entry->timestamp, write_fault))
+ if (ret == 1)
return 1;
}
@@ -623,6 +609,9 @@ static void gmc_v12_0_set_mmhub_funcs(struct amdgpu_device *adev)
case IP_VERSION(4, 1, 0):
adev->mmhub.funcs = &mmhub_v4_1_0_funcs;
break;
+ case IP_VERSION(4, 2, 0):
+ adev->mmhub.funcs = &mmhub_v4_2_0_funcs;
+ break;
default:
break;
}
@@ -635,6 +624,9 @@ static void gmc_v12_0_set_gfxhub_funcs(struct amdgpu_device *adev)
case IP_VERSION(12, 0, 1):
adev->gfxhub.funcs = &gfxhub_v12_0_funcs;
break;
+ case IP_VERSION(12, 1, 0):
+ adev->gfxhub.funcs = &gfxhub_v12_1_funcs;
+ break;
default:
break;
}
@@ -644,10 +636,19 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 1, 0):
+ gmc_v12_1_set_gmc_funcs(adev);
+ gmc_v12_1_set_irq_funcs(adev);
+ adev->gmc.init_pte_flags = AMDGPU_PTE_IS_PTE;
+ break;
+ default:
+ gmc_v12_0_set_gmc_funcs(adev);
+ gmc_v12_0_set_irq_funcs(adev);
+ break;
+ }
gmc_v12_0_set_gfxhub_funcs(adev);
gmc_v12_0_set_mmhub_funcs(adev);
- gmc_v12_0_set_gmc_funcs(adev);
- gmc_v12_0_set_irq_funcs(adev);
gmc_v12_0_set_umc_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
@@ -656,6 +657,7 @@ static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
adev->gmc.private_aperture_start = 0x1000000000000000ULL;
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
return 0;
}
@@ -771,6 +773,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = ip_block->adev;
+ int i;
adev->mmhub.funcs->init(adev);
@@ -778,12 +781,15 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
spin_lock_init(&adev->gmc.invalidate_lock);
- r = amdgpu_atomfirmware_get_vram_info(adev,
- &vram_width, &vram_type, &vram_vendor);
- adev->gmc.vram_width = vram_width;
-
- adev->gmc.vram_type = vram_type;
- adev->gmc.vram_vendor = vram_vendor;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) {
+ gmc_v12_1_init_vram_info(adev);
+ } else {
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ adev->gmc.vram_width = vram_width;
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+ }
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
@@ -797,6 +803,18 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
*/
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
break;
+ case IP_VERSION(12, 1, 0):
+ bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
+ NUM_XCC(adev->gfx.xcc_mask));
+ for (i = 0; i < hweight32(adev->aid_mask); i++)
+ set_bit(AMDGPU_MMHUB0(i), adev->vmhubs_mask);
+ /*
+ * To fulfill 5-level page support,
+ * vm size is 128PetaByte (57bit), maximum size,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 128 * 1024 * 1024, 9, 4, 57);
+ break;
default:
break;
}
@@ -809,13 +827,33 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
- UTCL2_1_0__SRCID__FAULT,
- &adev->gmc.vm_fault);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) {
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_UTCL2,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+ /* Add GCVM UTCL2 Retry fault */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_UTCL2,
+ UTCL2_1_0__SRCID__RETRY,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ /* Add MMVM UTCL2 Retry fault */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_RETRY,
+ &adev->gmc.vm_fault);
+ } else {
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ }
if (r)
return r;
- if (!amdgpu_sriov_vf(adev)) {
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0)) &&
+ !amdgpu_sriov_vf(adev)) {
/* interrupt sent to DF. */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
&adev->gmc.ecc_irq);
@@ -827,11 +865,11 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
* Set the internal MC address mask This is the max address of the GPU's
* internal address space.
*/
- adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+ adev->gmc.mc_mask = AMDGPU_GMC_HOLE_MASK;
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
- printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ drm_warn(adev_to_drm(adev), "No suitable DMA available.\n");
return r;
}
@@ -843,6 +881,14 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_gmc_get_vbios_allocations(adev);
+#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0)) {
+ r = amdgpu_gmc_init_mem_ranges(adev);
+ if (r)
+ return r;
+ }
+#endif
+
/* Memory manager */
r = amdgpu_bo_init(adev);
if (r)
@@ -858,7 +904,11 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
- adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
+ adev->vm_manager.first_kfd_vmid =
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 1, 0) ?
+ 3 : 8;
+ adev->vm_manager.first_kfd_vmid =
+ adev->gfx.disable_kq ? 1 : (adev->vm_manager.first_kfd_vmid);
amdgpu_vm_manager_init(adev);
@@ -924,9 +974,9 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
adev->mmhub.funcs->set_fault_enable_default(adev, value);
- gmc_v12_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
- dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
@@ -1026,7 +1076,10 @@ static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
if (r)
return r;
- return athub_v4_1_0_set_clockgating(adev, state);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0))
+ return athub_v4_1_0_set_clockgating(adev, state);
+ else
+ return 0;
}
static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
@@ -1035,7 +1088,8 @@ static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u6
adev->mmhub.funcs->get_clockgating(adev, flags);
- athub_v4_1_0_get_clockgating(adev, flags);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 1, 0))
+ athub_v4_1_0_get_clockgating(adev, flags);
}
static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
new file mode 100644
index 000000000000..ef6e550ce7c3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.c
@@ -0,0 +1,637 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "gmc_v12_1.h"
+#include "soc15_common.h"
+#include "soc_v1_0_enum.h"
+#include "oss/osssys_7_1_0_offset.h"
+#include "oss/osssys_7_1_0_sh_mask.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+/* Set or clear the VM fault bits in the context control registers of every hub in adev->vmhubs_mask; @src and @type are unused. */
+static int gmc_v12_1_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp, reg, i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ hub = &adev->vmhub[j];
+ for (i = 0; i < 16; i++) { /* 16 consecutive registers starting at vm_context0_cntl */
+ reg = hub->vm_context0_cntl + i;
+
+ /* GFXHUB(0) is left untouched while in s0ix.
+ * This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't change.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0)) /* hub index in the MMHUB range: use the MMHUB accessor */
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_XCC(reg, j); /* otherwise j is passed as the XCC instance */
+
+ tmp &= ~hub->vm_cntx_cntl_vm_fault; /* clear the fault bits */
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_XCC(reg, tmp, j);
+ }
+ }
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ hub = &adev->vmhub[j];
+ for (i = 0; i < 16; i++) { /* same register walk as the disable case */
+ reg = hub->vm_context0_cntl + i;
+
+ /* GFXHUB(0) is left untouched while in s0ix.
+ * This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't change.
+ */
+ if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+ continue;
+
+ if (j >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP(MMHUB, reg);
+ else
+ tmp = RREG32_XCC(reg, j);
+
+ tmp |= hub->vm_cntx_cntl_vm_fault; /* set the fault bits */
+
+ if (j >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ else
+ WREG32_XCC(reg, tmp, j);
+ }
+ }
+ break;
+ default: /* any other state value changes nothing */
+ break;
+ }
+
+ return 0; /* no failure path: always 0 */
+}
+/* Process a VM fault IV entry: try to resolve retry faults first, otherwise log the fault and the hub's L2 fault status. */
+static int gmc_v12_1_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_task_info *task_info;
+ bool retry_fault = false, write_fault = false;
+ unsigned int vmhub, node_id;
+ struct amdgpu_vmhub *hub;
+ uint32_t cam_index = 0;
+ const char *hub_name;
+ int ret, xcc_id = 0;
+ uint32_t status = 0;
+ u64 addr;
+
+ node_id = entry->node_id;
+
+ addr = (u64)entry->src_data[0] << 12; /* src_data[0] supplies the address from bit 12 upward */
+ addr |= ((u64)entry->src_data[1] & 0x1fff) << 44; /* low 13 bits of src_data[1] go to bit 44 and above */
+
+ if (entry->src_id == UTCL2_1_0__SRCID__RETRY) {
+ retry_fault = true;
+ write_fault = !!(entry->src_data[1] & 0x200000); /* bit 21 of src_data[1] is used as the write flag */
+ }
+
+ if (entry->client_id == SOC_V1_0_IH_CLIENTID_VMC) {
+ hub_name = "mmhub0"; /* label is fixed; the hub index below depends on node_id */
+ vmhub = AMDGPU_MMHUB0(node_id / 4);
+ } else {
+ hub_name = "gfxhub0";
+ if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+ xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
+ node_id);
+ if (xcc_id < 0) /* failed lookup falls back to XCC 0 */
+ xcc_id = 0;
+ }
+ vmhub = xcc_id;
+ }
+
+ hub = &adev->vmhub[vmhub];
+
+ if (retry_fault) {
+ if (adev->irq.retry_cam_enabled) {
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ cam_index = entry->src_data[3] & 0x3ff; /* low 10 bits of src_data[3] */
+
+ ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault);
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); /* written whether or not the fault was handled */
+ if (ret)
+ return 1;
+ } else {
+ /* Process it only if it's the first fault for this address */
+ if (entry->ih != &adev->irq.ih_soft &&
+ amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+ entry->timestamp))
+ return 1;
+
+ /* Delegate it to a different ring if the hardware hasn't
+ * already done it.
+ */
+ if (entry->ih == &adev->irq.ih) {
+ amdgpu_irq_delegate(adev, entry, 8);
+ return 1;
+ }
+
+ /* Try to handle the recoverable page faults by filling page
+ * tables
+ */
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+ addr, entry->timestamp, write_fault))
+ return 1;
+ }
+ }
+
+ if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+ return 1;
+
+ if (!printk_ratelimit()) /* rate-limited: skip logging and the status read below */
+ return 0;
+
+ dev_err(adev->dev,
+ "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name,
+ retry_fault ? "retry" : "no-retry",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ /* NOTE(review): client_id indexes the name table with no bounds check visible here - confirm table size */
+ dev_err(adev->dev, " in page starting at address 0x%016llx from IH client %d (%s)\n",
+ addr, entry->client_id, soc_v1_0_ih_clientid_name[entry->client_id]);
+
+ if (amdgpu_sriov_vf(adev)) /* a VF stops before touching the fault status registers */
+ return 0;
+
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (!status)
+ return 0;
+
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
+
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
+
+ return 0;
+}
+/* Read the PASID stored for @vmid in the IH VMID LUT selected by @inst; returns true when that PASID is non-zero. */
+static bool gmc_v12_1_get_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint8_t inst,
+ uint16_t *p_pasid)
+{
+ uint16_t index;
+
+ if (inst/4) /* inst >= 4 selects LUT indices 0xA.., lower instances 0x2.. */
+ index = 0xA + inst%4;
+ else
+ index = 0x2 + inst%4;
+
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), index); /* index is written before the LUT read below */
+
+ *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff; /* keep the low 16 bits */
+
+ return !!(*p_pasid);
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+/* Invalidate @vmid on one hub by direct register access (engine 17), polling for the ACK under gmc.invalidate_lock. */
+static void gmc_v12_1_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
+ unsigned int vmhub, uint32_t flush_type)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 tmp;
+ /* Use register 17 for GART */
+ const unsigned eng = 17;
+ unsigned int i;
+ unsigned char hub_ip = 0;
+
+ hub_ip = (AMDGPU_IS_GFXHUB(vmhub)) ?
+ GC_HWIP : MMHUB_HWIP;
+
+ spin_lock(&adev->gmc.invalidate_lock);
+
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
+
+ /* Wait for ACK with a delay: at most adev->usec_timeout polls, 1us apart. */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng, hub_ip);
+ tmp &= 1 << vmid; /* only the bit for this VMID is checked */
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ /* Issue additional private vm invalidation to MMHUB (done even if the ACK wait timed out) */
+ if (!AMDGPU_IS_GFXHUB(vmhub) &&
+ (hub->vm_l2_bank_select_reserved_cid2) &&
+ !amdgpu_sriov_vf(adev)) {
+ inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ /* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */
+ inv_req |= (1 << 25);
+ /* Issue private invalidation */
+ WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
+ /* Read back to ensure invalidation is done*/
+ RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ }
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (i < adev->usec_timeout) /* loop broke out early, so the ACK was seen */
+ return;
+
+ dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v12_1_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table, via amdgpu_gmc_fw_reg_write_reg_wait() when possible, else by direct register access.
+ */
+static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ u32 inst;
+
+ if (AMDGPU_IS_GFXHUB(vmhub) &&
+ !adev->gfx.is_poweron) /* GFX hub flushes are skipped while gfx.is_poweron is false */
+ return;
+
+ if (vmhub >= AMDGPU_MMHUB0(0)) /* MMHUB flushes use instance 0, GFX hubs use their own index */
+ inst = 0;
+ else
+ inst = vmhub;
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if (((adev->gfx.kiq[inst].ring.sched.ready ||
+ adev->mes.ring[MES_PIPE_INST(inst, 0)].sched.ready) &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)))) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ const unsigned eng = 17;
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, inst);
+ return;
+ }
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ gmc_v12_1_flush_vm_hub(adev, vmid, vmhub, 0); /* NOTE(review): passes 0, not flush_type - confirm this is intended */
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ return;
+}
+
+/**
+ * gmc_v12_1_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flushed
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid, through MES when it is ready and new enough, else per matching VMID.
+ */
+static void gmc_v12_1_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ uint16_t queried;
+ int vmid, i;
+
+ if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x6f) {
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.xcc_id = inst;
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+
+ /* MES will invalidate hubs for the device(including slave xcc) from master, ignore request from slave */
+ if (!amdgpu_gfx_is_master_xcc(adev, inst))
+ return;
+
+ input.hub_id = AMDGPU_GFXHUB(0);
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+
+ if (all_hub) {
+ /* invalidate each mm_hub present in vmhubs_mask; every request is guarded by its own hub's bit */
+ if (test_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask)) {
+ input.hub_id = AMDGPU_MMHUB0(0);
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ if (test_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask)) {
+ input.hub_id = AMDGPU_MMHUB1(0);
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ }
+ return;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) { /* fallback: scan VMIDs 1-15 for ones mapped to @pasid */
+ bool valid;
+
+ valid = gmc_v12_1_get_vmid_pasid_mapping_info(adev, vmid, inst,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v12_1_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v12_1_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(inst),
+ flush_type);
+ }
+ }
+}
+/* Emit ring commands that program the page table base for @vmid and then request and wait for a TLB invalidation; returns @pd_addr. */
+static uint64_t gmc_v12_1_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); /* flush type 0 is always requested here */
+ unsigned eng = ring->vm_inv_eng; /* the ring's own invalidation engine */
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ return pd_addr;
+}
+/* Emit a ring write that stores @pasid in the IH VMID LUT entry for @vmid. */
+static void gmc_v12_1_emit_pasid_mapping(struct amdgpu_ring *ring,
+ unsigned vmid, unsigned pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ if (ring->vm_hub == AMDGPU_GFXHUB(0)) /* only GFXHUB(0) uses the non-MM LUT; every other hub uses the _MM one */
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
+}
+
+/*
+ * PTE format:
+ * 63 P
+ * 62:59 reserved
+ * 58 D
+ * 57 G
+ * 56 T
+ * 55:54 M
+ * 53:52 SW
+ * 51:48 reserved for future
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format:
+ * 63 P
+ * 62:58 block fragment size
+ * 57 reserved
+ * 56 A
+ * 55:54 M
+ * 53:52 reserved
+ * 51:48 reserved for future
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+/* Adjust a PDE's address and flags in place for the given page directory @level. */
+static void gmc_v12_1_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM)) /* rebase from gmc.vram_start to vram_base_offset */
+ *addr = adev->vm_manager.vram_base_offset + *addr -
+ adev->gmc.vram_start;
+ BUG_ON(*addr & 0xFFFF00000000003FULL); /* address must have bits 5:0 and 63:48 clear */
+
+ *flags |= AMDGPU_PTE_SNOOPED;
+
+ if (!adev->gmc.translate_further) /* the per-level tweaks below apply only with translate_further */
+ return;
+
+ if (level == AMDGPU_VM_PDB1) {
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE_GFX12))
+ *flags |= AMDGPU_PDE_BFS_GFX12(0x9);
+
+ } else if (level == AMDGPU_VM_PDB0) {
+ if (*flags & AMDGPU_PDE_PTE_GFX12) /* PDB0 entries never keep the PDE-as-PTE flag */
+ *flags &= ~AMDGPU_PDE_PTE_GFX12;
+ }
+}
+/* Fold the MTYPE, bus-atomics and snoop bits for @bo, as mapped by @adev, into *@flags. */
+static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ uint64_t *flags)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ bool is_vram = bo->tbo.resource &&
+ bo->tbo.resource->mem_type == TTM_PL_VRAM;
+ bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT);
+ bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
+ uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
+ bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+ unsigned int mtype, mtype_local;
+ bool snoop = false;
+ bool is_local = false;
+
+ switch (gc_ip_version) {
+ case IP_VERSION(12, 1, 0):
+ mtype_local = MTYPE_RW; /* default for local memory; only amdgpu_mtype_local == 1 changes it */
+ if (amdgpu_mtype_local == 1) {
+ DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+ mtype_local = MTYPE_NC;
+ } else if (amdgpu_mtype_local == 2) {
+ DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n");
+ } else {
+ DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+ }
+
+ is_local = (is_vram && adev == bo_adev); /* local: BO resides in VRAM of the mapping device itself */
+ snoop = true;
+ if (uncached) {
+ mtype = MTYPE_UC;
+ } else if (ext_coherent) {
+ mtype = is_local ? mtype_local : MTYPE_UC;
+ } else {
+ if (is_local)
+ mtype = mtype_local;
+ else
+ mtype = MTYPE_NC;
+ }
+ break;
+ default: /* other IP versions: no snoop, is_local stays false */
+ if (uncached || coherent)
+ mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
+ }
+
+ if (mtype != MTYPE_NC) /* MTYPE_NC leaves the MTYPE already in *flags untouched */
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, mtype);
+
+ if (is_local || adev->have_atomics_support)
+ *flags |= AMDGPU_PTE_BUS_ATOMICS;
+
+ *flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+}
+/* Translate @vm_flags into PTE bits in *@flags, then apply BO coherence flags when the PTE is valid and @bo is set. */
+static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *flags)
+{
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT: /* the default MTYPE is NC */
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_NC:
+ default: /* unrecognised MTYPE requests are treated as NC */
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_RW:
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_RW);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
+ break;
+ }
+
+ if ((*flags & AMDGPU_PTE_VALID) && bo) /* coherence flags may override the MTYPE chosen above */
+ gmc_v12_1_get_coherence_flags(adev, bo, flags);
+}
+/* GMC callback table for v12.1; installed by gmc_v12_1_set_gmc_funcs(). */
+static const struct amdgpu_gmc_funcs gmc_v12_1_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v12_1_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v12_1_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v12_1_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v12_1_emit_pasid_mapping,
+ .get_vm_pde = gmc_v12_1_get_vm_pde,
+ .get_vm_pte = gmc_v12_1_get_vm_pte,
+ .query_mem_partition_mode = &amdgpu_gmc_query_memory_partition, /* shared amdgpu_gmc helper, not v12.1 specific */
+ .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition, /* shared amdgpu_gmc helper as well */
+};
+/* Point adev->gmc.gmc_funcs at the v12.1 GMC callback table. */
+void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v12_1_gmc_funcs;
+}
+/* VM fault interrupt source callbacks for v12.1: state control and IV processing. */
+static const struct amdgpu_irq_src_funcs gmc_v12_1_irq_funcs = {
+ .set = gmc_v12_1_vm_fault_interrupt_state,
+ .process = gmc_v12_1_process_interrupt,
+};
+/* Attach the v12.1 VM fault callbacks to adev->gmc.vm_fault and declare a single interrupt type. */
+void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v12_1_irq_funcs;
+}
+/* Fill in hard-coded VRAM type and width for v12.1; gmc.vram_vendor is not set here. */
+void gmc_v12_1_init_vram_info(struct amdgpu_device *adev)
+{
+ /* TODO: query vram_info from ip discovery binary */
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM4;
+ adev->gmc.vram_width = 384 * 64;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h
new file mode 100644
index 000000000000..2f08f4b8cd99
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_1.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V12_1_H__
+#define __GMC_V12_1_H__
+
+void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev); /* installs the v12.1 GMC callback table */
+void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev); /* installs the v12.1 VM fault IRQ callbacks */
+void gmc_v12_1_init_vram_info(struct amdgpu_device *adev); /* sets fixed gmc.vram_type and gmc.vram_width */
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index a8ec95f42926..886bf77309a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -555,7 +555,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
gmc_v6_0_set_fault_enable_default(adev, true);
gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
- dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index fbd0bf147f50..b2e87d3aa203 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -705,7 +705,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
}
gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
- DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 6551b60f2584..1d5bd90ac57f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -560,7 +560,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
tmp = RREG32(mmCONFIG_MEMSIZE);
/* some boards may have garbage in the upper 16 bits */
if (tmp & 0xffff0000) {
- DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
+ drm_info(adev_to_drm(adev), "Probably bad vram size: 0x%08x\n", tmp);
if (tmp & 0xffff)
tmp &= 0xffff;
}
@@ -939,7 +939,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
gmc_v8_0_set_fault_enable_default(adev, true);
gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
- DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned int)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 8ad7519f7b58..e35ed0cc2ec6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -583,44 +583,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
hub = &adev->vmhub[vmhub];
if (retry_fault) {
- if (adev->irq.retry_cam_enabled) {
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- cam_index = entry->src_data[2] & 0x3ff;
+ cam_index = entry->src_data[2] & 0x3ff;
- ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
- addr, entry->timestamp, write_fault);
- WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
- if (ret)
- return 1;
- } else {
- /* Process it onyl if it's the first fault for this address */
- if (entry->ih != &adev->irq.ih_soft &&
- amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
- entry->timestamp))
- return 1;
-
- /* Delegate it to a different ring if the hardware hasn't
- * already done it.
- */
- if (entry->ih == &adev->irq.ih) {
- amdgpu_irq_delegate(adev, entry, 8);
- return 1;
- }
-
- /* Try to handle the recoverable page faults by filling page
- * tables
- */
- if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
- addr, entry->timestamp, write_fault))
- return 1;
- }
+ ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, cam_index, node_id,
+ write_fault);
+ /* Returning 1 here also prevents sending the IV to the KFD */
+ if (ret == 1)
+ return 1;
}
if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
@@ -1168,13 +1137,13 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
*/
mtype_local = MTYPE_RW;
if (amdgpu_mtype_local == 1) {
- DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+ drm_info_once(adev_to_drm(adev), "Using MTYPE_NC for local memory\n");
mtype_local = MTYPE_NC;
} else if (amdgpu_mtype_local == 2) {
- DRM_INFO_ONCE("Using MTYPE_CC for local memory\n");
+ drm_info_once(adev_to_drm(adev), "Using MTYPE_CC for local memory\n");
mtype_local = MTYPE_CC;
} else {
- DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+ drm_info_once(adev_to_drm(adev), "Using MTYPE_RW for local memory\n");
}
is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
num_possible_nodes() <= 1) ||
@@ -1235,16 +1204,16 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
break;
case AMDGPU_VM_MTYPE_WC:
- *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
break;
case AMDGPU_VM_MTYPE_RW:
- *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
break;
case AMDGPU_VM_MTYPE_CC:
- *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
break;
case AMDGPU_VM_MTYPE_UC:
- *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
break;
}
@@ -2006,7 +1975,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
44;
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
if (r) {
- dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
+ drm_warn(adev_to_drm(adev), "No suitable DMA available.\n");
return r;
}
adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);
@@ -2162,12 +2131,12 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
if (r)
return r;
- DRM_INFO("PCIE GART of %uM enabled.\n",
+ drm_info(adev_to_drm(adev), "PCIE GART of %uM enabled.\n",
(unsigned int)(adev->gmc.gart_size >> 20));
if (adev->gmc.pdb0_bo)
- DRM_INFO("PDB0 located at 0x%016llX\n",
+ drm_info(adev_to_drm(adev), "PDB0 located at 0x%016llX\n",
(unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
- DRM_INFO("PTB located at 0x%016llX\n",
+ drm_info(adev_to_drm(adev), "PTB located at 0x%016llX\n",
(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
index b32ea4129c61..451828bf583e 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -279,6 +279,16 @@ static int ih_v7_0_enable_ring(struct amdgpu_device *adev,
return 0;
}
+/*
+ * Build a register value in the IH_DOORBELL_RPTR field layout that carries
+ * @doorbell_index in the OFFSET field and has the ENABLE field set to 1.
+ */
+static uint32_t ih_v7_0_setup_retry_doorbell(u32 doorbell_index)
+{
+	u32 rptr_cfg = 0;
+
+	rptr_cfg = REG_SET_FIELD(rptr_cfg, IH_DOORBELL_RPTR, OFFSET, doorbell_index);
+
+	return REG_SET_FIELD(rptr_cfg, IH_DOORBELL_RPTR, ENABLE, 1);
+}
+
/**
* ih_v7_0_irq_init - init and enable the interrupt ring
*
@@ -363,6 +373,21 @@ static int ih_v7_0_irq_init(struct amdgpu_device *adev)
pci_set_master(adev->pdev);
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(7, 1, 0)) {
+ /* Allocate the doorbell for IH Retry CAM */
+ adev->irq.retry_cam_doorbell_index = (adev->doorbell_index.ih + 2) << 1;
+ WREG32_SOC15(OSSSYS, 0, regIH_DOORBELL_RETRY_CAM,
+ ih_v7_0_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index));
+
+ /* Enable IH Retry CAM */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RETRY_INT_CAM_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_RETRY_INT_CAM_CNTL, ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, IH_RETRY_INT_CAM_CNTL, CAM_SIZE, 0xF);
+ WREG32_SOC15(OSSSYS, 0, regIH_RETRY_INT_CAM_CNTL, tmp);
+
+ adev->irq.retry_cam_enabled = true;
+ }
+
/* enable interrupts */
ret = ih_v7_0_toggle_interrupts(adev, true);
if (ret)
@@ -542,6 +567,7 @@ static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
int r;
struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
+ unsigned int sw_ring_size;
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
&adev->irq.self_irq);
@@ -573,7 +599,9 @@ static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
/* initialize ih control register offset */
ih_v7_0_init_register_offset(adev);
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ sw_ring_size = (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(7, 1, 0)) ?
+ IH_SW_RING_SIZE : PAGE_SIZE;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, sw_ring_size, true);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index cc626036ed9c..46d25d55ebbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -41,6 +41,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_4_imu.bin");
static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_1.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_1.c
new file mode 100644
index 000000000000..539785a51f69
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_1.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "amdgpu_dpm.h"
+
+#include "imu_v12_1.h"
+
+#include "gc/gc_12_1_0_offset.h"
+#include "gc/gc_12_1_0_sh_mask.h"
+#include "mmhub/mmhub_4_2_0_offset.h"
+
+MODULE_FIRMWARE("amdgpu/gc_12_1_0_imu.bin");
+
+#define TRANSFER_RAM_MASK 0x001c0000
+
+/*
+ * Request the IMU firmware image named after the decoded GC IP version and
+ * record its ucode version. When firmware is loaded through PSP, also
+ * register the IMU_I and IMU_D ucode entries and add their page-aligned
+ * sizes to adev->firmware.fw_size.
+ *
+ * Returns 0 on success, or the error from amdgpu_ucode_request(); in that
+ * case the failure is logged and the firmware reference is released.
+ */
+static int imu_v12_1_init_microcode(struct amdgpu_device *adev)
+{
+	const struct imu_firmware_header_v1_0 *hdr;
+	struct amdgpu_firmware_info *entry;
+	char prefix[15];
+	int ret;
+
+	DRM_DEBUG("\n");
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, prefix, sizeof(prefix));
+	ret = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_imu.bin", prefix);
+	if (ret) {
+		dev_err(adev->dev,
+			"gfx12: Failed to load firmware \"%s_imu.bin\"\n",
+			prefix);
+		amdgpu_ucode_release(&adev->gfx.imu_fw);
+		return ret;
+	}
+
+	hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+	adev->gfx.imu_fw_version = le32_to_cpu(hdr->header.ucode_version);
+
+	/* Nothing more to register unless PSP performs the load. */
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
+	/* Instruction RAM image */
+	entry = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
+	entry->ucode_id = AMDGPU_UCODE_ID_IMU_I;
+	entry->fw = adev->gfx.imu_fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
+
+	/* Data RAM image */
+	entry = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
+	entry->ucode_id = AMDGPU_UCODE_ID_IMU_D;
+	entry->fw = adev->gfx.imu_fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
+
+	return 0;
+}
+
+/*
+ * Write the IMU firmware image into one XCC through the GFX_IMU I-RAM and
+ * D-RAM ADDR/DATA register pairs.
+ *
+ * The I-RAM words are read starting at header.ucode_array_offset_bytes;
+ * the D-RAM words are read from immediately after the I-RAM section
+ * (offset + imu_iram_ucode_size_bytes). Section sizes come from the
+ * firmware header and are converted from bytes to 32-bit words.
+ *
+ * NOTE(review): adev->gfx.imu_fw is dereferenced without a NULL check here;
+ * the visible caller checks it before calling.
+ */
+static void imu_v12_1_xcc_load_microcode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ const struct imu_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+ /* I-RAM section: start of the ucode array, size in dwords */
+ fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->imu_iram_ucode_size_bytes) / 4;
+
+ /* Set the I-RAM address register to 0 before streaming the data words */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGFX_IMU_I_RAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regGFX_IMU_I_RAM_DATA,
+ le32_to_cpup(fw_data++));
+
+ /* After the upload, the firmware version is written to the I-RAM address register */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regGFX_IMU_I_RAM_ADDR,
+ adev->gfx.imu_fw_version);
+
+ /* D-RAM section: follows the I-RAM section in the image */
+ fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
+ le32_to_cpu(hdr->imu_iram_ucode_size_bytes));
+ fw_size = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4;
+
+ /* Same sequence as above, using the D-RAM register pair */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGFX_IMU_D_RAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regGFX_IMU_D_RAM_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regGFX_IMU_D_RAM_ADDR,
+ adev->gfx.imu_fw_version);
+}
+
+/*
+ * Load the IMU microcode into each of the NUM_XCC(adev->gfx.xcc_mask)
+ * XCC instances, indexed from 0.
+ *
+ * Returns -EINVAL when no IMU firmware image is present, otherwise 0.
+ */
+static int imu_v12_1_load_microcode(struct amdgpu_device *adev)
+{
+	int xcc_id, xcc_count;
+
+	if (!adev->gfx.imu_fw)
+		return -EINVAL;
+
+	xcc_count = NUM_XCC(adev->gfx.xcc_mask);
+	for (xcc_id = 0; xcc_id < xcc_count; xcc_id++)
+		imu_v12_1_xcc_load_microcode(adev, xcc_id);
+
+	return 0;
+}
+
+/*
+ * Switch the compute partition layout so that each partition (XCP) holds
+ * @num_xccs_per_xcp XCCs.
+ *
+ * When PSP functions are available, PSP is asked to apply a spatial
+ * partition of NUM_XCC(xcc_mask) / @num_xccs_per_xcp partitions; on success
+ * (or when PSP is absent) the new value is stored in
+ * adev->gfx.num_xcc_per_xcp. @compute_partition_mode is not used by this
+ * implementation.
+ *
+ * Returns 0 on success, -EINVAL if @num_xccs_per_xcp is not positive, or
+ * the error returned by psp_spatial_partition().
+ */
+static int imu_v12_1_switch_compute_partition(struct amdgpu_device *adev,
+					       int num_xccs_per_xcp,
+					       int compute_partition_mode)
+{
+	int ret;
+
+	/* Used as a divisor below; reject values that would fault or make no sense. */
+	if (num_xccs_per_xcp <= 0)
+		return -EINVAL;
+
+	if (adev->psp.funcs) {
+		/* TODO: revisit asp interface once it's available */
+		ret = psp_spatial_partition(&adev->psp,
+					    NUM_XCC(adev->gfx.xcc_mask) /
+					    num_xccs_per_xcp);
+		if (ret)
+			return ret;
+	}
+
+	adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp;
+
+	return 0;
+}
+
+/* Placeholder callback: currently performs no work. */
+static void imu_v12_1_init_mcm_addr_lut(struct amdgpu_device *adev)
+{
+ /* TODO: fill in when the interface is ready */
+}
+
+/* IMU callback table wiring the imu_v12_1_* implementations in this file. */
+const struct amdgpu_imu_funcs gfx_v12_1_imu_funcs = {
+ .init_microcode = imu_v12_1_init_microcode,
+ .load_microcode = imu_v12_1_load_microcode,
+ .switch_compute_partition = imu_v12_1_switch_compute_partition,
+ .init_mcm_addr_lut = imu_v12_1_init_mcm_addr_lut,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_1.h b/drivers/gpu/drm/amd/amdgpu/imu_v12_1.h
new file mode 100644
index 000000000000..4a7c12bf7b0f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_1.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IMU_V12_1_H__
+#define __IMU_V12_1_H__
+
+/* IMU callback table; defined in imu_v12_1.c. */
+extern const struct amdgpu_imu_funcs gfx_v12_1_imu_funcs;
+
+#endif /* __IMU_V12_1_H__ */
+
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
index 4258d3e0b706..0002bcc6c4ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
@@ -26,6 +26,7 @@
*/
#include <linux/gpio/machine.h>
+#include <linux/pm_runtime.h>
#include "amdgpu.h"
#include "isp_v4_1_1.h"
@@ -145,6 +146,9 @@ static int isp_genpd_add_device(struct device *dev, void *data)
return -ENODEV;
}
+ /* The devices will be managed by the pm ops from the parent */
+ dev_pm_syscore_device(dev, true);
+
exit:
/* Continue to add */
return 0;
@@ -177,12 +181,47 @@ static int isp_genpd_remove_device(struct device *dev, void *data)
drm_err(&adev->ddev, "Failed to remove dev from genpd %d\n", ret);
return -ENODEV;
}
+ dev_pm_syscore_device(dev, false);
exit:
/* Continue to remove */
return 0;
}
+/*
+ * Per-child callback for device_for_each_child(): force-suspends @dev
+ * through runtime PM and returns that call's result. @data is unused.
+ */
+static int isp_suspend_device(struct device *dev, void *data)
+{
+ return pm_runtime_force_suspend(dev);
+}
+
+/*
+ * Per-child callback for device_for_each_child(): force-resumes @dev
+ * through runtime PM and returns that call's result. @data is unused.
+ */
+static int isp_resume_device(struct device *dev, void *data)
+{
+ return pm_runtime_force_resume(dev);
+}
+
+/*
+ * Run isp_suspend_device() over the children of isp->parent.
+ *
+ * Returns the value of device_for_each_child(); a non-zero result is
+ * logged against the parent device before being returned.
+ */
+static int isp_v4_1_1_hw_suspend(struct amdgpu_isp *isp)
+{
+	int ret = device_for_each_child(isp->parent, NULL, isp_suspend_device);
+
+	if (!ret)
+		return 0;
+
+	dev_err(isp->parent, "failed to suspend hw devices (%d)\n", ret);
+	return ret;
+}
+
+/*
+ * Run isp_resume_device() over the children of isp->parent.
+ *
+ * Returns the value of device_for_each_child(); a non-zero result is
+ * logged against the parent device before being returned.
+ */
+static int isp_v4_1_1_hw_resume(struct amdgpu_isp *isp)
+{
+	int ret = device_for_each_child(isp->parent, NULL, isp_resume_device);
+
+	if (!ret)
+		return 0;
+
+	dev_err(isp->parent, "failed to resume hw device (%d)\n", ret);
+	return ret;
+}
+
static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp)
{
const struct software_node *amd_camera_node, *isp4_node;
@@ -369,6 +408,8 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp)
static const struct isp_funcs isp_v4_1_1_funcs = {
.hw_init = isp_v4_1_1_hw_init,
.hw_fini = isp_v4_1_1_hw_fini,
+ .hw_suspend = isp_v4_1_1_hw_suspend,
+ .hw_resume = isp_v4_1_1_hw_resume,
};
void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 27c76bd424cf..9fe8d10ab270 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -240,7 +240,7 @@ static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev)
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
- DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+ drm_err(adev_to_drm(adev), "failed to disable JPEG power gating\n");
return r;
}
}
@@ -271,7 +271,7 @@ static int jpeg_v2_0_enable_power_gating(struct amdgpu_device *adev)
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
- DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+ drm_err(adev_to_drm(adev), "failed to enable JPEG power gating\n");
return r;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index d1a011c40ba2..98f5e0622bc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -298,7 +298,7 @@ static int jpeg_v3_0_disable_static_power_gating(struct amdgpu_device *adev)
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
- DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+ drm_err(adev_to_drm(adev), "failed to disable JPEG power gating\n");
return r;
}
}
@@ -333,7 +333,7 @@ static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev)
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
- DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+ drm_err(adev_to_drm(adev), "failed to enable JPEG power gating\n");
return r;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 33db2c1ae6cc..0bd83820dd20 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -335,7 +335,7 @@ static int jpeg_v4_0_disable_static_power_gating(struct amdgpu_device *adev)
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
- DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG disable power gating failed\n");
+ drm_err(adev_to_drm(adev), "failed to disable JPEG power gating\n");
return r;
}
}
@@ -370,7 +370,7 @@ static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev)
UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
if (r) {
- DRM_DEV_ERROR(adev->dev, "amdgpu: JPEG enable power gating failed\n");
+ drm_err(adev_to_drm(adev), "failed to enable JPEG power gating\n");
return r;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c
new file mode 100644
index 000000000000..1821dced936f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c
@@ -0,0 +1,714 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_3.h"
+
+#include "vcn/vcn_5_3_0_offset.h"
+#include "vcn/vcn_5_3_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "jpeg_v5_3_0.h"
+
+static void jpeg_v5_3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_3_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+
+
+/**
+ * jpeg_v5_3_0_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Declares one JPEG instance with one ring, then installs the decode ring
+ * and irq function pointers.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_3_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v5_3_0_set_dec_ring_funcs(adev);
+ jpeg_v5_3_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_3_0_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Registers the JPEG decode interrupt source, runs the common JPEG sw
+ * init and resume helpers, sets up the doorbell-driven decode ring,
+ * records the pitch register offsets and publishes the supported reset
+ * mask through sysfs.
+ *
+ * Return: 0 on success, or the first error reported by a helper.
+ */
+static int jpeg_v5_3_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *dec_ring;
+	int ret;
+
+	/* JPEG TRAP */
+	ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+				VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+	if (ret)
+		return ret;
+
+	ret = amdgpu_jpeg_sw_init(adev);
+	if (ret)
+		return ret;
+
+	ret = amdgpu_jpeg_resume(adev);
+	if (ret)
+		return ret;
+
+	dec_ring = adev->jpeg.inst->ring_dec;
+	dec_ring->use_doorbell = true;
+	dec_ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+	dec_ring->vm_hub = AMDGPU_MMHUB0(0);
+
+	sprintf(dec_ring->name, "jpeg_dec");
+	ret = amdgpu_ring_init(adev, dec_ring, 512, &adev->jpeg.inst->irq, 0,
+			       AMDGPU_RING_PRIO_DEFAULT, NULL);
+	if (ret)
+		return ret;
+
+	adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+	adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+
+	/* TODO: Add queue reset mask when FW fully supports it */
+	adev->jpeg.supported_reset =
+		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+	if (!amdgpu_sriov_vf(adev))
+		adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	return amdgpu_jpeg_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * jpeg_v5_3_0_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspends JPEG, removes the reset-mask sysfs entry and frees the sw
+ * allocation. If the suspend step fails nothing else is torn down.
+ *
+ * Return: 0 on success, or the error from the suspend or sw_fini helper.
+ */
+static int jpeg_v5_3_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret = amdgpu_jpeg_suspend(adev);
+
+	if (ret)
+		return ret;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/**
+ * jpeg_v5_3_0_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Programs the VCN doorbell range through the NBIO callback and, unless
+ * JPEG DPG is supported, runs the ring test on the decode ring.
+ *
+ * Return: 0 on success or when the ring test is skipped, otherwise the
+ * ring test error.
+ */
+static int jpeg_v5_3_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *dec_ring = adev->jpeg.inst->ring_dec;
+
+	adev->nbio.funcs->vcn_doorbell_range(adev, dec_ring->use_doorbell,
+			(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+	/* Skip ring test because pause DPG is not implemented. */
+	if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)
+		return 0;
+
+	return amdgpu_ring_test_helper(dec_ring);
+}
+
+/**
+ * jpeg_v5_3_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancels the pending idle work, then power-gates JPEG if it is not
+ * already gated and the JRBC status register reads non-zero.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_3_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	/* The status register is only read when the block is not gated. */
+	if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) {
+		if (RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS))
+			jpeg_v5_3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_3_0_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs hw fini first and, if that succeeds, the common JPEG suspend.
+ *
+ * Return: 0 on success, or the error from whichever step failed.
+ */
+static int jpeg_v5_3_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int ret = jpeg_v5_3_0_hw_fini(ip_block);
+
+	if (ret)
+		return ret;
+
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v5_3_0_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs the common JPEG resume first and, if that succeeds, hw init.
+ *
+ * Return: 0 on success, or the error from whichever step failed.
+ */
+static int jpeg_v5_3_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	int ret = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (ret)
+		return ret;
+
+	return jpeg_v5_3_0_hw_init(ip_block);
+}
+
+/*
+ * Write 0 to JPEG_CGC_GATE, then clear the JPEG0_DEC_MODE and
+ * JPEG_ENC_MODE bits in JPEG_CGC_CTRL.
+ */
+static void jpeg_v5_3_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+	uint32_t cgc_ctrl;
+
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, 0);
+
+	cgc_ctrl = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+	cgc_ctrl &= ~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK |
+		      JPEG_CGC_CTRL__JPEG_ENC_MODE_MASK);
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, cgc_ctrl);
+}
+
+/*
+ * Set the JPEG0_DEC_MODE bit in JPEG_CGC_CTRL, then set the JPEG0_DEC,
+ * JPEG_ENC, JMCIF and JRBBM bits in JPEG_CGC_GATE.
+ */
+static void jpeg_v5_3_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+	uint32_t cgc_ctrl, cgc_gate;
+
+	cgc_ctrl = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+	cgc_ctrl |= 1 << JPEG_CGC_CTRL__JPEG0_DEC_MODE__SHIFT;
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, cgc_ctrl);
+
+	cgc_gate = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+	cgc_gate |= JPEG_CGC_GATE__JPEG0_DEC_MASK |
+		    JPEG_CGC_GATE__JPEG_ENC_MASK |
+		    JPEG_CGC_GATE__JMCIF_MASK |
+		    JPEG_CGC_GATE__JRBBM_MASK;
+	WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, cgc_gate);
+}
+
+/*
+ * Power the JPEG block up: write 1 into the ONO1_PWR_CONFIG field of
+ * UVD_IPX_DLDO_CONFIG_ONO1, wait for the ONO1_PWR_STATUS field of
+ * UVD_IPX_DLDO_STATUS to read 0, then clear the JPEG_POWER_STATUS bit.
+ *
+ * Always returns 0.
+ * NOTE(review): the result of SOC15_WAIT_ON_RREG() is discarded here, so a
+ * wait timeout would not be reported to the caller - confirm intended.
+ */
+static int jpeg_v5_3_0_disable_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = 1 << UVD_IPX_DLDO_CONFIG_ONO1__ONO1_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(JPEG, 0, regUVD_IPX_DLDO_CONFIG_ONO1, data);
+ SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ return 0;
+}
+
+/*
+ * Power-gate the JPEG block: set the JPEG_POWER_STATUS bit and, when
+ * AMD_PG_SUPPORT_JPEG is set in pg_flags, write 2 into the ONO1_PWR_CONFIG
+ * field and wait for the ONO1_PWR_STATUS field to read 1.
+ *
+ * Always returns 0.
+ * NOTE(review): the result of SOC15_WAIT_ON_RREG() is discarded here, so a
+ * wait timeout would not be reported to the caller - confirm intended.
+ */
+static int jpeg_v5_3_0_enable_power_gating(struct amdgpu_device *adev)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG_ONO1),
+ 2 << UVD_IPX_DLDO_CONFIG_ONO1__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/*
+ * Program JPEG_CGC_CTRL and JPEG_CGC_GATE for DPG mode.
+ *
+ * CGC_CTRL gets DYN_CLOCK_MODE set only when AMD_CG_SUPPORT_JPEG_MGCG is in
+ * cg_flags, plus CLK_GATE_DLY_TIMER = 1 and CLK_OFF_DELAY = 4; CGC_GATE is
+ * written as 0. When @indirect is non-zero the values are queued with
+ * ADD_SOC24_JPEG_TO_DPG_SRAM(), otherwise they are written with
+ * WREG32_SOC24_JPEG_DPG_MODE().
+ */
+static void jpeg_engine_5_0_0_dpg_clock_gating_mode(struct amdgpu_device *adev,
+			int inst_idx, uint8_t indirect)
+{
+	uint32_t cgc_ctrl;
+
+	/* JPEG disable CGC */
+	cgc_ctrl = (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) ?
+			1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT :
+			0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	cgc_ctrl |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	cgc_ctrl |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+
+	if (!indirect) {
+		WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_CTRL, cgc_ctrl, indirect);
+
+		/* Turn on all JPEG clocks */
+		WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_GATE, 0, indirect);
+		return;
+	}
+
+	ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_CTRL, cgc_ctrl, indirect);
+
+	/* Turn on all JPEG clocks */
+	ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_GATE, 0, indirect);
+}
+
+/**
+ * jpeg_v5_3_0_start_dpg_mode - Jpeg start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start JPEG block with dpg mode: enable power gating, set the PG mode
+ * bit, program clock gating, tiling config and the JRBC system interrupt
+ * (queued to DPG SRAM when @indirect is true, written directly otherwise),
+ * then program the doorbell and the JRBC ring buffer registers.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec;
+ uint32_t reg_data = 0;
+
+ /* NOTE(review): return value is not checked (the visible helper always returns 0) */
+ jpeg_v5_3_0_enable_power_gating(adev);
+
+ // enable dynamic power gating mode
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ /* Rewind the SRAM write cursor to the start of the instance's DPG SRAM buffer */
+ if (indirect)
+ adev->jpeg.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr;
+
+ jpeg_engine_5_0_0_dpg_clock_gating_mode(adev, inst_idx, indirect);
+
+ /* MJPEG global tiling registers */
+ /* NOTE(review): the direct-write branches below pass a literal 1 as the macro's last argument although indirect is false here - confirm intended */
+ if (indirect)
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, indirect);
+ else
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 1);
+
+ /* enable System Interrupt for JRBC */
+ if (indirect)
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_SYS_INT_EN,
+ JPEG_SYS_INT_EN__DJRBC0_MASK, indirect);
+ else
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_SYS_INT_EN,
+ JPEG_SYS_INT_EN__DJRBC0_MASK, 1);
+
+ if (indirect) {
+ /* add nop to workaround PSP size check */
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipUVD_NO_OP, 0, indirect);
+
+ amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0);
+ }
+
+ /* Program the JPEG doorbell offset and enable it (instance 0 of VCN is used here, not inst_idx) */
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ /* Ring buffer setup: VMID 0, base address, zeroed read/write pointers, size in dwords */
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ /* Resync the software write pointer from the hardware register */
+ ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC0_UVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_3_0_stop_dpg_mode - Jpeg stop with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Clears the JPEG_PG_MODE bit in UVD_JPEG_POWER_STATUS for the instance.
+ */
+static void jpeg_v5_3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+	uint32_t power_status;
+
+	power_status = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+	power_status &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+	WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, power_status);
+}
+
+/**
+ * jpeg_v5_3_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block. When JPEG DPG is supported the DPG start
+ * path is used for instance 0; otherwise power gating and clock gating are
+ * disabled and the tiling config, JMI channel, JRBC interrupt, doorbell and
+ * ring buffer registers are programmed directly.
+ *
+ * Return: 0 on success, or the error from the DPG start or power-gating
+ * helper.
+ */
+static int jpeg_v5_3_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* DPG path handles the whole start sequence itself */
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ r = jpeg_v5_3_0_start_dpg_mode(adev, 0, adev->jpeg.indirect_sram);
+ return r;
+ }
+
+ /* disable power gating */
+ r = jpeg_v5_3_0_disable_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v5_3_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC0_MASK);
+
+ /* Program the JPEG doorbell offset and enable it */
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ /* Ring buffer setup: VMID 0, base address, zeroed read/write pointers, size in dwords */
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ /* Resync the software write pointer from the hardware register */
+ ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_3_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the JPEG block. With AMD_PG_SUPPORT_JPEG_DPG set only DPG mode is
+ * left on instance 0; otherwise JMI is put into soft reset, clock gating is
+ * enabled and power gating is requested.
+ *
+ * Returns 0 on success, otherwise the error returned by
+ * jpeg_v5_3_0_enable_power_gating(). On that error path the function returns
+ * before the amdgpu_dpm_enable_jpeg(adev, false) call is reached.
+ */
+static int jpeg_v5_3_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ jpeg_v5_3_0_stop_dpg_mode(adev, 0);
+ } else {
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v5_3_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v5_3_0_enable_power_gating(adev);
+ if (r)
+ return r;
+ }
+
+ /* mirror of the amdgpu_dpm_enable_jpeg(adev, true) done in start */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_3_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Reads UVD_JRBC_RB_RPTR of JPEG instance 0 and returns the value.
+ */
+static uint64_t jpeg_v5_3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* not referenced by name below; presumably used by RREG32_SOC15() */
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t rptr;
+
+	rptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR);
+
+	return rptr;
+}
+
+/**
+ * jpeg_v5_3_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the write pointer: the value stored at ring->wptr_cpu_addr when
+ * the ring uses a doorbell, otherwise the UVD_JRBC_RB_WPTR register value.
+ */
+static uint64_t jpeg_v5_3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* no doorbell: the register is the only source of the pointer */
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * jpeg_v5_3_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the low 32 bits of ring->wptr to the hardware, either through the
+ * doorbell (after updating the CPU-side copy) or through UVD_JRBC_RB_WPTR.
+ */
+static void jpeg_v5_3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t wptr_lo = lower_32_bits(ring->wptr);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, wptr_lo);
+		return;
+	}
+
+	/* update the CPU-side copy first, then ring the doorbell */
+	*ring->wptr_cpu_addr = wptr_lo;
+	WDOORBELL32(ring->doorbell_index, wptr_lo);
+}
+
+/*
+ * Report whether JPEG instance 0 is idle: true when every bit of
+ * RB_JOB_DONE_MASK is set in UVD_JRBC_STATUS.
+ */
+static bool jpeg_v5_3_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t status = RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS);
+
+	return (status & UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+	       UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK;
+}
+
+/*
+ * Wait on UVD_JRBC_STATUS of JPEG instance 0 with RB_JOB_DONE_MASK passed as
+ * both the expected value and the mask (the same condition
+ * jpeg_v5_3_0_is_idle() tests) and return the result of SOC15_WAIT_ON_RREG().
+ * NOTE(review): timeout length and error code come from that macro, whose
+ * definition is not visible here.
+ */
+static int jpeg_v5_3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS,
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+/*
+ * Gate or ungate the JPEG clocks. Gating is refused with -EBUSY while the
+ * block is not idle; any state other than AMD_CG_STATE_GATE ungates.
+ * Returns 0 otherwise.
+ */
+static int jpeg_v5_3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (state != AMD_CG_STATE_GATE) {
+		jpeg_v5_3_0_disable_clock_gating(adev);
+		return 0;
+	}
+
+	/* never gate clocks underneath a running job */
+	if (!jpeg_v5_3_0_is_idle(ip_block))
+		return -EBUSY;
+
+	jpeg_v5_3_0_enable_clock_gating(adev);
+	return 0;
+}
+
+/*
+ * Move the JPEG block to the requested power-gating state by stopping
+ * (AMD_PG_STATE_GATE) or starting (any other state) it. The cached
+ * adev->jpeg.cur_state is updated only when the transition succeeds; a
+ * request for the current state is a no-op. Returns 0 or the stop/start error.
+ */
+static int jpeg_v5_3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r;
+
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	r = (state == AMD_PG_STATE_GATE) ? jpeg_v5_3_0_stop(adev) :
+					   jpeg_v5_3_0_start(adev);
+	if (r)
+		return r;
+
+	adev->jpeg.cur_state = state;
+	return 0;
+}
+
+/*
+ * Interrupt state callback for the JPEG IRQ source. It ignores all of its
+ * arguments, touches no hardware and always returns 0.
+ */
+static int jpeg_v5_3_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/*
+ * Interrupt handler for the JPEG IRQ source. A JPEG_DECODE source id triggers
+ * fence processing on the decode ring; any other source id is logged as an
+ * error. Always returns 0.
+ */
+static int jpeg_v5_3_0_process_interrupt(struct amdgpu_device *adev,
+					  struct amdgpu_irq_src *source,
+					  struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("IH: JPEG TRAP\n");
+
+	if (entry->src_id == VCN_5_0__SRCID__JPEG_DECODE) {
+		amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+	} else {
+		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+			      entry->src_id, entry->src_data[0]);
+	}
+
+	return 0;
+}
+
+/*
+ * Reset the JPEG decode ring by stopping and restarting the whole block,
+ * bracketed by the generic ring reset helpers. @vmid is unused. Returns the
+ * first error from stop/start, otherwise the result of
+ * amdgpu_ring_reset_helper_end(); on a stop/start error the end helper is
+ * not called.
+ */
+static int jpeg_v5_3_0_ring_reset(struct amdgpu_ring *ring,
+				   unsigned int vmid,
+				   struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int r;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	r = jpeg_v5_3_0_stop(adev);
+	if (!r)
+		r = jpeg_v5_3_0_start(adev);
+	if (r)
+		return r;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/* IP-level callbacks for JPEG 5.3.0, referenced by jpeg_v5_3_0_ip_block. */
+static const struct amd_ip_funcs jpeg_v5_3_0_ip_funcs = {
+ .name = "jpeg_v5_3_0",
+ .early_init = jpeg_v5_3_0_early_init,
+ .sw_init = jpeg_v5_3_0_sw_init,
+ .sw_fini = jpeg_v5_3_0_sw_fini,
+ .hw_init = jpeg_v5_3_0_hw_init,
+ .hw_fini = jpeg_v5_3_0_hw_fini,
+ .suspend = jpeg_v5_3_0_suspend,
+ .resume = jpeg_v5_3_0_resume,
+ .is_idle = jpeg_v5_3_0_is_idle,
+ .wait_for_idle = jpeg_v5_3_0_wait_for_idle,
+ .set_clockgating_state = jpeg_v5_3_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_3_0_set_powergating_state,
+};
+
+/*
+ * Ring callbacks for the JPEG 5.3.0 decode ring. Pointer access and reset are
+ * implemented in this file; the packet emission callbacks (emit_ib,
+ * emit_fence, emit_vm_flush, nop/start/end, wreg, reg_wait) are the
+ * jpeg_v4_0_3 implementations, so the size budgets below describe those.
+ */
+static const struct amdgpu_ring_funcs jpeg_v5_3_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_3_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_3_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_3_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v5_3_0_ring_reset,
+};
+
+/* Attach the JPEG 5.3.0 ring callbacks to the decode ring. */
+static void jpeg_v5_3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+
+	ring->funcs = &jpeg_v5_3_0_dec_ring_vm_funcs;
+}
+
+/* IRQ source callbacks for JPEG 5.3.0: .set is a no-op, .process handles fences. */
+static const struct amdgpu_irq_src_funcs jpeg_v5_3_0_irq_funcs = {
+ .set = jpeg_v5_3_0_set_interrupt_state,
+ .process = jpeg_v5_3_0_process_interrupt,
+};
+
+/* Register a single interrupt type and the JPEG 5.3.0 IRQ callbacks. */
+static void jpeg_v5_3_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq_src *irq = &adev->jpeg.inst->irq;
+
+	irq->num_types = 1;
+	irq->funcs = &jpeg_v5_3_0_irq_funcs;
+}
+
+/* IP block descriptor for JPEG version 5.3.0; declared extern in jpeg_v5_3_0.h. */
+const struct amdgpu_ip_block_version jpeg_v5_3_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 3,
+ .rev = 0,
+ .funcs = &jpeg_v5_3_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.h
new file mode 100644
index 000000000000..c1e7537d0f18
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_3_0_H__
+#define __JPEG_V5_3_0_H__
+
+#define vcnipJPEG_CGC_GATE 0x4160
+#define vcnipJPEG_CGC_CTRL 0x4161
+#define vcnipJPEG_SYS_INT_EN 0x4141
+#define vcnipUVD_NO_OP 0x0029
+#define vcnipJPEG_DEC_GFX10_ADDR_CONFIG 0x404A
+
+extern const struct amdgpu_ip_block_version jpeg_v5_3_0_ip_block;
+
+#endif /* __JPEG_V5_3_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 64cae89357b6..f2309d72bbe6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -59,7 +59,8 @@ err_reserve_bo_failed:
}
static int
-mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
+ struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue,
uint64_t wptr)
{
@@ -93,8 +94,28 @@ mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
return ret;
}
- queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
+ ret = amdgpu_bo_reserve(wptr_obj->obj, true);
+ if (ret) {
+ DRM_ERROR("Failed to reserve wptr bo\n");
+ return ret;
+ }
+
+ /* TODO use eviction fence instead of pinning. */
+ ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n");
+ goto unresv_bo;
+ }
+
+ queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
+ amdgpu_bo_unreserve(wptr_obj->obj);
+
return 0;
+
+unresv_bo:
+ amdgpu_bo_unreserve(wptr_obj->obj);
+ return ret;
+
}
static int convert_to_mes_priority(int priority)
@@ -112,9 +133,9 @@ static int convert_to_mes_priority(int priority)
}
}
-static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int mes_userq_map(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
struct amdgpu_mqd_prop *userq_props = queue->userq_prop;
@@ -157,9 +178,9 @@ static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
return 0;
}
-static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int mes_userq_unmap(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
struct mes_remove_queue_input queue_input;
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
@@ -223,7 +244,7 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
amdgpu_mes_lock(&adev->mes);
r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false,
- &hung_db_num, db_array);
+ &hung_db_num, db_array, 0);
amdgpu_mes_unlock(&adev->mes);
if (r) {
dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
@@ -251,10 +272,10 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
return r;
}
-static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
- struct drm_amdgpu_userq_in *args_in,
- struct amdgpu_usermode_queue *queue)
+static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
+ struct drm_amdgpu_userq_in *args_in)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
struct drm_amdgpu_userq_in *mqd_user = args_in;
@@ -300,7 +321,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
- r = amdgpu_userq_input_va_validate(queue, compute_mqd->eop_va,
+ r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va,
2048);
if (r)
goto free_mqd;
@@ -341,11 +362,11 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->tmz_queue =
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
- r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->shadow_va,
+ r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va,
shadow_info.shadow_size);
if (r)
goto free_mqd;
- r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->csa_va,
+ r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va,
shadow_info.csa_size);
if (r)
goto free_mqd;
@@ -366,7 +387,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
r = -ENOMEM;
goto free_mqd;
}
- r = amdgpu_userq_input_va_validate(queue, mqd_sdma_v11->csa_va,
+ r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va,
32);
if (r)
goto free_mqd;
@@ -391,7 +412,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
}
/* FW expects WPTR BOs to be mapped into GART */
- r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
+ r = mes_userq_create_wptr_mapping(adev, uq_mgr, queue, userq_props->wptr_gpu_addr);
if (r) {
DRM_ERROR("Failed to create WPTR mapping\n");
goto free_ctx;
@@ -411,18 +432,18 @@ free_props:
return r;
}
-static void
-mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static void mes_userq_mqd_destroy(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+
amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
kfree(queue->userq_prop);
amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
}
-static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int mes_userq_preempt(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
struct mes_suspend_gang_input queue_input;
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
@@ -466,9 +487,9 @@ out:
return r;
}
-static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_usermode_queue *queue)
+static int mes_userq_restore(struct amdgpu_usermode_queue *queue)
{
+ struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
struct mes_resume_gang_input queue_input;
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 3a52754b5cad..81bf9385d55a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -56,11 +56,13 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_4_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_4_mes1.bin");
static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block);
static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block);
-static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
-static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
+static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
+static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
#define MES_EOP_SIZE 2048
#define GFX_MES_DRAM_SIZE 0x80000
@@ -660,7 +662,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
break;
default:
- DRM_ERROR("unsupported misc op (%d) \n", input->op);
+ drm_err(adev_to_drm(mes->adev), "unsupported misc op (%d)\n", input->op);
return -EINVAL;
}
@@ -811,7 +813,7 @@ static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
mes_reset_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_reset_queue_pkt.doorbell_offset_addr =
- mes->hung_queue_db_array_gpu_addr;
+ mes->hung_queue_db_array_gpu_addr[0];
if (input->detect_only)
mes_reset_queue_pkt.hang_detect_only = 1;
@@ -1570,7 +1572,7 @@ static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
}
-static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
+static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id)
{
int r = 0;
struct amdgpu_ip_block *ip_block;
@@ -1625,7 +1627,7 @@ failure:
return r;
}
-static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
+static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id)
{
if (adev->mes.ring[0].sched.ready) {
mes_v11_0_kiq_dequeue(&adev->mes.ring[0]);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 744e95d3984a..b1c864dc79a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -42,8 +42,8 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin");
static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block);
static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block);
-static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev);
-static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
+static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
+static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
#define MES_EOP_SIZE 2048
@@ -699,7 +699,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
break;
default:
- DRM_ERROR("unsupported misc op (%d) \n", input->op);
+ DRM_ERROR("unsupported misc op (%d)\n", input->op);
return -EINVAL;
}
@@ -939,7 +939,7 @@ static int mes_v12_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
mes_reset_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_reset_queue_pkt.doorbell_offset_addr =
- mes->hung_queue_db_array_gpu_addr;
+ mes->hung_queue_db_array_gpu_addr[0];
if (input->detect_only)
mes_reset_queue_pkt.hang_detect_only = 1;
@@ -1489,7 +1489,7 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
if (pipe == AMDGPU_MES_SCHED_PIPE) {
if (adev->enable_uni_mes)
- r = amdgpu_mes_map_legacy_queue(adev, ring);
+ r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
else
r = mes_v12_0_kiq_enable_queue(adev);
if (r)
@@ -1739,7 +1739,7 @@ static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
-static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
+static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id)
{
int r = 0;
struct amdgpu_ip_block *ip_block;
@@ -1801,13 +1801,13 @@ failure:
return r;
}
-static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
+static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id)
{
if (adev->mes.ring[0].sched.ready) {
if (adev->enable_uni_mes)
amdgpu_mes_unmap_legacy_queue(adev,
&adev->mes.ring[AMDGPU_MES_SCHED_PIPE],
- RESET_QUEUES, 0, 0);
+ RESET_QUEUES, 0, 0, 0);
else
mes_v12_0_kiq_dequeue_sched(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
new file mode 100644
index 000000000000..7b8c670d0a9e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c
@@ -0,0 +1,1968 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "soc_v1_0.h"
+#include "gc/gc_12_1_0_offset.h"
+#include "gc/gc_12_1_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "v12_structs.h"
+#include "mes_v12_api_def.h"
+
+MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_1_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_1_0_uni_mes.bin");
+
+static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id);
+static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block);
+static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id);
+static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id);
+
+#define MES_EOP_SIZE 2048
+
+#define regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT 0x100000
+#define XCC_MID_MASK 0x41000000
+
+/*
+ * Publish ring->wptr for a MES ring: store it to the CPU-visible write
+ * pointer location, then ring the 64-bit doorbell. Only doorbell rings are
+ * supported; anything else hits BUG().
+ */
+static void mes_v12_1_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		BUG();
+
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
+/* Return the read pointer value currently stored at ring->rptr_cpu_addr. */
+static u64 mes_v12_1_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *ring->rptr_cpu_addr;
+}
+
+/*
+ * Return the write pointer stored at ring->wptr_cpu_addr. Only doorbell
+ * rings are supported; anything else hits BUG().
+ */
+static u64 mes_v12_1_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	if (!ring->use_doorbell)
+		BUG();
+
+	return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+}
+
+/*
+ * Ring callbacks for the MES rings: 64-bit pointers, pointer accessors from
+ * this file and the generic nop inserter. No emit callbacks are provided.
+ */
+static const struct amdgpu_ring_funcs mes_v12_1_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v12_1_ring_get_rptr,
+ .get_wptr = mes_v12_1_ring_get_wptr,
+ .set_wptr = mes_v12_1_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+/*
+ * Printable names for MES scheduler API opcodes, indexed by
+ * header.opcode (see mes_v12_1_get_op_string()). Used only for log messages.
+ * NOTE(review): entry order presumably mirrors the MES_SCH_API_* enum, which
+ * is not visible here - confirm when adding opcodes.
+ */
+static const char *mes_v12_1_opcodes[] = {
+ "SET_HW_RSRC",
+ "SET_SCHEDULING_CONFIG",
+ "ADD_QUEUE",
+ "REMOVE_QUEUE",
+ "PERFORM_YIELD",
+ "SET_GANG_PRIORITY_LEVEL",
+ "SUSPEND",
+ "RESUME",
+ "RESET",
+ "SET_LOG_BUFFER",
+ "CHANGE_GANG_PRORITY",
+ "QUERY_SCHEDULER_STATUS",
+ "unused",
+ "SET_DEBUG_VMID",
+ "MISC",
+ "UPDATE_ROOT_PAGE_TABLE",
+ "AMD_LOG",
+ "SET_SE_MODE",
+ "SET_GANG_SUBMIT",
+ "SET_HW_RSRC_1",
+ "INVALIDATE_TLBS",
+};
+
+/*
+ * Printable names for the sub-opcodes of a MES_SCH_API_MISC packet, indexed
+ * by the packet's opcode field (see mes_v12_1_get_misc_op_string()).
+ */
+static const char *mes_v12_1_misc_opcodes[] = {
+ "WRITE_REG",
+ "INV_GART",
+ "QUERY_STATUS",
+ "READ_REG",
+ "WAIT_REG_MEM",
+ "SET_SHADER_DEBUGGER",
+ "NOTIFY_WORK_ON_UNMAPPED_QUEUE",
+ "NOTIFY_TO_UNMAP_PROCESSES",
+};
+
+/*
+ * Map a packet's header opcode to its printable name, or return NULL when
+ * the opcode lies outside mes_v12_1_opcodes[].
+ */
+static const char *mes_v12_1_get_op_string(union MESAPI__MISC *x_pkt)
+{
+	if (x_pkt->header.opcode >= ARRAY_SIZE(mes_v12_1_opcodes))
+		return NULL;
+
+	return mes_v12_1_opcodes[x_pkt->header.opcode];
+}
+
+/*
+ * Map the sub-opcode of a MES_SCH_API_MISC packet to its printable name.
+ * Returns NULL for non-MISC packets and for sub-opcodes outside
+ * mes_v12_1_misc_opcodes[].
+ */
+static const char *mes_v12_1_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+	if (x_pkt->header.opcode != MES_SCH_API_MISC)
+		return NULL;
+
+	if (x_pkt->opcode >= ARRAY_SIZE(mes_v12_1_misc_opcodes))
+		return NULL;
+
+	return mes_v12_1_misc_opcodes[x_pkt->opcode];
+}
+
+/**
+ * mes_v12_1_submit_pkt_and_poll_completion - submit a MES API packet and wait
+ *
+ * @mes: MES context
+ * @xcc_id: XCC index, used with @pipe to select the MES ring and its lock
+ * @pipe: MES pipe index
+ * @pkt: API packet to submit; its MES_API_STATUS member is filled in here
+ * @size: size of @pkt in bytes
+ * @api_status_off: byte offset of the MES_API_STATUS member inside @pkt
+ *
+ * Writes @pkt followed by a QUERY_SCHEDULER_STATUS packet to the ring and
+ * polls the ring fence. A writeback slot serves as per-packet completion
+ * status: it is zeroed before submission and must be non-zero afterwards.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range opcode, -ETIMEDOUT when
+ * the ring does not drain or MES does not complete the packet in time, or
+ * the error from amdgpu_device_wb_get() / amdgpu_ring_alloc().
+ */
+static int mes_v12_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+						     int xcc_id, int pipe, void *pkt,
+						     int size, int api_status_off)
+{
+	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+	signed long timeout = 2100000; /* 2100 ms */
+	struct amdgpu_device *adev = mes->adev;
+	struct amdgpu_ring *ring = &mes->ring[MES_PIPE_INST(xcc_id, pipe)];
+	spinlock_t *ring_lock = &mes->ring_lock[MES_PIPE_INST(xcc_id, pipe)];
+	struct MES_API_STATUS *api_status;
+	union MESAPI__MISC *x_pkt = pkt;
+	const char *op_str, *misc_op_str;
+	unsigned long flags;
+	u64 status_gpu_addr;
+	u32 seq, status_offset;
+	u64 *status_ptr;
+	signed long r;
+	int ret;
+
+	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+		return -EINVAL;
+
+	if (amdgpu_emu_mode) {
+		timeout *= 1000;
+	} else if (amdgpu_sriov_vf(adev)) {
+		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
+		timeout = 15 * 600 * 1000;
+	}
+
+	ret = amdgpu_device_wb_get(adev, &status_offset);
+	if (ret)
+		return ret;
+
+	/* per-packet completion status lives in a writeback slot */
+	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+	status_ptr = (u64 *)&adev->wb.wb[status_offset];
+	*status_ptr = 0;
+
+	spin_lock_irqsave(ring_lock, flags);
+	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+	if (r)
+		goto error_unlock_free;
+
+	seq = ++ring->fence_drv.sync_seq;
+	r = amdgpu_fence_wait_polling(ring,
+				      seq - ring->fence_drv.num_fences_mask,
+				      timeout);
+	if (r < 1) {
+		/*
+		 * A zero result means the poll ran out of time. Returning it
+		 * unchanged would report success although nothing was
+		 * submitted, so turn it into an error code.
+		 */
+		if (!r)
+			r = -ETIMEDOUT;
+		goto error_undo;
+	}
+
+	/* ask MES to write 1 into the writeback slot once @pkt has completed */
+	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+	api_status->api_completion_fence_addr = status_gpu_addr;
+	api_status->api_completion_fence_value = 1;
+
+	amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+	/* trailing status query signals the ring fence with this sequence */
+	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	mes_status_pkt.api_status.api_completion_fence_addr =
+		ring->fence_drv.gpu_addr;
+	mes_status_pkt.api_status.api_completion_fence_value = seq;
+
+	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+				   sizeof(mes_status_pkt) / 4);
+
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(ring_lock, flags);
+
+	op_str = mes_v12_1_get_op_string(x_pkt);
+	misc_op_str = mes_v12_1_get_misc_op_string(x_pkt);
+
+	if (misc_op_str)
+		dev_dbg(adev->dev, "MES(%d, %d) msg=%s (%s) was emitted\n",
+			xcc_id, pipe, op_str, misc_op_str);
+	else if (op_str)
+		dev_dbg(adev->dev, "MES(%d, %d) msg=%s was emitted\n",
+			xcc_id, pipe, op_str);
+	else
+		dev_dbg(adev->dev, "MES(%d, %d) msg=%d was emitted\n",
+			xcc_id, pipe, x_pkt->header.opcode);
+
+	/* both the ring fence and the per-packet status must have been written */
+	r = amdgpu_fence_wait_polling(ring, seq, timeout);
+	if (r < 1 || !*status_ptr) {
+		if (misc_op_str)
+			dev_err(adev->dev,
+				"MES(%d, %d) failed to respond to msg=%s (%s)\n",
+				xcc_id, pipe, op_str, misc_op_str);
+		else if (op_str)
+			dev_err(adev->dev,
+				"MES(%d, %d) failed to respond to msg=%s\n",
+				xcc_id, pipe, op_str);
+		else
+			dev_err(adev->dev,
+				"MES(%d, %d) failed to respond to msg=%d\n",
+				xcc_id, pipe, x_pkt->header.opcode);
+
+		/* debug aid: park here for as long as halt_if_hws_hang is set */
+		while (halt_if_hws_hang)
+			schedule();
+
+		r = -ETIMEDOUT;
+		goto error_wb_free;
+	}
+
+	amdgpu_device_wb_free(adev, status_offset);
+	return 0;
+
+error_undo:
+	dev_err(adev->dev, "MES(%d, %d) ring buffer is full.\n", xcc_id, pipe);
+	amdgpu_ring_undo(ring);
+
+error_unlock_free:
+	spin_unlock_irqrestore(ring_lock, flags);
+
+error_wb_free:
+	amdgpu_device_wb_free(adev, status_offset);
+	return r;
+}
+
+/*
+ * Translate an AMDGPU_RING_TYPE_* value into the matching MES_QUEUE_TYPE_*
+ * value. Unsupported ring types hit BUG(); -1 is returned after it.
+ */
+static int convert_to_mes_queue_type(int queue_type)
+{
+	switch (queue_type) {
+	case AMDGPU_RING_TYPE_GFX:
+		return MES_QUEUE_TYPE_GFX;
+	case AMDGPU_RING_TYPE_COMPUTE:
+		return MES_QUEUE_TYPE_COMPUTE;
+	case AMDGPU_RING_TYPE_SDMA:
+		return MES_QUEUE_TYPE_SDMA;
+	case AMDGPU_RING_TYPE_MES:
+		return MES_QUEUE_TYPE_SCHQ;
+	default:
+		BUG();
+		break;
+	}
+
+	return -1;
+}
+
+/*
+ * Build a MES ADD_QUEUE packet from @input and submit it on the scheduler
+ * pipe. In cooperative mode the packet goes to the master XCC recorded for
+ * the input XCC's scheduler pipe instance rather than to input->xcc_id.
+ * Returns the result of mes_v12_1_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_1_add_hw_queue(struct amdgpu_mes *mes,
+				   struct mes_add_queue_input *input)
+{
+	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+	int xcc_id = input->xcc_id;
+	int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE);
+
+	if (mes->enable_coop_mode)
+		xcc_id = mes->master_xcc_ids[inst];
+
+	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	mes_add_queue_pkt.process_id = input->process_id;
+	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
+	mes_add_queue_pkt.process_va_start = input->process_va_start;
+	mes_add_queue_pkt.process_va_end = input->process_va_end;
+	mes_add_queue_pkt.process_quantum = input->process_quantum;
+	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
+	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
+	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
+	mes_add_queue_pkt.inprocess_gang_priority =
+		input->inprocess_gang_priority;
+	mes_add_queue_pkt.gang_global_priority_level =
+		input->gang_global_priority_level;
+	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+
+	mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
+
+	mes_add_queue_pkt.queue_type =
+		convert_to_mes_queue_type(input->queue_type);
+	mes_add_queue_pkt.paging = input->paging;
+	mes_add_queue_pkt.vm_context_cntl = input->vm_cntx_cntl;
+	mes_add_queue_pkt.gws_base = input->gws_base;
+	mes_add_queue_pkt.gws_size = input->gws_size;
+	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
+	mes_add_queue_pkt.tma_addr = input->tma_addr;
+	mes_add_queue_pkt.trap_en = input->trap_en;
+	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
+	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+
+	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+	mes_add_queue_pkt.gds_size = input->queue_size;
+
+	mes_add_queue_pkt.full_sh_mem_config_data = input->sh_mem_config_data;
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes,
+			xcc_id, AMDGPU_MES_SCHED_PIPE,
+			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+			offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+/*
+ * Build a MES REMOVE_QUEUE packet for the queue identified by
+ * input->doorbell_offset / input->gang_context_addr and submit it on the
+ * scheduler pipe. In cooperative mode the master XCC of the input XCC's
+ * scheduler pipe instance is targeted. Returns the submission result.
+ */
+static int mes_v12_1_remove_hw_queue(struct amdgpu_mes *mes,
+				      struct mes_remove_queue_input *input)
+{
+	union MESAPI__REMOVE_QUEUE pkt;
+	int target_xcc = input->xcc_id;
+
+	if (mes->enable_coop_mode)
+		target_xcc = mes->master_xcc_ids[MES_PIPE_INST(input->xcc_id,
+							AMDGPU_MES_SCHED_PIPE)];
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.gang_context_addr = input->gang_context_addr;
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes,
+			target_xcc, AMDGPU_MES_SCHED_PIPE,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+/*
+ * Build a MES RESET packet for the queue identified by
+ * input->doorbell_offset and submit it on the KIQ pipe when unified MES is
+ * enabled, otherwise on the scheduler pipe. Returns the submission result.
+ *
+ * The api_status offset must be taken from the packet type actually sent
+ * (union MESAPI__RESET); using another packet's layout makes the submit
+ * helper write the completion fence into unrelated fields.
+ * NOTE(review): relies on union MESAPI__RESET exposing an api_status member;
+ * its definition is not visible in this file - confirm.
+ */
+static int mes_v12_1_reset_hw_queue(struct amdgpu_mes *mes,
+				     struct mes_reset_queue_input *input)
+{
+	union MESAPI__RESET mes_reset_queue_pkt;
+	int pipe;
+
+	memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+	mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+	mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
+	/* mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; */
+	/*mes_reset_queue_pkt.reset_queue_only = 1;*/
+
+	if (mes->adev->enable_uni_mes)
+		pipe = AMDGPU_MES_KIQ_PIPE;
+	else
+		pipe = AMDGPU_MES_SCHED_PIPE;
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes,
+			input->xcc_id, pipe,
+			&mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+			offsetof(union MESAPI__RESET, api_status));
+}
+
+/*
+ * Map a legacy kernel queue through MES: an ADD_QUEUE packet with
+ * map_legacy_kq set, sent on the KIQ pipe when unified MES is enabled and on
+ * the scheduler pipe otherwise. Returns the submission result.
+ */
+static int mes_v12_1_map_legacy_queue(struct amdgpu_mes *mes,
+				       struct mes_map_legacy_queue_input *input)
+{
+	union MESAPI__ADD_QUEUE pkt;
+	int pipe = mes->adev->enable_uni_mes ? AMDGPU_MES_KIQ_PIPE :
+					       AMDGPU_MES_SCHED_PIPE;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.pipe_id = input->pipe_id;
+	pkt.queue_id = input->queue_id;
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.mqd_addr = input->mqd_addr;
+	pkt.wptr_addr = input->wptr_addr;
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	pkt.map_legacy_kq = 1;
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes,
+			input->xcc_id, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+/*
+ * Unmap or preempt a legacy queue through MES.
+ *
+ * Submits a REMOVE_QUEUE packet. For PREEMPT_QUEUES_NO_UNMAP the packet
+ * requests a legacy gfx queue preemption and carries the trailing fence
+ * address/data; for every other action it requests an unmap and carries
+ * the converted queue type.
+ */
+static int mes_v12_1_unmap_legacy_queue(struct amdgpu_mes *mes,
+					struct mes_unmap_legacy_queue_input *input)
+{
+	union MESAPI__REMOVE_QUEUE pkt;
+	int target_pipe = mes->adev->enable_uni_mes ?
+			  AMDGPU_MES_KIQ_PIPE : AMDGPU_MES_SCHED_PIPE;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.gang_context_addr = 0;
+	pkt.pipe_id = input->pipe_id;
+	pkt.queue_id = input->queue_id;
+
+	if (input->action != PREEMPT_QUEUES_NO_UNMAP) {
+		/* plain unmap of the legacy queue */
+		pkt.unmap_legacy_queue = 1;
+		pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	} else {
+		/* preempt only; MES gets the trailing fence to use */
+		pkt.preempt_legacy_gfx_queue = 1;
+		pkt.tf_addr = input->trail_fence_addr;
+		pkt.tf_data = lower_32_bits(input->trail_fence_data);
+	}
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes,
+			input->xcc_id, target_pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+/*
+ * suspend_gang callback. This implementation does no work: both arguments
+ * are ignored and success (0) is always returned.
+ */
+static int mes_v12_1_suspend_gang(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input)
+{
+ return 0;
+}
+
+/*
+ * resume_gang callback. This implementation does no work: both arguments
+ * are ignored and success (0) is always returned.
+ */
+static int mes_v12_1_resume_gang(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input)
+{
+ return 0;
+}
+
+/*
+ * Send a QUERY_SCHEDULER_STATUS packet to @pipe on @xcc_id and wait for
+ * its completion. The packet carries only the API header.
+ */
+static int mes_v12_1_query_sched_status(struct amdgpu_mes *mes,
+					int pipe, int xcc_id)
+{
+	union MESAPI__QUERY_MES_STATUS pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+/*
+ * Derive an XCC index from a register offset.
+ *
+ * Bits [17:16] of @reg_offset select one of the register ranges; when any
+ * bit of XCC_MID_MASK is set in the offset the result is biased by 4.
+ */
+static uint32_t mes_v12_1_get_xcc_from_reg(uint32_t reg_offset)
+{
+	/* Each XCC has two register ranges, encoded in reg_offset[17:16] */
+	uint32_t range_idx = (reg_offset >> 16) & 0x3;
+
+	/* Check xcc reg offset range */
+	if (reg_offset & XCC_MID_MASK)
+		return range_idx + 4;
+
+	return range_idx;
+}
+
+/*
+ * Fill in the RRMT option for a register access.
+ *
+ * If the normalized offset of @reg falls into the XCC register range, the
+ * owning XCC is recorded in xcd_die_id and the mode is LOCAL_XCD when that
+ * XCC equals @xcc_id, REMOTE_XCD otherwise. For all other registers only
+ * the mode is written (LOCAL_REMOTE_AID); xcd_die_id is left untouched.
+ */
+static void mes_v12_1_get_rrmt(uint32_t reg, uint32_t xcc_id,
+			       struct RRMT_OPTION *rrmt_opt)
+{
+	uint32_t norm = soc_v1_0_normalize_xcc_reg_offset(reg);
+
+	if (!soc_v1_0_normalize_xcc_reg_range(norm)) {
+		rrmt_opt->mode = MES_RRMT_MODE_LOCAL_REMOTE_AID;
+		return;
+	}
+
+	rrmt_opt->xcd_die_id = mes_v12_1_get_xcc_from_reg(reg);
+	if (xcc_id == rrmt_opt->xcd_die_id)
+		rrmt_opt->mode = MES_RRMT_MODE_LOCAL_XCD;
+	else
+		rrmt_opt->mode = MES_RRMT_MODE_REMOTE_XCD;
+}
+
+/*
+ * Compute the RRMT option for @reg as seen from logical XCC @xcc_id and
+ * return the offset to place in the MES packet: the normalized offset
+ * unless the resulting mode is MES_RRMT_MODE_REMOTE_MID, in which case
+ * @reg is returned unchanged.
+ */
+static uint32_t mes_v12_1_route_reg(struct amdgpu_device *adev,
+				    uint32_t xcc_id, uint32_t reg,
+				    struct RRMT_OPTION *rrmt_opt)
+{
+	mes_v12_1_get_rrmt(reg, GET_INST(GC, xcc_id), rrmt_opt);
+
+	if (rrmt_opt->mode != MES_RRMT_MODE_REMOTE_MID)
+		return soc_v1_0_normalize_xcc_reg_offset(reg);
+
+	return reg;
+}
+
+/*
+ * Submit a MES MISC packet for the operation described by @input.
+ *
+ * Supported operations: register read, register write, wait-reg-mem,
+ * write-wait-write, shader debugger setup and config change. Register
+ * operations have their offsets routed through mes_v12_1_route_reg().
+ *
+ * The packet is sent on the KIQ pipe when unified MES is enabled and on the
+ * scheduler pipe otherwise; SET_SHADER_DEBUGGER always uses the scheduler
+ * pipe.
+ *
+ * Returns -EINVAL for an unknown op, otherwise the result of
+ * mes_v12_1_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_1_misc_op(struct amdgpu_mes *mes,
+			     struct mes_misc_op_input *input)
+{
+	struct amdgpu_device *adev = mes->adev;
+	union MESAPI__MISC misc_pkt;
+	int pipe;
+
+	if (adev->enable_uni_mes)
+		pipe = AMDGPU_MES_KIQ_PIPE;
+	else
+		pipe = AMDGPU_MES_SCHED_PIPE;
+
+	memset(&misc_pkt, 0, sizeof(misc_pkt));
+
+	misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	misc_pkt.header.opcode = MES_SCH_API_MISC;
+	misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	switch (input->op) {
+	case MES_MISC_OP_READ_REG:
+		misc_pkt.opcode = MESAPI_MISC__READ_REG;
+		misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
+		misc_pkt.read_reg.reg_offset =
+			mes_v12_1_route_reg(adev, input->xcc_id,
+					    input->read_reg.reg_offset,
+					    &misc_pkt.read_reg.rrmt_opt);
+		break;
+	case MES_MISC_OP_WRITE_REG:
+		misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
+		misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
+		misc_pkt.write_reg.reg_offset =
+			mes_v12_1_route_reg(adev, input->xcc_id,
+					    input->write_reg.reg_offset,
+					    &misc_pkt.write_reg.rrmt_opt);
+		break;
+	case MES_MISC_OP_WRM_REG_WAIT:
+		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
+		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+		misc_pkt.wait_reg_mem.reg_offset1 =
+			mes_v12_1_route_reg(adev, input->xcc_id,
+					    input->wrm_reg.reg0,
+					    &misc_pkt.wait_reg_mem.rrmt_opt1);
+		/* only one register is involved in a plain wait */
+		misc_pkt.wait_reg_mem.reg_offset2 = 0;
+		break;
+	case MES_MISC_OP_WRM_REG_WR_WAIT:
+		misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+		misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
+		misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+		misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+		misc_pkt.wait_reg_mem.reg_offset1 =
+			mes_v12_1_route_reg(adev, input->xcc_id,
+					    input->wrm_reg.reg0,
+					    &misc_pkt.wait_reg_mem.rrmt_opt1);
+		misc_pkt.wait_reg_mem.reg_offset2 =
+			mes_v12_1_route_reg(adev, input->xcc_id,
+					    input->wrm_reg.reg1,
+					    &misc_pkt.wait_reg_mem.rrmt_opt2);
+		break;
+	case MES_MISC_OP_SET_SHADER_DEBUGGER:
+		/* always goes to the scheduler pipe, even with unified MES */
+		pipe = AMDGPU_MES_SCHED_PIPE;
+		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
+		misc_pkt.set_shader_debugger.process_context_addr =
+			input->set_shader_debugger.process_context_addr;
+		misc_pkt.set_shader_debugger.flags.u32all =
+			input->set_shader_debugger.flags.u32all;
+		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
+			input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
+		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
+		       input->set_shader_debugger.tcp_watch_cntl,
+		       sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
+		misc_pkt.set_shader_debugger.trap_en =
+			input->set_shader_debugger.trap_en;
+		break;
+	case MES_MISC_OP_CHANGE_CONFIG:
+		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+		misc_pkt.change_config.opcode =
+			MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+		misc_pkt.change_config.option.bits.limit_single_process =
+			input->change_config.option.limit_single_process;
+		break;
+	default:
+		DRM_ERROR("unsupported misc op (%d) \n", input->op);
+		return -EINVAL;
+	}
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes,
+			input->xcc_id, pipe,
+			&misc_pkt, sizeof(misc_pkt),
+			offsetof(union MESAPI__MISC, api_status));
+}
+
+/*
+ * Send the SET_HW_RSRC_1 packet for @pipe on @xcc_id.
+ *
+ * The KIQ unmap timeout is set to 100. When cooperative mode is enabled and
+ * @pipe is the scheduler pipe, the packet additionally enables coop mode
+ * and points MES at the master XCC's shared command buffer.
+ */
+static int mes_v12_1_set_hw_resources_1(struct amdgpu_mes *mes,
+					int pipe, int xcc_id)
+{
+	union MESAPI_SET_HW_RESOURCES_1 pkt;
+	int inst = MES_PIPE_INST(xcc_id, pipe);
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	pkt.mes_kiq_unmap_timeout = 100;
+
+	if (mes->enable_coop_mode && pipe == AMDGPU_MES_SCHED_PIPE) {
+		int master = mes->master_xcc_ids[inst];
+
+		pkt.mes_coop_mode = 1;
+		pkt.coop_sch_shared_mc_addr =
+			mes->shared_cmd_buf_gpu_addr[master];
+	}
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+}
+
+/*
+ * Write the GFX HQD mask for pipe 0 into @pkt. Only entry 0 of
+ * gfx_hqd_mask is written here; other entries keep whatever the caller
+ * put there.
+ */
+static void mes_v12_1_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt)
+{
+ /*
+ * GFX V12 has only one GFX pipe, but 8 queues in it.
+ * GFX pipe 0 queue 0 is being used by Kernel queue.
+ * Set GFX pipe 0 queue 1-7 for MES scheduling
+ * mask = 1111 1110b
+ */
+ pkt->gfx_hqd_mask[0] = 0xFE;
+}
+
+/*
+ * Send the SET_HW_RSRC packet for @pipe on @xcc_id.
+ *
+ * VMID masks, GDS size, HQD masks and aggregated doorbells are only
+ * provided for the scheduler pipe. Context/fence pointers, IP register
+ * bases, feature flags and the optional event log buffer are provided for
+ * every pipe.
+ */
+static int mes_v12_1_set_hw_resources(struct amdgpu_mes *mes,
+				      int pipe, int xcc_id)
+{
+	struct amdgpu_device *adev = mes->adev;
+	union MESAPI_SET_HW_RESOURCES pkt;
+	int n;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	if (pipe == AMDGPU_MES_SCHED_PIPE) {
+		pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+		pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+		pkt.gds_size = adev->gds.gds_size;
+		pkt.paging_vmid = 0;
+
+		/* queues MES is allowed to schedule on, per engine type */
+		for (n = 0; n < MAX_COMPUTE_PIPES; n++)
+			pkt.compute_hqd_mask[n] = mes->compute_hqd_mask[n];
+
+		mes_v12_1_set_gfx_hqd_mask(&pkt);
+
+		for (n = 0; n < MAX_SDMA_PIPES; n++)
+			pkt.sdma_hqd_mask[n] = mes->sdma_hqd_mask[n];
+
+		for (n = 0; n < AMD_PRIORITY_NUM_LEVELS; n++)
+			pkt.aggregated_doorbells[n] =
+				mes->aggregated_doorbells[n];
+	}
+
+	pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[pipe];
+	pkt.query_status_fence_gpu_mc_ptr =
+		mes->query_status_fence_gpu_addr[pipe];
+
+	/* register segment bases of instance 0 of each IP */
+	for (n = 0; n < 5; n++) {
+		pkt.gc_base[n] = adev->reg_offset[GC_HWIP][0][n];
+		pkt.mmhub_base[n] = adev->reg_offset[MMHUB_HWIP][0][n];
+		pkt.osssys_base[n] = adev->reg_offset[OSSSYS_HWIP][0][n];
+	}
+
+	pkt.disable_reset = 1;
+	pkt.disable_mes_log = 1;
+	pkt.use_different_vmid_compute = 1;
+	pkt.enable_reg_active_poll = 1;
+	pkt.enable_level_process_quantum_check = 1;
+
+	/*
+	 * Keep the oversubscribe timer for sdma. Once unmapped doorbell
+	 * handling is supported, other queues will not use the
+	 * oversubscribe timer.
+	 * handling mode - 0: disabled; 1: basic version; 2: basic+ version
+	 */
+	pkt.oversubscription_timer = 50;
+	pkt.unmapped_doorbell_handling = 1;
+
+	if (amdgpu_mes_log_enable) {
+		pkt.enable_mes_event_int_logging = 1;
+		pkt.event_intr_history_gpu_mc_ptr =
+			mes->event_log_gpu_addr + MES_PIPE_INST(xcc_id, pipe) * AMDGPU_MES_LOG_BUFFER_SIZE;
+	}
+
+	if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+		pkt.limit_single_process = 1;
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes, xcc_id, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+/*
+ * Program CP_MES_DOORBELL_CONTROL1..5 on @xcc_id with the aggregated
+ * doorbells for the LOW, NORMAL, MEDIUM, HIGH and REALTIME priority levels
+ * (in that register order). Each register is read, its OFFSET/EN/HIT
+ * fields are cleared, the doorbell offset is inserted and EN is set.
+ * Finally CP_HQD_GFX_CONTROL is overwritten with only DB_UPDATED_MSG_EN
+ * set.
+ */
+static void mes_v12_1_init_aggregated_doorbell(struct amdgpu_mes *mes,
+ int xcc_id)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data;
+
+ /* CONTROL1 <- low priority aggregated doorbell */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1);
+ data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL1, data);
+
+ /* CONTROL2 <- normal priority aggregated doorbell */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2);
+ data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL2, data);
+
+ /* CONTROL3 <- medium priority aggregated doorbell */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3);
+ data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL3, data);
+
+ /* CONTROL4 <- high priority aggregated doorbell */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4);
+ data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL4, data);
+
+ /* CONTROL5 <- realtime priority aggregated doorbell */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5);
+ data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_DOORBELL_CONTROL5, data);
+
+ /* plain write, not read-modify-write: every other field becomes 0 */
+ data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_GFX_CONTROL, data);
+}
+
+
+/*
+ * Configure CP_UNMAPPED_DOORBELL on @xcc_id.
+ *
+ * PROC_LSB is always set to 0xd. The ENABLE bit is set when @enable is
+ * true and explicitly cleared when it is false, so a previously enabled
+ * register can be turned off again.
+ */
+static void mes_v12_1_enable_unmapped_doorbell_handling(
+		struct amdgpu_mes *mes, bool enable, int xcc_id)
+{
+	struct amdgpu_device *adev = mes->adev;
+	uint32_t data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL);
+
+	/*
+	 * The default PROC_LSB setting is 0xc which means doorbell
+	 * addr[16:12] gives the doorbell page number. For kfd, each
+	 * process will use 2 pages of doorbell, we need to change the
+	 * setting to 0xd
+	 */
+	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
+	data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;
+
+	/* drop any stale ENABLE bit read back from hardware first */
+	data &= ~(1u << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT);
+	if (enable)
+		data |= 1u << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;
+
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_UNMAPPED_DOORBELL, data);
+}
+
+/*
+ * Compiled out: legacy queue reset through MES. Kept under #if 0, so
+ * nothing in this block is built or referenced.
+ *
+ * For a GFX queue the RESET packet sets reset_legacy_gfx and fills the
+ * *_lp fields; for any other queue type it sets reset_queue_only and
+ * identifies the queue by doorbell offset.
+ */
+#if 0
+static int mes_v12_1_reset_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_reset_legacy_queue_input *input)
+{
+ union MESAPI__RESET mes_reset_queue_pkt;
+ int pipe;
+
+ memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+ mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+ mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_reset_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+
+ if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
+ mes_reset_queue_pkt.reset_legacy_gfx = 1;
+ mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
+ mes_reset_queue_pkt.queue_id_lp = input->queue_id;
+ mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
+ mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
+ mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
+ mes_reset_queue_pkt.vmid_id_lp = input->vmid;
+ } else {
+ mes_reset_queue_pkt.reset_queue_only = 1;
+ mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
+ }
+
+ /* KIQ pipe with unified MES, scheduler pipe otherwise */
+ if (mes->adev->enable_uni_mes)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ return mes_v12_1_submit_pkt_and_poll_completion(mes,
+ input->xcc_id, pipe,
+ &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+ offsetof(union MESAPI__RESET, api_status));
+}
+#endif
+
+/*
+ * Translate an amdgpu hub id into the hub id MES expects:
+ * GFXHUB -> 0, MMHUB0 -> 1, MMHUB1 -> 2. Any other id yields -EINVAL.
+ */
+static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
+{
+	/*
+	 * MES doesn't support invalidating gc_hub on a slave xcc
+	 * individually; the master xcc invalidates all gc_hubs of the
+	 * partition.
+	 */
+	if (AMDGPU_IS_GFXHUB(id))
+		return 0;
+
+	if (AMDGPU_IS_MMHUB0(id))
+		return 1;
+
+	if (AMDGPU_IS_MMHUB1(id))
+		return 2;
+
+	return -EINVAL;
+}
+
+/*
+ * Invalidate TLBs for a PASID through MES.
+ *
+ * The request is submitted on the KIQ pipe. In cooperative mode it is
+ * redirected to the master XCC of input->xcc_id. Returns -EINVAL when the
+ * hub id cannot be translated, otherwise the submit result.
+ */
+static int mes_v12_1_inv_tlbs_pasid(struct amdgpu_mes *mes,
+				    struct mes_inv_tlbs_pasid_input *input)
+{
+	union MESAPI__INV_TLBS pkt;
+	int target_xcc = input->xcc_id;
+	int inst = MES_PIPE_INST(target_xcc, AMDGPU_MES_SCHED_PIPE);
+	int hub;
+
+	if (mes->enable_coop_mode)
+		target_xcc = mes->master_xcc_ids[inst];
+
+	/* convert amdgpu_mes_hub_id to the hub_id MES expects */
+	hub = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
+	if (hub < 0)
+		return -EINVAL;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_INV_TLBS;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.invalidate_tlbs.inv_sel = 0;
+	pkt.invalidate_tlbs.flush_type = input->flush_type;
+	pkt.invalidate_tlbs.inv_sel_id = input->pasid;
+	pkt.invalidate_tlbs.hub_id = hub;
+
+	return mes_v12_1_submit_pkt_and_poll_completion(mes, target_xcc,
+			AMDGPU_MES_KIQ_PIPE,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__INV_TLBS, api_status));
+}
+
+/*
+ * MES v12.1 callback table. suspend_gang and resume_gang point at stubs
+ * that always return 0.
+ */
+static const struct amdgpu_mes_funcs mes_v12_1_funcs = {
+ .add_hw_queue = mes_v12_1_add_hw_queue,
+ .remove_hw_queue = mes_v12_1_remove_hw_queue,
+ .map_legacy_queue = mes_v12_1_map_legacy_queue,
+ .unmap_legacy_queue = mes_v12_1_unmap_legacy_queue,
+ .suspend_gang = mes_v12_1_suspend_gang,
+ .resume_gang = mes_v12_1_resume_gang,
+ .misc_op = mes_v12_1_misc_op,
+ .reset_hw_queue = mes_v12_1_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_1_inv_tlbs_pasid,
+};
+
+/*
+ * Create a page-aligned VRAM buffer for the MES instruction ucode of
+ * (@xcc_id, @pipe) and copy the ucode section of the firmware image into
+ * it. The section offset and size come from the firmware header. The
+ * buffer is unmapped and unreserved again before returning.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v12_1_allocate_ucode_buffer(struct amdgpu_device *adev,
+					   enum amdgpu_mes_pipe pipe,
+					   int xcc_id)
+{
+	const struct mes_firmware_header_v1_0 *hdr =
+		(const struct mes_firmware_header_v1_0 *)
+		adev->mes.fw[pipe]->data;
+	const __le32 *ucode = (const __le32 *)(adev->mes.fw[pipe]->data +
+			le32_to_cpu(hdr->mes_ucode_offset_bytes));
+	unsigned ucode_size = le32_to_cpu(hdr->mes_ucode_size_bytes);
+	int inst = MES_PIPE_INST(xcc_id, pipe);
+	int ret;
+
+	ret = amdgpu_bo_create_reserved(adev, ucode_size,
+					PAGE_SIZE,
+					AMDGPU_GEM_DOMAIN_VRAM,
+					&adev->mes.ucode_fw_obj[inst],
+					&adev->mes.ucode_fw_gpu_addr[inst],
+					(void **)&adev->mes.ucode_fw_ptr[inst]);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to create mes fw bo\n", ret);
+		return ret;
+	}
+
+	memcpy(adev->mes.ucode_fw_ptr[inst], ucode, ucode_size);
+
+	amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[inst]);
+	amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[inst]);
+
+	return 0;
+}
+
+/*
+ * Create a 64KB-aligned VRAM buffer for the MES data ucode of
+ * (@xcc_id, @pipe) and copy the data section of the firmware image into
+ * it. The section offset and size come from the firmware header. The
+ * buffer is unmapped and unreserved again before returning.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v12_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
+						enum amdgpu_mes_pipe pipe,
+						int xcc_id)
+{
+	const struct mes_firmware_header_v1_0 *hdr =
+		(const struct mes_firmware_header_v1_0 *)
+		adev->mes.fw[pipe]->data;
+	const __le32 *ucode_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+			le32_to_cpu(hdr->mes_ucode_data_offset_bytes));
+	unsigned data_size = le32_to_cpu(hdr->mes_ucode_data_size_bytes);
+	int inst = MES_PIPE_INST(xcc_id, pipe);
+	int ret;
+
+	ret = amdgpu_bo_create_reserved(adev, data_size,
+					64 * 1024,
+					AMDGPU_GEM_DOMAIN_VRAM,
+					&adev->mes.data_fw_obj[inst],
+					&adev->mes.data_fw_gpu_addr[inst],
+					(void **)&adev->mes.data_fw_ptr[inst]);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", ret);
+		return ret;
+	}
+
+	memcpy(adev->mes.data_fw_ptr[inst], ucode_data, data_size);
+
+	amdgpu_bo_kunmap(adev->mes.data_fw_obj[inst]);
+	amdgpu_bo_unreserve(adev->mes.data_fw_obj[inst]);
+
+	return 0;
+}
+
+/*
+ * Release the data and instruction ucode buffers of (@xcc_id, @pipe),
+ * data buffer first.
+ */
+static void mes_v12_1_free_ucode_buffers(struct amdgpu_device *adev,
+					 enum amdgpu_mes_pipe pipe,
+					 int xcc_id)
+{
+	int idx = MES_PIPE_INST(xcc_id, pipe);
+
+	/* data ucode */
+	amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[idx],
+			      &adev->mes.data_fw_gpu_addr[idx],
+			      (void **)&adev->mes.data_fw_ptr[idx]);
+
+	/* instruction ucode */
+	amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[idx],
+			      &adev->mes.ucode_fw_gpu_addr[idx],
+			      (void **)&adev->mes.ucode_fw_ptr[idx]);
+}
+
+/*
+ * Start or stop MES on @xcc_id via CP_MES_CNTL.
+ *
+ * enable == true: put both pipes in reset, program the program counter
+ * start address of every pipe (under srbm_mutex with GRBM select), then
+ * write a CP_MES_CNTL value built from 0 with only PIPE0/PIPE1 ACTIVE set,
+ * and wait for a mode-dependent delay.
+ *
+ * enable == false: clear both ACTIVE bits and set INVALIDATE_ICACHE, both
+ * pipe RESET bits and HALT in one read-modify-write.
+ */
+static void mes_v12_1_enable(struct amdgpu_device *adev,
+ bool enable, int xcc_id)
+{
+ uint64_t ucode_addr;
+ uint32_t pipe, data = 0;
+
+ if (enable) {
+ /* hold both pipes in reset while the start address is programmed */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ soc_v1_0_grbm_select(adev, 3, pipe, 0, 0,
+ GET_INST(GC, xcc_id));
+
+ /* register takes the start address shifted right by 2 */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id),
+ regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+ }
+ /* restore the default GRBM selection once, after the loop */
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* unhalt MES and activate pipe0 and pipe1 (value rebuilt from 0) */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
+
+ /* settle time: 500ms in emulation, 500us unified MES, else 50us */
+ if (amdgpu_emu_mode)
+ msleep(500);
+ else if (adev->enable_uni_mes)
+ udelay(500);
+ else
+ udelay(50);
+ } else {
+ /* deactivate, invalidate the icache, reset both pipes and halt */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_CNTL, data);
+ }
+}
+
+/*
+ * Halt MES on @xcc_id and program the program counter start address of
+ * every MES pipe from adev->mes.uc_start_addr[]. MES is left disabled on
+ * return. Register access is done under srbm_mutex with the pipe selected
+ * through GRBM; the selection is reset after each pipe.
+ */
+static void mes_v12_1_set_ucode_start_addr(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint64_t ucode_addr;
+ int pipe;
+
+ mes_v12_1_enable(adev, false, xcc_id);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ /* me=3, queue=0 */
+ soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
+
+ /* set ucode start address (register takes the address >> 2) */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * This function is for backdoor MES firmware.
+ *
+ * Halts MES on @xcc_id, allocates and fills the instruction and data ucode
+ * buffers for @pipe, then programs the instruction/data cache base and
+ * bound registers for that pipe. When @prime_icache is set the instruction
+ * cache is invalidated and then primed.
+ *
+ * Note that MES is halted before the firmware pointer is checked, so a
+ * -EINVAL return still leaves MES disabled.
+ *
+ * Returns 0 on success, -EINVAL when no firmware is loaded for @pipe, or
+ * the buffer allocation error.
+ */
+static int mes_v12_1_load_microcode(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe,
+ bool prime_icache, int xcc_id)
+{
+ int r, inst = MES_PIPE_INST(xcc_id, pipe);
+ uint32_t data;
+
+ mes_v12_1_enable(adev, false, xcc_id);
+
+ if (!adev->mes.fw[pipe])
+ return -EINVAL;
+
+ r = mes_v12_1_allocate_ucode_buffer(adev, pipe, xcc_id);
+ if (r)
+ return r;
+
+ r = mes_v12_1_allocate_ucode_data_buffer(adev, pipe, xcc_id);
+ if (r) {
+ /* releases the instruction buffer allocated just above */
+ mes_v12_1_free_ucode_buffers(adev, pipe, xcc_id);
+ return r;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=<pipe>, queue=0 */
+ soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_CNTL, 0);
+
+ /* set ucode firmware address */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr[inst]));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr[inst]));
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr[inst]));
+
+ /* Set data cache boundary CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_MDBOUND_LO, 0x7FFFF);
+
+ if (prime_icache) {
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_IC_OP_CNTL, data);
+ }
+
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+/*
+ * Create the page-aligned GTT EOP buffer of MES_EOP_SIZE bytes for
+ * (@xcc_id, @pipe), zero the whole backing object, then unmap and
+ * unreserve it.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v12_1_allocate_eop_buf(struct amdgpu_device *adev,
+				      enum amdgpu_mes_pipe pipe,
+				      int xcc_id)
+{
+	int inst = MES_PIPE_INST(xcc_id, pipe);
+	u32 *cpu_ptr;
+	int ret;
+
+	ret = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+					AMDGPU_GEM_DOMAIN_GTT,
+					&adev->mes.eop_gpu_obj[inst],
+					&adev->mes.eop_gpu_addr[inst],
+					(void **)&cpu_ptr);
+	if (ret) {
+		dev_warn(adev->dev, "(%d) create EOP bo failed\n", ret);
+		return ret;
+	}
+
+	/* clear by the object's real size, not the requested size */
+	memset(cpu_ptr, 0, adev->mes.eop_gpu_obj[inst]->tbo.base.size);
+
+	amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[inst]);
+	amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[inst]);
+
+	return 0;
+}
+
+/*
+ * Create the one-page VRAM shared command buffer for (@xcc_id, @pipe).
+ * Nothing is allocated for the KIQ pipe. No CPU mapping is requested.
+ *
+ * Returns 0 on success (or for the KIQ pipe), otherwise the error from
+ * amdgpu_bo_create_kernel().
+ */
+static int mes_v12_1_allocate_shared_cmd_buf(struct amdgpu_device *adev,
+					     enum amdgpu_mes_pipe pipe,
+					     int xcc_id)
+{
+	int inst = MES_PIPE_INST(xcc_id, pipe);
+	int ret;
+
+	if (pipe == AMDGPU_MES_KIQ_PIPE)
+		return 0;
+
+	ret = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->mes.shared_cmd_buf_obj[inst],
+				      &adev->mes.shared_cmd_buf_gpu_addr[inst],
+				      NULL);
+	if (ret)
+		dev_err(adev->dev,
+			"(%d) failed to create shared cmd buf bo\n", ret);
+
+	return ret;
+}
+
+/*
+ * Fill the memory queue descriptor (MQD) at ring->mqd_ptr for a MES ring.
+ *
+ * Only the MQD in memory (plus ring->wptr) is written here; no hardware
+ * register is touched. The values are derived from the ring's EOP, MQD,
+ * ring buffer, rptr and wptr GPU addresses, its size and its doorbell
+ * configuration. Always returns 0.
+ */
+static int mes_v12_1_mqd_init(struct amdgpu_ring *ring)
+{
+ struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ /* EOP base is stored shifted right by 8 */
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* start with empty read/write pointers */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ /*
+ * NOTE(review): the value is shifted left by 8 before REG_SET_FIELD
+ * applies the field shift and mask - confirm this is intended.
+ */
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* enable doorbell if the ring uses one; otherwise leave it disabled */
+ tmp = 0;
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ mqd->cp_hqd_vmid = 0;
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
+ PRELOAD_SIZE, 0x63);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_MES_12_1_DEFAULT;
+ mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
+ mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+
+ /*
+ * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
+ * doorbell handling. This is a reserved CP internal register that
+ * cannot be accessed by others.
+ */
+ mqd->cp_hqd_gfx_control = BIT(15);
+
+ return 0;
+}
+
+/*
+ * Program the HQD registers of the ring's pipe on @xcc_id directly from
+ * the values stored in the ring's MQD. Runs under srbm_mutex with
+ * me=3/pipe=ring->pipe selected through GRBM; the selection is restored to
+ * 0 before the mutex is released. CP_HQD_ACTIVE is written last.
+ */
+static void mes_v12_1_queue_init_register(struct amdgpu_ring *ring,
+ int xcc_id)
+{
+ struct v12_1_mes_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc_v1_0_grbm_select(adev, 3, ring->pipe, 0, 0, GET_INST(GC, xcc_id));
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /*
+ * set CP_MQD_CONTROL.VMID=0
+ * NOTE(review): the read-modify result in 'data' is discarded and a
+ * literal 0 is written, which clears every field of CP_MQD_CONTROL
+ * rather than only VMID - confirm whether 'data' (or
+ * mqd->cp_mqd_control) was meant to be written here.
+ */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE from the MQD (PRELOAD_SIZE=0x63 as set by mes_v12_1_mqd_init) */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE from the MQD (1 as set by mes_v12_1_mqd_init) */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Invoke the KIQ's kiq_map_queues callback for the MES scheduler ring of
+ * @xcc_id on that XCC's KIQ ring, then run a ring test on the KIQ ring.
+ *
+ * Returns 0 on success, -EINVAL when the KIQ provides no kiq_map_queues
+ * callback, or the error from amdgpu_ring_alloc() / amdgpu_ring_test_ring().
+ * A failed ring test also clears kiq_ring->sched.ready.
+ */
+static int mes_v12_1_kiq_enable_queue(struct amdgpu_device *adev, int xcc_id)
+{
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	struct amdgpu_ring *sched_ring =
+		&adev->mes.ring[MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE)];
+	int ret;
+
+	if (!(kiq->pmf && kiq->pmf->kiq_map_queues))
+		return -EINVAL;
+
+	ret = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+	if (ret) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", ret);
+		return ret;
+	}
+
+	kiq->pmf->kiq_map_queues(kiq_ring, sched_ring);
+
+	ret = amdgpu_ring_test_ring(kiq_ring);
+	if (!ret)
+		return 0;
+
+	DRM_ERROR("kfq enable failed\n");
+	kiq_ring->sched.ready = false;
+	return ret;
+}
+
+/*
+ * Initialise the MQD of one MES pipe ring and bring the queue up.
+ *
+ * @adev: amdgpu device
+ * @pipe: MES pipe to initialise
+ * @xcc_id: XCC index
+ *
+ * The scheduler pipe is mapped through amdgpu_mes_map_legacy_queue() (unified
+ * MES) or through the KIQ ring; any other pipe is programmed directly with
+ * mes_v12_1_queue_init_register(). Afterwards CP_MES_GP3_LO is read with the
+ * pipe selected and stored as the scheduler or KIQ version.
+ *
+ * Returns 0 on success or the error from MQD initialisation / queue mapping.
+ */
+static int mes_v12_1_queue_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe,
+ int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* without unified MES the KIQ pipe is backed by the gfx KIQ ring */
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[xcc_id].ring;
+ else
+ ring = &adev->mes.ring[MES_PIPE_INST(xcc_id, pipe)];
+
+ /* during reset or suspend handling, restart from an empty ring */
+ if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
+ (amdgpu_in_reset(adev) || adev->in_suspend)) {
+ *(ring->wptr_cpu_addr) = 0;
+ *(ring->rptr_cpu_addr) = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ r = mes_v12_1_mqd_init(ring);
+ if (r)
+ return r;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ if (adev->enable_uni_mes)
+ r = amdgpu_mes_map_legacy_queue(adev, ring, xcc_id);
+ else
+ r = mes_v12_1_kiq_enable_queue(adev, xcc_id);
+ if (r)
+ return r;
+ } else {
+ mes_v12_1_queue_init_register(ring, xcc_id);
+ }
+
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+ soc_v1_0_grbm_select(adev, 3, pipe, 0, 0, GET_INST(GC, xcc_id));
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MES_GP3_LO);
+
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+/*
+ * Fill in the software state of the MES ring for (@xcc_id, @pipe) and hand it
+ * to amdgpu_ring_init().
+ *
+ * The doorbell index is derived from mes_ring0 for the scheduler pipe and
+ * from mes_ring1 for any other pipe, offset by the per-XCC doorbell range.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int mes_v12_1_ring_init(struct amdgpu_device *adev,
+			       int xcc_id, int pipe)
+{
+	const int inst = MES_PIPE_INST(xcc_id, pipe);
+	struct amdgpu_ring *ring = &adev->mes.ring[inst];
+
+	ring->funcs = &mes_v12_1_ring_funcs;
+
+	/* queue identity */
+	ring->me = 3;
+	ring->pipe = pipe;
+	ring->queue = 0;
+	ring->xcc_id = xcc_id;
+	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
+	ring->no_scheduler = true;
+
+	ring->doorbell_index =
+		((pipe == AMDGPU_MES_SCHED_PIPE ?
+		  adev->doorbell_index.mes_ring0 :
+		  adev->doorbell_index.mes_ring1) +
+		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
+		<< 1;
+
+	snprintf(ring->name, sizeof(ring->name), "mes_%hhu.%hhu.%hhu.%hhu",
+		 (unsigned char)xcc_id, (unsigned char)ring->me,
+		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
+
+	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Fill in the software state of the per-XCC gfx KIQ ring
+ * (adev->gfx.kiq[xcc_id].ring) with the MES KIQ pipe's EOP buffer and the
+ * mes_ring1 doorbell, then hand it to amdgpu_ring_init().
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int mes_v12_1_kiq_ring_init(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring;
+ int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
+
+ spin_lock_init(&adev->gfx.kiq[xcc_id].ring_lock);
+
+ ring = &adev->gfx.kiq[xcc_id].ring;
+
+ ring->me = 3;
+ ring->pipe = 1;
+ ring->queue = 0;
+ ring->xcc_id = xcc_id;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+
+ /*
+ * NOTE(review): presumably cleared so that amdgpu_ring_init() treats
+ * the ring as not yet initialised - confirm against amdgpu_ring_init().
+ */
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[inst];
+ ring->no_scheduler = true;
+ ring->doorbell_index =
+ (adev->doorbell_index.mes_ring1 +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range)
+ << 1;
+
+ snprintf(ring->name, sizeof(ring->name), "mes_kiq_%hhu.%hhu.%hhu.%hhu",
+ (unsigned char)xcc_id, (unsigned char)ring->me,
+ (unsigned char)ring->pipe, (unsigned char)ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Allocate and zero the MQD buffer object of the ring backing (@xcc_id, @pipe)
+ * and allocate the matching CPU-side MQD backup.
+ *
+ * Does nothing when the ring already has an MQD buffer object.  A failed
+ * backup allocation only produces a warning.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_kernel().
+ */
+static int mes_v12_1_mqd_sw_init(struct amdgpu_device *adev,
+				 enum amdgpu_mes_pipe pipe,
+				 int xcc_id)
+{
+	const int mqd_size = sizeof(struct v12_1_mes_mqd);
+	const int inst = MES_PIPE_INST(xcc_id, pipe);
+	struct amdgpu_ring *ring;
+	int ret;
+
+	/* without unified MES the KIQ pipe uses the gfx KIQ ring */
+	ring = (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ?
+	       &adev->gfx.kiq[xcc_id].ring : &adev->mes.ring[inst];
+
+	if (ring->mqd_obj)
+		return 0;
+
+	ret = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+				      &ring->mqd_gpu_addr, &ring->mqd_ptr);
+	if (ret) {
+		dev_warn(adev->dev, "failed to create ring mqd bo (%d)", ret);
+		return ret;
+	}
+
+	memset(ring->mqd_ptr, 0, mqd_size);
+
+	/* the backup is optional: warn and carry on when it cannot be had */
+	adev->mes.mqd_backup[inst] = kmalloc(mqd_size, GFP_KERNEL);
+	if (adev->mes.mqd_backup[inst] == NULL)
+		dev_warn(adev->dev,
+			 "no memory to create MQD backup for ring %s\n",
+			 ring->name);
+
+	return 0;
+}
+
+/*
+ * Software init of the MES 12.1 IP block.
+ *
+ * Installs the MES callbacks, sizes the event log, calls amdgpu_mes_init() and
+ * then, for every XCC and every MES pipe, allocates the EOP buffer, the MQD
+ * and the ring. With unified MES and more than one XCC a shared command
+ * buffer is allocated per pipe as well.
+ *
+ * Returns 0 on success or the first error encountered; nothing allocated so
+ * far is released here on failure.
+ */
+static int mes_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ adev->mes.funcs = &mes_v12_1_funcs;
+ adev->mes.kiq_hw_init = &mes_v12_1_kiq_hw_init;
+ adev->mes.kiq_hw_fini = &mes_v12_1_kiq_hw_fini;
+ adev->mes.enable_legacy_queue_map = true;
+
+ /* unified MES: one log buffer per pipe per XCC, otherwise a single buffer */
+ adev->mes.event_log_size =
+ adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE * num_xcc) : AMDGPU_MES_LOG_BUFFER_SIZE;
+
+ r = amdgpu_mes_init(adev);
+ if (r)
+ return r;
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ r = mes_v12_1_allocate_eop_buf(adev, pipe, xcc_id);
+ if (r)
+ return r;
+
+ r = mes_v12_1_mqd_sw_init(adev, pipe, xcc_id);
+ if (r)
+ return r;
+
+ /* without unified MES the KIQ pipe is the gfx KIQ ring */
+ if (!adev->enable_uni_mes && pipe ==
+ AMDGPU_MES_KIQ_PIPE)
+ r = mes_v12_1_kiq_ring_init(adev, xcc_id);
+ else
+ r = mes_v12_1_ring_init(adev, xcc_id, pipe);
+ if (r)
+ return r;
+
+ if (adev->enable_uni_mes && num_xcc > 1) {
+ r = mes_v12_1_allocate_shared_cmd_buf(adev,
+ pipe, xcc_id);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Software teardown of the MES 12.1 IP block.
+ *
+ * For every XCC and pipe: frees the shared command buffer, the MQD backup,
+ * the EOP buffer and the firmware reference, and finalises the MES ring
+ * (scheduler pipe only unless unified MES is enabled). The gfx KIQ rings
+ * (non-unified MES) and, for direct firmware loading, the ucode buffers are
+ * released in a second pass before amdgpu_mes_fini() is called.
+ *
+ * Always returns 0.
+ */
+static int mes_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, inst, xcc_id, num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ inst = MES_PIPE_INST(xcc_id, pipe);
+
+ amdgpu_bo_free_kernel(&adev->mes.shared_cmd_buf_obj[inst],
+ &adev->mes.shared_cmd_buf_gpu_addr[inst],
+ NULL);
+
+ kfree(adev->mes.mqd_backup[inst]);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[inst],
+ &adev->mes.eop_gpu_addr[inst],
+ NULL);
+ amdgpu_ucode_release(&adev->mes.fw[inst]);
+
+ /* the non-unified KIQ pipe ring lives in adev->gfx.kiq and is handled below */
+ if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
+ amdgpu_bo_free_kernel(&adev->mes.ring[inst].mqd_obj,
+ &adev->mes.ring[inst].mqd_gpu_addr,
+ &adev->mes.ring[inst].mqd_ptr);
+ amdgpu_ring_fini(&adev->mes.ring[inst]);
+ }
+ }
+ }
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ if (!adev->enable_uni_mes) {
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[xcc_id].ring.mqd_obj,
+ &adev->gfx.kiq[xcc_id].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[xcc_id].ring.mqd_ptr);
+ amdgpu_ring_fini(&adev->gfx.kiq[xcc_id].ring);
+ }
+
+ /* ucode buffers are only released for direct firmware loading */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mes_v12_1_free_ucode_buffers(adev,
+ AMDGPU_MES_KIQ_PIPE, xcc_id);
+ mes_v12_1_free_ucode_buffers(adev,
+ AMDGPU_MES_SCHED_PIPE, xcc_id);
+ }
+ }
+
+ amdgpu_mes_fini(adev);
+ return 0;
+}
+
+/*
+ * Take the MES scheduler pipe queue of @xcc_id down by register access.
+ *
+ * With the scheduler pipe selected under adev->srbm_mutex, an active HQD is
+ * asked to dequeue and polled until it goes inactive, the doorbell is
+ * disabled, and the queue read/write pointers are reset to 0. The scheduler
+ * ring is marked not ready afterwards.
+ */
+static void mes_v12_1_kiq_dequeue_sched(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t data;
+ int i;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc_v1_0_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0,
+ GET_INST(GC, xcc_id));
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
+ /*
+ * poll for up to adev->usec_timeout iterations of 1us; running
+ * out of iterations is not reported
+ */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ }
+ /* first write DOORBELL_EN=0 with DOORBELL_HIT=1, then clear the whole register */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ /* reset the queue pointers */
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
+
+ soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+ mutex_unlock(&adev->srbm_mutex);
+
+ adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = false;
+}
+
+/*
+ * Record @ring's me/pipe/queue in the low byte of RLC_CP_SCHEDULERS for
+ * @xcc_id. The value is written twice: once as is and once more with bit 7
+ * (0x80) set.
+ */
+static void mes_v12_1_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+}
+
+/*
+ * Hardware init of the MES KIQ pipe for @xcc_id.
+ *
+ * Registers the KIQ queue with the RLC, loads the scheduler and KIQ
+ * microcode when the firmware is loaded directly, enables MES, initialises
+ * the KIQ pipe queue and, when legacy queue mapping is enabled, continues
+ * with the per-XCC MES hardware init.
+ *
+ * Returns 0 on success, -EINVAL when the MES IP block cannot be found, or
+ * the error of the failing step.
+ */
+static int mes_v12_1_kiq_hw_init(struct amdgpu_device *adev, uint32_t xcc_id)
+{
+ int inst = MES_PIPE_INST(xcc_id, AMDGPU_MES_KIQ_PIPE);
+ int r = 0;
+ struct amdgpu_ip_block *ip_block;
+
+ if (adev->enable_uni_mes)
+ mes_v12_1_kiq_setting(&adev->mes.ring[inst], xcc_id);
+ else
+ mes_v12_1_kiq_setting(&adev->gfx.kiq[xcc_id].ring, xcc_id);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+
+ r = mes_v12_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE,
+ false, xcc_id);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+
+ r = mes_v12_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE,
+ true, xcc_id);
+ if (r) {
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ return r;
+ }
+
+ mes_v12_1_set_ucode_start_addr(adev, xcc_id);
+
+ } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ mes_v12_1_set_ucode_start_addr(adev, xcc_id);
+
+ mes_v12_1_enable(adev, true, xcc_id);
+
+ /* the IP block handle is needed for the xcc hw init and failure paths below */
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
+ r = mes_v12_1_queue_init(adev, AMDGPU_MES_KIQ_PIPE, xcc_id);
+ if (r)
+ goto failure;
+
+ if (adev->enable_uni_mes) {
+ r = mes_v12_1_set_hw_resources(&adev->mes,
+ AMDGPU_MES_KIQ_PIPE, xcc_id);
+ if (r)
+ goto failure;
+
+ /* the result of set_hw_resources_1 is not checked */
+ mes_v12_1_set_hw_resources_1(&adev->mes,
+ AMDGPU_MES_KIQ_PIPE, xcc_id);
+ }
+
+ if (adev->mes.enable_legacy_queue_map) {
+ r = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
+ if (r)
+ goto failure;
+ }
+
+ return r;
+
+failure:
+ mes_v12_1_hw_fini(ip_block);
+ return r;
+}
+
+/*
+ * Hardware teardown of the MES KIQ path for @xcc_id.
+ *
+ * A scheduler ring that is still marked ready is unmapped first, through
+ * amdgpu_mes_unmap_legacy_queue() with unified MES or by direct register
+ * access otherwise, and then marked not ready.  MES is disabled in every
+ * case.
+ *
+ * Always returns 0.
+ */
+static int mes_v12_1_kiq_hw_fini(struct amdgpu_device *adev, uint32_t xcc_id)
+{
+	struct amdgpu_ring *sched_ring =
+		&adev->mes.ring[MES_PIPE_INST(xcc_id, AMDGPU_MES_SCHED_PIPE)];
+
+	if (sched_ring->sched.ready) {
+		if (!adev->enable_uni_mes)
+			mes_v12_1_kiq_dequeue_sched(adev, xcc_id);
+		else
+			amdgpu_mes_unmap_legacy_queue(adev, sched_ring,
+						      RESET_QUEUES, 0, 0, xcc_id);
+
+		sched_ring->sched.ready = false;
+	}
+
+	mes_v12_1_enable(adev, false, xcc_id);
+
+	return 0;
+}
+
+/*
+ * Decide whether MES cooperative mode is used and which XCC is the master of
+ * @xcc_id, based on the current partition mode.
+ *
+ * Nothing is changed on a single-XCC device.  SPX, DPX and QPX enable
+ * cooperative mode and record a master XCC id; CPX disables it.
+ *
+ * Returns 0 on success, -EINVAL when the gfx get_xccs_per_xcp callback is
+ * missing or the partition mode is not one of the above.
+ */
+static int mes_v12_1_setup_coop_mode(struct amdgpu_device *adev, int xcc_id)
+{
+	const u32 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	u32 num_xcc_per_xcp;
+
+	if (num_xcc == 1)
+		return 0;
+
+	if (!adev->gfx.funcs || !adev->gfx.funcs->get_xccs_per_xcp)
+		return -EINVAL;
+
+	num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
+
+	switch (adev->xcp_mgr->mode) {
+	case AMDGPU_SPX_PARTITION_MODE:
+		adev->mes.enable_coop_mode = 1;
+		adev->mes.master_xcc_ids[xcc_id] = 0;
+		return 0;
+	case AMDGPU_DPX_PARTITION_MODE:
+		adev->mes.enable_coop_mode = 1;
+		adev->mes.master_xcc_ids[xcc_id] =
+			(xcc_id / num_xcc_per_xcp) * (num_xcc / 2);
+		return 0;
+	case AMDGPU_QPX_PARTITION_MODE:
+		adev->mes.enable_coop_mode = 1;
+		adev->mes.master_xcc_ids[xcc_id] =
+			(xcc_id / num_xcc_per_xcp) * (num_xcc / 4);
+		return 0;
+	case AMDGPU_CPX_PARTITION_MODE:
+		adev->mes.enable_coop_mode = 0;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Hardware init of the MES scheduler pipe for one XCC.
+ *
+ * Skips straight to the final bookkeeping when the scheduler ring is already
+ * ready. Otherwise it loads/starts the firmware when no MES KIQ is in use,
+ * enables unmapped doorbell handling, initialises the scheduler queue,
+ * pushes the hardware resources and checks the scheduler status.
+ *
+ * Returns 0 on success or the error of the failing step.
+ */
+static int mes_v12_1_xcc_hw_init(struct amdgpu_ip_block *ip_block, int xcc_id)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready)
+ goto out;
+
+ /* without a MES KIQ the firmware has not been started yet */
+ if (!adev->enable_mes_kiq) {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = mes_v12_1_load_microcode(adev,
+ AMDGPU_MES_SCHED_PIPE, true, xcc_id);
+ if (r) {
+ DRM_ERROR("failed to MES fw, r=%d\n", r);
+ return r;
+ }
+
+ mes_v12_1_set_ucode_start_addr(adev, xcc_id);
+
+ } else if (adev->firmware.load_type ==
+ AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+
+ mes_v12_1_set_ucode_start_addr(adev, xcc_id);
+ }
+
+ mes_v12_1_enable(adev, true, xcc_id);
+ }
+
+ /* Enable the MES to handle doorbell ring on unmapped queue */
+ mes_v12_1_enable_unmapped_doorbell_handling(&adev->mes, true, xcc_id);
+
+ r = mes_v12_1_queue_init(adev, AMDGPU_MES_SCHED_PIPE, xcc_id);
+ if (r)
+ goto failure;
+
+ r = mes_v12_1_set_hw_resources(&adev->mes,
+ AMDGPU_MES_SCHED_PIPE, xcc_id);
+ if (r)
+ goto failure;
+
+ if (adev->enable_uni_mes) {
+ r = mes_v12_1_setup_coop_mode(adev, xcc_id);
+ if (r)
+ goto failure;
+ /* the result of set_hw_resources_1 is not checked */
+ mes_v12_1_set_hw_resources_1(&adev->mes,
+ AMDGPU_MES_SCHED_PIPE, xcc_id);
+ }
+ mes_v12_1_init_aggregated_doorbell(&adev->mes, xcc_id);
+
+ r = mes_v12_1_query_sched_status(&adev->mes,
+ AMDGPU_MES_SCHED_PIPE, xcc_id);
+ if (r) {
+ DRM_ERROR("MES is busy\n");
+ goto failure;
+ }
+
+out:
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq[xcc_id].ring.sched.ready = false;
+ adev->mes.ring[MES_PIPE_INST(xcc_id, 0)].sched.ready = true;
+
+ return 0;
+
+failure:
+ mes_v12_1_hw_fini(ip_block);
+ return r;
+}
+
+/*
+ * Run the per-XCC MES hardware init for every XCC in adev->gfx.xcc_mask,
+ * stopping at the first failure.
+ *
+ * Returns 0 on success or the error of the failing XCC.
+ */
+static int mes_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	int xcc_id, ret = 0;
+
+	for (xcc_id = 0; !ret && xcc_id < num_xcc; xcc_id++)
+		ret = mes_v12_1_xcc_hw_init(ip_block, xcc_id);
+
+	return ret;
+}
+
+/*
+ * hw_fini callback of the MES 12.1 IP block. It performs no work and always
+ * returns 0.
+ */
+static int mes_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+/*
+ * Suspend callback: suspend MES through amdgpu_mes_suspend() and, when that
+ * succeeds, run the hw_fini callback.
+ *
+ * Returns the amdgpu_mes_suspend() error, otherwise the hw_fini result.
+ */
+static int mes_v12_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int ret = amdgpu_mes_suspend(ip_block->adev);
+
+	return ret ? ret : mes_v12_1_hw_fini(ip_block);
+}
+
+/*
+ * Resume callback: redo the hardware init and, when that succeeds, resume MES
+ * through amdgpu_mes_resume().
+ *
+ * Returns the hw_init error, otherwise the amdgpu_mes_resume() result.
+ */
+static int mes_v12_1_resume(struct amdgpu_ip_block *ip_block)
+{
+	int ret = mes_v12_1_hw_init(ip_block);
+
+	return ret ? ret : amdgpu_mes_resume(ip_block->adev);
+}
+
+/*
+ * Early init callback: call amdgpu_mes_init_microcode() for every MES pipe,
+ * stopping at the first failure.
+ *
+ * Returns 0 on success or the error of the failing pipe.
+ */
+static int mes_v12_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int pipe, ret = 0;
+
+	for (pipe = 0; !ret && pipe < AMDGPU_MAX_MES_PIPES; pipe++)
+		ret = amdgpu_mes_init_microcode(adev, pipe);
+
+	return ret;
+}
+
+/* IP block callbacks of MES 12.1; no late_init step is provided. */
+static const struct amd_ip_funcs mes_v12_1_ip_funcs = {
+ .name = "mes_v12_1",
+ .early_init = mes_v12_1_early_init,
+ .late_init = NULL,
+ .sw_init = mes_v12_1_sw_init,
+ .sw_fini = mes_v12_1_sw_fini,
+ .hw_init = mes_v12_1_hw_init,
+ .hw_fini = mes_v12_1_hw_fini,
+ .suspend = mes_v12_1_suspend,
+ .resume = mes_v12_1_resume,
+};
+
+/* Exported IP block descriptor: MES, version 12.1.0, using mes_v12_1_ip_funcs. */
+const struct amdgpu_ip_block_version mes_v12_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 12,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &mes_v12_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.h b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.h
new file mode 100644
index 000000000000..cfc048f652b0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V12_1_H__
+#define __MES_V12_1_H__
+
+/* IP block version descriptor for MES 12.1. */
+extern const struct amdgpu_ip_block_version mes_v12_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
index f6fc9778bc30..daf1f8ad4cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
@@ -163,6 +163,35 @@ static const char *mmhub_client_ids_v3_3_1[][2] = {
[32+30][1] = "VCN1",
};
+/*
+ * Client names for MMHUB 3.4, looked up as [client id][rw] when a protection
+ * fault is reported. Slots without an initialiser stay NULL.
+ * The second index presumably distinguishes read (0) from write (1) clients.
+ */
+static const char *mmhub_client_ids_v3_4[][2] = {
+	[0][0] = "VMC",
+	[4][0] = "DCEDMC",
+	[5][0] = "MPXSP",
+	[6][0] = "MPASP",
+	[7][0] = "MP1",
+	[8][0] = "MPM",
+	[23][0] = "HDP",
+	[24][0] = "LSDMA",
+	[25][0] = "JPEG",
+	[26][0] = "VPE",
+	[27][0] = "VSCH",
+	[28][0] = "VCNU",
+	[30][0] = "VCNRD",
+	[3][1] = "DCEDWB",
+	[4][1] = "DCEDMC",
+	[5][1] = "MPXSP",
+	/* same client as [6][0]; the name must match in both columns */
+	[6][1] = "MPASP",
+	[7][1] = "MP1",
+	[8][1] = "MPM",
+	[21][1] = "OSSSYS",
+	[23][1] = "HDP",
+	[24][1] = "LSDMA",
+	[25][1] = "JPEG",
+	[26][1] = "VPE",
+	[27][1] = "VSCH",
+	[29][1] = "VCNWR",
+};
+
static uint32_t mmhub_v3_3_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
@@ -211,6 +240,11 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
mmhub_client_ids_v3_3_1[cid][rw] :
cid == 0x140 ? "UMSCH" : NULL;
break;
+ case IP_VERSION(3, 4, 0):
+ mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_4) ?
+ mmhub_client_ids_v3_4[cid][rw] :
+ cid == 0x140 ? "UMSCH" : NULL;
+ break;
default:
mmhub_cid = NULL;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
new file mode 100644
index 000000000000..7e917eb47a8c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v4_2_0.h"
+
+#include "mmhub/mmhub_4_2_0_offset.h"
+#include "mmhub/mmhub_4_2_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "soc24_enum.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+/*
+ * Client names for MMHUB 4.2.0, indexed as [client id][column]. Slots without
+ * an initialiser stay NULL.
+ * NOTE(review): the column presumably selects read (0) vs. write (1) clients;
+ * confirm against the code that consumes this table.
+ */
+static const char *mmhub_client_ids_v4_2_0[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+/*
+ * Read the framebuffer base of MMHUB instance 0.
+ *
+ * The masked FB_LOCATION_BASE_LO32 field is placed at bit 24 and the masked
+ * FB_LOCATION_BASE_HI32 field at bit 56 of the returned value.
+ */
+static u64 mmhub_v4_2_0_get_fb_location(struct amdgpu_device *adev)
+{
+	u64 lo, hi;
+
+	lo = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+			  regMMMC_VM_FB_LOCATION_BASE_LO32);
+	lo &= MMMC_VM_FB_LOCATION_BASE_LO32__FB_BASE_LO32_MASK;
+
+	hi = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+			  regMMMC_VM_FB_LOCATION_BASE_HI32);
+	hi &= MMMC_VM_FB_LOCATION_BASE_HI32__FB_BASE_HI1_MASK;
+
+	return (lo << 24) | (hi << 56);
+}
+
+/* Return MMMC_VM_FB_OFFSET of MMHUB instance 0, shifted left by 24 bits. */
+static u64 mmhub_v4_2_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+	u64 fb_offset = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+				     regMMMC_VM_FB_OFFSET);
+
+	return fb_offset << 24;
+}
+
+/*
+ * Write @page_table_base into the page table base address registers of VM
+ * context @vmid on every MMHUB instance selected by @mid_mask.
+ *
+ * The per-context register offset is the hub's ctx_addr_distance times @vmid.
+ */
+static void mmhub_v4_2_0_mid_setup_vm_pt_regs(struct amdgpu_device *adev,
+					      uint32_t vmid,
+					      uint64_t page_table_base,
+					      uint32_t mid_mask)
+{
+	const uint32_t base_lo = lower_32_bits(page_table_base);
+	const uint32_t base_hi = upper_32_bits(page_table_base);
+	int inst;
+
+	for_each_inst(inst, mid_mask) {
+		struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(inst)];
+
+		WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, inst),
+				    regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+				    hub->ctx_addr_distance * vmid,
+				    base_lo);
+
+		WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, inst),
+				    regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+				    hub->ctx_addr_distance * vmid,
+				    base_hi);
+	}
+}
+
+/*
+ * Program the page table base of VM context @vmid on all MMHUB instances
+ * listed in adev->aid_mask.
+ */
+static void mmhub_v4_2_0_setup_vm_pt_regs(struct amdgpu_device *adev,
+					  uint32_t vmid,
+					  uint64_t page_table_base)
+{
+	mmhub_v4_2_0_mid_setup_vm_pt_regs(adev, vmid, page_table_base,
+					  adev->aid_mask);
+}
+
+/*
+ * Program the VM context 0 page table base and address range on every MMHUB
+ * instance selected by @mid_mask.
+ *
+ * When a PDB0 buffer object exists, its address is used as the page table
+ * base and the range spans fb_start..fb_end; otherwise the GART table and
+ * gart_start..gart_end are used.  The choice does not depend on the instance,
+ * so it is made once up front and the page directory address is computed only
+ * for the buffer object that is actually used.
+ */
+static void mmhub_v4_2_0_mid_init_gart_aperture_regs(struct amdgpu_device *adev,
+						      uint32_t mid_mask)
+{
+	uint64_t pt_base, start, end;
+	int i;
+
+	if (adev->gmc.pdb0_bo) {
+		pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo);
+		start = adev->gmc.fb_start;
+		end = adev->gmc.fb_end;
+	} else {
+		pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+		start = adev->gmc.gart_start;
+		end = adev->gmc.gart_end;
+	}
+
+	mmhub_v4_2_0_mid_setup_vm_pt_regs(adev, 0, pt_base, mid_mask);
+
+	for_each_inst(i, mid_mask) {
+		/* the range registers take the address in 4K units, split at bit 44 */
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+			     (u32)(start >> 12));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+			     (u32)(start >> 44));
+
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+			     (u32)(end >> 12));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+			     (u32)(end >> 44));
+	}
+}
+
+/*
+ * Program the AGP aperture, the system aperture, the default page address
+ * and the protection fault default address on every MMHUB instance selected
+ * by @mid_mask.
+ *
+ * Nothing is written for an SR-IOV VF.  When a PDB0 buffer object exists, the
+ * FB, AGP and system apertures of each instance are disabled again at the end
+ * of that instance's programming.  This is done inside the per-instance loop
+ * because the loop variable is not a valid instance index once
+ * for_each_inst() has finished.
+ */
+static void mmhub_v4_2_0_mid_init_system_aperture_regs(struct amdgpu_device *adev,
+						       uint32_t mid_mask)
+{
+	uint64_t value;
+	uint32_t tmp;
+	int i;
+
+	/*
+	 * the new L1 policy will block SRIOV guest from writing
+	 * these regs, and they will be programed at host.
+	 * so skip programing these regs.
+	 */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for_each_inst(i, mid_mask) {
+		/* Program the AGP BAR */
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_AGP_BASE_LO32, 0);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_AGP_BASE_HI32, 0);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_AGP_BOT_LO32,
+			     lower_32_bits(adev->gmc.agp_start >> 24));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_AGP_BOT_HI32,
+			     upper_32_bits(adev->gmc.agp_start >> 24));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_AGP_TOP_LO32,
+			     lower_32_bits(adev->gmc.agp_end >> 24));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_AGP_TOP_HI32,
+			     upper_32_bits(adev->gmc.agp_end >> 24));
+
+		/* Program the system aperture low logical page number. */
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
+			     lower_32_bits(min(adev->gmc.fb_start,
+					       adev->gmc.agp_start) >> 18));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
+			     upper_32_bits(min(adev->gmc.fb_start,
+					       adev->gmc.agp_start) >> 18));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32,
+			     lower_32_bits(max(adev->gmc.fb_end,
+					       adev->gmc.agp_end) >> 18));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32,
+			     upper_32_bits(max(adev->gmc.fb_end,
+					       adev->gmc.agp_end) >> 18));
+
+		/* Set default page address. */
+		value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+			     (u32)(value >> 12));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+			     (u32)(value >> 44));
+
+		/* Program "protection fault". */
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+			     (u32)(adev->dummy_page_addr >> 12));
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+			     (u32)((u64)adev->dummy_page_addr >> 44));
+
+		tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				   regMMVM_L2_PROTECTION_FAULT_CNTL2);
+		tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+				    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+				    ENABLE_RETRY_FAULT_INTERRUPT, 0x1);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+			     regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+
+		/*
+		 * In the case squeezing vram into GART aperture, we don't use
+		 * FB aperture and AGP aperture. Disable them on this instance
+		 * by programming each range with its start above its end.
+		 */
+		if (adev->gmc.pdb0_bo) {
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_FB_LOCATION_TOP_LO32, 0);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_FB_LOCATION_TOP_HI32, 0);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_FB_LOCATION_BASE_LO32, 0xFFFFFFFF);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_FB_LOCATION_BASE_HI32, 1);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_AGP_TOP_LO32, 0);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_AGP_TOP_HI32, 0);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_AGP_BOT_LO32, 0xFFFFFFFF);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_AGP_BOT_HI32, 1);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_LO32,
+				     0xFFFFFFFF);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR_HI32,
+				     0x7F);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_LO32, 0);
+			WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+				     regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR_HI32, 0);
+		}
+	}
+}
+
+/*
+ * Read-modify-write MMMC_VM_MX_L1_TLB_CNTL on every MMHUB instance selected
+ * by @mid_mask: enable the L1 TLB and the advanced driver model, set
+ * SYSTEM_ACCESS_MODE to 3, clear SYSTEM_APERTURE_UNMAPPED_ACCESS and ECO_BITS,
+ * and select the uncached memory type.
+ */
+static void mmhub_v4_2_0_mid_init_tlb_regs(struct amdgpu_device *adev,
+ uint32_t mid_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ for_each_inst(i, mid_mask) {
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+}
+
+/*
+ * Program the L2 cache control registers (MMVM_L2_CNTL .. MMVM_L2_CNTL5) on
+ * every MMHUB instance selected by @mid_mask.
+ *
+ * MMVM_L2_CNTL and MMVM_L2_CNTL2 are read-modify-written; CNTL3, CNTL4 and
+ * CNTL5 are built from the *_DEFAULT constants defined in this file. Nothing
+ * is written for an SR-IOV VF.
+ */
+static void mmhub_v4_2_0_mid_init_cache_regs(struct amdgpu_device *adev,
+ uint32_t mid_mask)
+{
+ uint32_t tmp;
+ int i;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for_each_inst(i, mid_mask) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL, tmp);
+
+ /* request invalidation of all L1 TLBs and of the L2 cache */
+ tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2,
+ INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2,
+ INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL2, tmp);
+
+ /* bank select / big-K fragment size depend on translate_further */
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ /* For AMD APP APUs setup WC memory */
+ if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4,
+ VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4,
+ VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ }
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5,
+ L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_CNTL5, tmp);
+ }
+}
+
+/*
+ * Enable VM context 0 on each MMHUB instance in @mid_mask.
+ *
+ * ENABLE_CONTEXT is set, while PAGE_TABLE_DEPTH and
+ * RETRY_PERMISSION_OR_INVALID_PAGE_FAULT are cleared; all other bits of
+ * MMVM_CONTEXT0_CNTL keep the value read back from hardware.
+ */
+static void mmhub_v4_2_0_mid_enable_system_domain(struct amdgpu_device *adev,
+						  uint32_t mid_mask)
+{
+	uint32_t ctx0_cntl;
+	int idx;
+
+	for_each_inst(idx, mid_mask) {
+		ctx0_cntl = RREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+					 regMMVM_CONTEXT0_CNTL);
+
+		ctx0_cntl = REG_SET_FIELD(ctx0_cntl, MMVM_CONTEXT0_CNTL,
+					  ENABLE_CONTEXT, 1);
+		ctx0_cntl = REG_SET_FIELD(ctx0_cntl, MMVM_CONTEXT0_CNTL,
+					  PAGE_TABLE_DEPTH, 0);
+		ctx0_cntl = REG_SET_FIELD(ctx0_cntl, MMVM_CONTEXT0_CNTL,
+					  RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+			     regMMVM_CONTEXT0_CNTL, ctx0_cntl);
+	}
+}
+
+/*
+ * Disable the CONTEXT1 identity aperture on each MMHUB instance in @mid_mask.
+ *
+ * The aperture low address is written as 0x1FFF_FFFFFFFF and the high address
+ * as 0, and the identity physical offset is zeroed. Skipped entirely on an
+ * SR-IOV VF, where the PF programs these registers.
+ */
+static void mmhub_v4_2_0_mid_disable_identity_aperture(struct amdgpu_device *adev,
+						       uint32_t mid_mask)
+{
+	int idx;
+
+	/* Not accessible from a VF; the PF programs them instead. */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for_each_inst(idx, mid_mask) {
+		/* Aperture low bound */
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+			     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+			     0xFFFFFFFF);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+			     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+			     0x00001FFF);
+
+		/* Aperture high bound */
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+			     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+			     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+		/* Identity physical offset */
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+			     regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+			     regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+	}
+}
+
+/*
+ * Configure the per-VMID contexts (CONTEXT1 plus 14 further contexts at
+ * ctx_distance strides) on each MMHUB instance selected by @mid_mask.
+ *
+ * For every context the control register is programmed with the page table
+ * depth/block size from the VM manager and all protection-fault defaults
+ * enabled, and the page table range is set to [0, max_pfn - 1]. The last
+ * control value written is cached in the hub's vm_cntx_cntl.
+ *
+ * @adev: amdgpu_device pointer
+ * @mid_mask: bitmask of MMHUB instances to program
+ */
+static void mmhub_v4_2_0_mid_setup_vmid_config(struct amdgpu_device *adev,
+					       uint32_t mid_mask)
+{
+	struct amdgpu_vmhub *hub;
+	uint32_t tmp;
+	int i, j;
+
+	for_each_inst(j, mid_mask) {
+		hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
+		for (i = 0; i <= 14; i++) {
+			tmp = RREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j),
+						  regMMVM_CONTEXT1_CNTL,
+						  i * hub->ctx_distance);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+					    adev->vm_manager.num_level);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+					    1);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    PAGE_TABLE_BLOCK_SIZE,
+					    adev->vm_manager.block_size - 9);
+			/* Send no-retry XNACK on fault to suppress VM fault storm. */
+			tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+					    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+					    !amdgpu_noretry);
+			WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j),
+					    regMMVM_CONTEXT1_CNTL,
+					    i * hub->ctx_distance, tmp);
+
+			/* Page table range covers [0, max_pfn - 1]. */
+			WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j),
+					    regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+					    i * hub->ctx_addr_distance, 0);
+			WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j),
+					    regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+					    i * hub->ctx_addr_distance, 0);
+			WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j),
+					    regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+					    i * hub->ctx_addr_distance,
+					    lower_32_bits(adev->vm_manager.max_pfn - 1));
+			WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, j),
+					    regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+					    i * hub->ctx_addr_distance,
+					    upper_32_bits(adev->vm_manager.max_pfn - 1));
+		}
+
+		/*
+		 * Cache the control value per hub, inside the instance loop.
+		 * Doing this after the loop would dereference an unassigned
+		 * hub (and read an unassigned tmp) when @mid_mask is empty,
+		 * and would only ever update the last selected hub.
+		 */
+		hub->vm_cntx_cntl = tmp;
+	}
+}
+
+/*
+ * Program the address range of invalidation engines 0..17 on each MMHUB
+ * instance in @mid_mask: LO32 is set to 0xffffffff and HI32 to 0x3fff.
+ */
+static void mmhub_v4_2_0_mid_program_invalidation(struct amdgpu_device *adev,
+						  uint32_t mid_mask)
+{
+	struct amdgpu_vmhub *vmhub;
+	unsigned int eng, idx;
+
+	for_each_inst(idx, mid_mask) {
+		vmhub = &adev->vmhub[AMDGPU_MMHUB0(idx)];
+
+		for (eng = 0; eng < 18; eng++) {
+			WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, idx),
+					    regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+					    eng * vmhub->eng_addr_distance,
+					    0xffffffff);
+			WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, idx),
+					    regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+					    eng * vmhub->eng_addr_distance,
+					    0x3fff);
+		}
+	}
+}
+
+/*
+ * Bring up GART/VM translation on the MMHUB instances in @mid_mask by running
+ * the aperture, TLB, L2 cache, context and invalidation setup helpers in the
+ * order below. Always returns 0.
+ */
+static int mmhub_v4_2_0_mid_gart_enable(struct amdgpu_device *adev,
+ uint32_t mid_mask)
+{
+ /* GART Enable. */
+ mmhub_v4_2_0_mid_init_gart_aperture_regs(adev, mid_mask);
+ mmhub_v4_2_0_mid_init_system_aperture_regs(adev, mid_mask);
+ mmhub_v4_2_0_mid_init_tlb_regs(adev, mid_mask);
+ mmhub_v4_2_0_mid_init_cache_regs(adev, mid_mask);
+
+ /* Context 0 first, then the identity aperture and per-VMID contexts. */
+ mmhub_v4_2_0_mid_enable_system_domain(adev, mid_mask);
+ mmhub_v4_2_0_mid_disable_identity_aperture(adev, mid_mask);
+ mmhub_v4_2_0_mid_setup_vmid_config(adev, mid_mask);
+ mmhub_v4_2_0_mid_program_invalidation(adev, mid_mask);
+
+ return 0;
+}
+/* Enable GART on every MMHUB instance listed in adev->aid_mask. */
+static int mmhub_v4_2_0_gart_enable(struct amdgpu_device *adev)
+{
+	return mmhub_v4_2_0_mid_gart_enable(adev, adev->aid_mask);
+}
+
+/*
+ * Tear down GART/VM translation on each MMHUB instance in @mid_mask:
+ * zero all 16 context control registers, turn off the L1 TLB and the
+ * advanced driver model, disable the L2 cache and clear MMVM_L2_CNTL3.
+ */
+static void mmhub_v4_2_0_mid_gart_disable(struct amdgpu_device *adev,
+					  uint32_t mid_mask)
+{
+	struct amdgpu_vmhub *vmhub;
+	u32 val;
+	u32 ctx, idx;
+
+	for_each_inst(idx, mid_mask) {
+		vmhub = &adev->vmhub[AMDGPU_MMHUB0(idx)];
+
+		/* Disable all tables */
+		for (ctx = 0; ctx < 16; ctx++) {
+			WREG32_SOC15_OFFSET(MMHUB, GET_INST(MMHUB, idx),
+					    regMMVM_CONTEXT0_CNTL,
+					    ctx * vmhub->ctx_distance, 0);
+		}
+
+		/* Setup TLB control */
+		val = RREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+				   regMMMC_VM_MX_L1_TLB_CNTL);
+		val = REG_SET_FIELD(val, MMMC_VM_MX_L1_TLB_CNTL,
+				    ENABLE_L1_TLB, 0);
+		val = REG_SET_FIELD(val, MMMC_VM_MX_L1_TLB_CNTL,
+				    ENABLE_ADVANCED_DRIVER_MODEL, 0);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+			     regMMMC_VM_MX_L1_TLB_CNTL, val);
+
+		/* Setup L2 cache */
+		val = RREG32_SOC15(MMHUB, GET_INST(MMHUB, idx),
+				   regMMVM_L2_CNTL);
+		val = REG_SET_FIELD(val, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx), regMMVM_L2_CNTL, val);
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, idx), regMMVM_L2_CNTL3, 0);
+	}
+}
+
+/* Disable GART on every MMHUB instance listed in adev->aid_mask. */
+static void mmhub_v4_2_0_gart_disable(struct amdgpu_device *adev)
+{
+	mmhub_v4_2_0_mid_gart_disable(adev, adev->aid_mask);
+}
+
+/*
+ * Write @value into every *_PROTECTION_FAULT_ENABLE_DEFAULT field of
+ * MMVM_L2_PROTECTION_FAULT_CNTL_LO32 on each MMHUB instance in @mid_mask.
+ * Returns without touching hardware when running as an SR-IOV VF.
+ */
+static void
+mmhub_v4_2_0_mid_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value, uint32_t mid_mask)
+{
+ u32 tmp;
+ int i;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for_each_inst(i, mid_mask) {
+ tmp = RREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_L2_PROTECTION_FAULT_CNTL_LO32);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ /* With the defaults turned off, additionally set
+ * CRASH_ON_NO_RETRY_FAULT; it is left as read when @value is true.
+ */
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL_LO32,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_L2_PROTECTION_FAULT_CNTL_LO32, tmp);
+ }
+}
+
+
+/**
+ * mmhub_v4_2_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Applies @value to every MMHUB instance listed in adev->aid_mask.
+ */
+static void
+mmhub_v4_2_0_set_fault_enable_default(struct amdgpu_device *adev,
+				      bool value)
+{
+	mmhub_v4_2_0_mid_set_fault_enable_default(adev, value, adev->aid_mask);
+}
+
+/*
+ * Build the MMVM_INVALIDATE_ENG0_REQ value that invalidates @vmid.
+ *
+ * @flush_type is ignored: FLUSH_TYPE is always programmed to 0 (legacy).
+ * All L2 PTE/PDE0-3 and L1 PTE invalidate bits are set, and the fault
+ * status address is not cleared.
+ */
+static uint32_t mmhub_v4_2_0_get_invalidate_req(unsigned int vmid,
+						uint32_t flush_type)
+{
+	u32 inv_req = 0;
+
+	/* invalidate using legacy mode on vmid */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				PER_VMID_INVALIDATE_REQ, 1 << vmid);
+
+	/* Only use legacy inv on mmhub side */
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				FLUSH_TYPE, 0);
+
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PTES, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE0, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE1, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE2, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L2_PDE3, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				INVALIDATE_L1_PTES, 1);
+	inv_req = REG_SET_FIELD(inv_req, MMVM_INVALIDATE_ENG0_REQ,
+				CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return inv_req;
+}
+
+/*
+ * Decode and log the low 32 bits of the MMHUB L2 protection fault status:
+ * the raw value, the faulting client (name looked up by CID and RW), and the
+ * MORE_FAULTS, WALKER_ERROR, PERMISSION_FAULTS, MAPPING_ERROR and RW fields.
+ *
+ * TODO: l2 protection fault status is increased to 64bits.
+ * some critical fields like FED are moved to STATUS_HI32
+ *
+ * @adev: amdgpu_device pointer
+ * @status: raw MMVM_L2_PROTECTION_FAULT_STATUS_LO32 value
+ */
+static void
+mmhub_v4_2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+					      uint32_t status)
+{
+	uint32_t cid, rw;
+	const char *mmhub_cid = NULL;
+
+	cid = REG_GET_FIELD(status,
+			    MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
+	rw = REG_GET_FIELD(status,
+			   MMVM_L2_PROTECTION_FAULT_STATUS_LO32, RW);
+
+	dev_err(adev->dev,
+		"MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
+		status);
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(4, 2, 0):
+		/*
+		 * CID comes straight from the hardware status word, so bound
+		 * it against the table before indexing; an out-of-range ID is
+		 * reported as "unknown" instead of reading past the array.
+		 */
+		if (cid < ARRAY_SIZE(mmhub_client_ids_v4_2_0))
+			mmhub_cid = mmhub_client_ids_v4_2_0[cid][rw];
+		break;
+	default:
+		mmhub_cid = NULL;
+		break;
+	}
+	dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+		mmhub_cid ? mmhub_cid : "unknown", cid);
+	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+			      MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
+	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+			      MMVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
+	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+			      MMVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
+	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+			      MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
+	dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+
+/* Per-hub callbacks; installed on each hub by mmhub_v4_2_0_mid_init(). */
+static const struct amdgpu_vmhub_funcs mmhub_v4_2_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v4_2_0_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v4_2_0_get_invalidate_req,
+};
+
+/*
+ * Fill in the software description of each MMHUB instance in @mid_mask:
+ * the register offsets, the strides between per-context and per-engine
+ * register sets, the VM fault interrupt mask and the vmhub callback table.
+ * Only the amdgpu_vmhub structures are written; no registers are accessed.
+ */
+static void mmhub_v4_2_0_mid_init(struct amdgpu_device *adev,
+ uint32_t mid_mask)
+{
+ struct amdgpu_vmhub *hub;
+ int i;
+
+ for_each_inst(i, mid_mask) {
+ hub = &adev->vmhub[AMDGPU_MMHUB0(i)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_CONTEXT0_CNTL);
+ /* TODO: add a new member to accommodate additional fault status/cntl reg */
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_L2_PROTECTION_FAULT_STATUS_LO32);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i),
+ regMMVM_L2_PROTECTION_FAULT_CNTL_LO32);
+
+ /* Strides between consecutive context / invalidation-engine
+ * register sets, derived from the register 0 and 1 addresses.
+ */
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ /* All protection-fault interrupt enable bits of CONTEXT1_CNTL. */
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vm_l2_bank_select_reserved_cid2 =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+ hub->vm_contexts_disable =
+ SOC15_REG_OFFSET(MMHUB, GET_INST(MMHUB, i), regMMVM_CONTEXTS_DISABLE);
+
+ hub->vmhub_funcs = &mmhub_v4_2_0_vmhub_funcs;
+ }
+}
+
+/* Initialise the vmhub description of every MMHUB instance in adev->aid_mask. */
+static void mmhub_v4_2_0_init(struct amdgpu_device *adev)
+{
+	mmhub_v4_2_0_mid_init(adev, adev->aid_mask);
+}
+
+/*
+ * Enable or disable medium grain clock gating on MMHUB instance 0.
+ *
+ * Enabling sets MM_ATC_L2_MISC_CG.ENABLE and clears the RDRET/WRRET
+ * tap-chain FGCG disable bits in DAGB0/DAGB1_CNTL_MISC2; disabling does the
+ * opposite. A register is only written back when its value changed.
+ */
+static void
+mmhub_v4_2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+					      bool enable)
+{
+	uint32_t cg_old, cg_new;
+	uint32_t dagb0_old, dagb0_new;
+	uint32_t dagb1_old, dagb1_new;
+
+	cg_old = cg_new = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+				       regMM_ATC_L2_MISC_CG);
+	dagb0_old = dagb0_new = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+					     regDAGB0_CNTL_MISC2);
+	dagb1_old = dagb1_new = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+					     regDAGB1_CNTL_MISC2);
+
+	if (!enable) {
+		cg_new &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+		dagb0_new |= (DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+			      DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+		dagb1_new |= (DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+			      DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+	} else {
+		cg_new |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+		dagb0_new &= ~(DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+			       DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+		dagb1_new &= ~(DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+			       DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+	}
+
+	/* Skip the MMIO write for registers that already hold the value. */
+	if (cg_new != cg_old)
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+			     regMM_ATC_L2_MISC_CG, cg_new);
+	if (dagb0_new != dagb0_old)
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+			     regDAGB0_CNTL_MISC2, dagb0_new);
+	if (dagb1_new != dagb1_old)
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+			     regDAGB1_CNTL_MISC2, dagb1_new);
+}
+
+/*
+ * Set or clear MM_ATC_L2_MISC_CG.MEM_LS_ENABLE on MMHUB instance 0.
+ * The register is only written back when the bit actually changes.
+ */
+static void
+mmhub_v4_2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t cg_old, cg_new;
+
+	cg_old = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMM_ATC_L2_MISC_CG);
+	cg_new = cg_old;
+
+	if (!enable)
+		cg_new &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+	else
+		cg_new |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+	if (cg_new != cg_old)
+		WREG32_SOC15(MMHUB, GET_INST(MMHUB, 0),
+			     regMM_ATC_L2_MISC_CG, cg_new);
+}
+
+/*
+ * Apply the requested clock-gating @state to the MMHUB features the device
+ * advertises in adev->cg_flags (MC_MGCG and MC_LS). Does nothing on an
+ * SR-IOV VF. Always returns 0.
+ */
+static int mmhub_v4_2_0_set_clockgating(struct amdgpu_device *adev,
+					enum amd_clockgating_state state)
+{
+	const bool gate = (state == AMD_CG_STATE_GATE);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+		mmhub_v4_2_0_update_medium_grain_clock_gating(adev, gate);
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+		mmhub_v4_2_0_update_medium_grain_light_sleep(adev, gate);
+
+	return 0;
+}
+
+/*
+ * Report which MMHUB clock-gating features are currently enabled, based on
+ * the ENABLE and MEM_LS_ENABLE bits of MM_ATC_L2_MISC_CG on instance 0.
+ * The matching AMD_CG_SUPPORT_* bits are OR-ed into *@flags.
+ */
+static void mmhub_v4_2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* NOTE(review): on an SR-IOV VF *flags is cleared but execution still
+ * continues to the register read below, whereas
+ * mmhub_v4_2_0_set_clockgating() returns early for VFs -- confirm
+ * whether an early return is intended here.
+ */
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, GET_INST(MMHUB, 0), regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+/* MMHUB 4.2.0 callback table; exported through mmhub_v4_2_0.h. */
+const struct amdgpu_mmhub_funcs mmhub_v4_2_0_funcs = {
+ .init = mmhub_v4_2_0_init,
+ .get_fb_location = mmhub_v4_2_0_get_fb_location,
+ .get_mc_fb_offset = mmhub_v4_2_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = mmhub_v4_2_0_setup_vm_pt_regs,
+ .gart_enable = mmhub_v4_2_0_gart_enable,
+ .gart_disable = mmhub_v4_2_0_gart_disable,
+ .set_fault_enable_default = mmhub_v4_2_0_set_fault_enable_default,
+ .set_clockgating = mmhub_v4_2_0_set_clockgating,
+ .get_clockgating = mmhub_v4_2_0_get_clockgating,
+};
+
+/*
+ * XCP resume hook: reprogram the fault-enable defaults for the MMHUB
+ * instances in @inst_mask and, unless running as an SR-IOV VF, re-enable
+ * GART on them.
+ *
+ * The fault defaults are enabled unless amdgpu_vm_fault_stop is
+ * AMDGPU_VM_FAULT_STOP_ALWAYS. Returns the GART enable result, or 0 on a VF.
+ */
+static int mmhub_v4_2_0_xcp_resume(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = handle;
+	bool value = (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS);
+
+	mmhub_v4_2_0_mid_set_fault_enable_default(adev, value, inst_mask);
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	return mmhub_v4_2_0_mid_gart_enable(adev, inst_mask);
+}
+
+/*
+ * XCP suspend hook: disable GART on the MMHUB instances in @inst_mask.
+ * Nothing is done on an SR-IOV VF. Always returns 0.
+ */
+static int mmhub_v4_2_0_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = handle;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	mmhub_v4_2_0_mid_gart_disable(adev, inst_mask);
+
+	return 0;
+}
+
+/*
+ * XCP suspend/resume hooks for MMHUB 4.2.0.
+ * NOTE(review): this object has external linkage and is not const, but the
+ * mmhub_v4_2_0.h added by this patch only declares mmhub_v4_2_0_funcs --
+ * confirm where users of this table get its declaration.
+ */
+struct amdgpu_xcp_ip_funcs mmhub_v4_2_0_xcp_funcs = {
+ .suspend = &mmhub_v4_2_0_xcp_suspend,
+ .resume = &mmhub_v4_2_0_xcp_resume
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.h
new file mode 100644
index 000000000000..4ea6de7ac7ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_2_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V4_2_0_H__
+#define __MMHUB_V4_2_0_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v4_2_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index e7cd07383d56..f2e456390b27 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -96,7 +96,9 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
- dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec \n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
+ dev_err(adev->dev,
+ "Doesn't get TRN_MSG_ACK from pf in %d msec\n",
+ NV_MAILBOX_POLL_ACK_TIMEDOUT);
return -ETIME;
}
@@ -209,6 +211,9 @@ send_request:
case IDH_REQ_RAS_CHK_CRITI:
event = IDH_REQ_RAS_CHK_CRITI_READY;
break;
+ case IDH_REQ_RAS_REMOTE_CMD:
+ event = IDH_REQ_RAS_REMOTE_CMD_READY;
+ break;
default:
break;
}
@@ -585,6 +590,13 @@ static int xgpu_nv_check_vf_critical_region(struct amdgpu_device *adev, u64 addr
adev, IDH_REQ_RAS_CHK_CRITI, addr_hi, addr_lo, 0);
}
+/*
+ * Issue an IDH_REQ_RAS_REMOTE_CMD access request, forwarding the three
+ * parameters unchanged, and return the result of the request helper.
+ */
+static int xgpu_nv_req_remote_ras_cmd(struct amdgpu_device *adev,
+				      u32 param1, u32 param2, u32 param3)
+{
+	int ret;
+
+	ret = xgpu_nv_send_access_requests_with_param(adev,
+						      IDH_REQ_RAS_REMOTE_CMD,
+						      param1, param2, param3);
+	return ret;
+}
+
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_full_gpu = xgpu_nv_request_full_gpu_access,
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
@@ -598,5 +610,6 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_ras_err_count = xgpu_nv_req_ras_err_count,
.req_ras_cper_dump = xgpu_nv_req_ras_cper_dump,
.req_bad_pages = xgpu_nv_req_ras_bad_pages,
- .req_ras_chk_criti = xgpu_nv_check_vf_critical_region
+ .req_ras_chk_criti = xgpu_nv_check_vf_critical_region,
+ .req_remote_ras_cmd = xgpu_nv_req_remote_ras_cmd,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index c1083e5e41e0..dc57a4f697ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -43,7 +43,8 @@ enum idh_request {
IDH_REQ_RAS_ERROR_COUNT = 203,
IDH_REQ_RAS_CPER_DUMP = 204,
IDH_REQ_RAS_BAD_PAGES = 205,
- IDH_REQ_RAS_CHK_CRITI = 206
+ IDH_REQ_RAS_CHK_CRITI = 206,
+ IDH_REQ_RAS_REMOTE_CMD = 207,
};
enum idh_event {
@@ -64,6 +65,7 @@ enum idh_event {
IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
IDH_UNRECOV_ERR_NOTIFICATION = 17,
IDH_REQ_RAS_CHK_CRITI_READY = 18,
+ IDH_REQ_RAS_REMOTE_CMD_READY = 19,
IDH_TEXT_MESSAGE = 255,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
index 9b4025c39e44..db14a1a326d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
@@ -30,6 +30,31 @@
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include <uapi/linux/kfd_ioctl.h>
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL_nbif_4_10 0x4f0aeb
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL1_nbif_4_10 0x4f0aec
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL1_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL_nbif_4_10 0x4f0aed
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL1_nbif_4_10 0x4f0aee
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL1_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL_nbif_4_10 0x4f0aef
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL1_nbif_4_10 0x4f0af0
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL1_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL_nbif_4_10 0x4f0af1
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL1_nbif_4_10 0x4f0af2
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL1_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL_nbif_4_10 0x4f0af3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL1_nbif_4_10 0x4f0af4
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL1_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL_nbif_4_10 0x4f0af5
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL_nbif_4_10_BASE_IDX 3
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL1_nbif_4_10 0x4f0af6
+#define regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL1_nbif_4_10_BASE_IDX 3
+
static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
@@ -98,7 +123,11 @@ static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev,
S2A_DOORBELL_PORT2_RANGE_SIZE,
0);
- WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range);
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) {
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL_nbif_4_10, doorbell_range);
+ } else {
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range);
+ }
}
}
@@ -140,16 +169,28 @@ static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev,
S2A_DOORBELL_PORT4_RANGE_SIZE,
0);
- if (instance)
- WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range);
- else
- WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range);
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) {
+ if (instance)
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL_nbif_4_10, doorbell_range);
+ else
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL_nbif_4_10, doorbell_range);
+ } else {
+ if (instance)
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range);
+ else
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range);
+ }
}
static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev)
{
- WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007);
- WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d);
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) {
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL_nbif_4_10, 0x30000007);
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL_nbif_4_10, 0x3000000d);
+ } else {
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007);
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d);
+ }
}
static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev,
@@ -214,7 +255,11 @@ static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev,
S2A_DOORBELL_PORT1_RANGE_SIZE,
0);
- WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range);
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) {
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL_nbif_4_10, ih_doorbell_range);
+ } else {
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range);
+ }
}
static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev)
@@ -269,12 +314,20 @@ static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev)
static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev)
{
- return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4)) {
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX);
+ }
+ else {
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+ }
}
static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev)
{
- return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 11, 4))
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA);
+ else
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
}
const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
index bed5ef4d8788..05cbd65420d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -279,6 +279,7 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
case IP_VERSION(7, 11, 1):
case IP_VERSION(7, 11, 2):
case IP_VERSION(7, 11, 3):
+ case IP_VERSION(7, 11, 4):
data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 50e77d9b30af..f17c3839aea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -593,10 +593,6 @@ static void nv_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.sdma_doorbell_range = 20;
}
-static void nv_pre_asic_init(struct amdgpu_device *adev)
-{
-}
-
static int nv_update_umd_stable_pstate(struct amdgpu_device *adev,
bool enter)
{
@@ -630,7 +626,6 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = {
.need_reset_on_init = &nv_need_reset_on_init,
.get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &amdgpu_dpm_is_baco_supported,
- .pre_asic_init = &nv_pre_asic_init,
.update_umd_stable_pstate = &nv_update_umd_stable_pstate,
.query_video_codecs = &nv_query_video_codecs,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 73f87131a7e9..e8f768638fd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -299,6 +299,8 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_RS64_MEC_P1_STACK = 95, /* RS64 MEC stack P1 SOC21 */
GFX_FW_TYPE_RS64_MEC_P2_STACK = 96, /* RS64 MEC stack P2 SOC21 */
GFX_FW_TYPE_RS64_MEC_P3_STACK = 97, /* RS64 MEC stack P3 SOC21 */
+ GFX_FW_TYPE_RLX6_UCODE_CORE1 = 98, /* RLCV_IRAM MI */
+ GFX_FW_TYPE_RLX6_DRAM_BOOT_CORE1 = 99, /* RLCV DRAM BOOT MI */
GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */
GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */
GFX_FW_TYPE_VPE = 102,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 3584b8c18fd9..8a6431487ed0 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -140,7 +140,7 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp,
static int psp_v10_0_mode1_reset(struct psp_context *psp)
{
- DRM_INFO("psp mode 1 reset not supported now! \n");
+ drm_info(adev_to_drm(psp->adev), "psp mode 1 reset not supported now!\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index a9be7a505026..9aa988982304 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -412,7 +412,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
MBOX_TOS_READY_MASK, 0);
if (ret) {
- DRM_INFO("psp is not working correctly before mode1 reset!\n");
+ drm_info(adev_to_drm(adev), "psp is not working correctly before mode1 reset!\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index 4c6450d62299..c3cae29eeca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -225,7 +225,7 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
MBOX_TOS_READY_MASK, 0);
if (ret) {
- DRM_INFO("psp is not working correctly before mode1 reset!\n");
+ drm_info(adev_to_drm(adev), "psp is not working correctly before mode1 reset!\n");
return -EINVAL;
}
@@ -240,11 +240,11 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
0);
if (ret) {
- DRM_INFO("psp mode 1 reset failed!\n");
+ drm_info(adev_to_drm(adev), "psp mode 1 reset failed!\n");
return -EINVAL;
}
- DRM_INFO("psp mode1 reset succeed \n");
+ drm_info(adev_to_drm(adev), "psp mode1 reset succeed\n");
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c
new file mode 100644
index 000000000000..3aca293e2f0c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v15_0.h"
+
+#include "mp/mp_15_0_0_offset.h"
+#include "mp/mp_15_0_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_15_0_0_toc.bin");
+
+/*
+ * Fill a local prefix buffer through amdgpu_ucode_ip_version_decode() for
+ * MP0_HWIP and pass it to psp_init_toc_microcode().  The status of that
+ * call is returned unchanged.
+ */
+static int psp_v15_0_0_init_microcode(struct psp_context *psp)
+{
+	char fw_prefix[30];
+
+	amdgpu_ucode_ip_version_decode(psp->adev, MP0_HWIP, fw_prefix,
+				       sizeof(fw_prefix));
+
+	return psp_init_toc_microcode(psp, fw_prefix);
+}
+
+/*
+ * Send the ring-destroy command to the PSP and wait for its response.
+ *
+ * An SR-IOV VF writes GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING to C2PMSG_101;
+ * every other configuration writes GFX_CTRL_CMD_ID_DESTROY_RINGS to
+ * C2PMSG_64.  The same register is then polled for the response flag
+ * (bit 31).  @ring_type is not used by this implementation.
+ *
+ * Returns the result of psp_wait_for().
+ */
+static int psp_v15_0_0_ring_stop(struct psp_context *psp,
+				 enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		/* Non-VF path: the destroy command goes through C2PMSG_64 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64,
+			     GFX_CTRL_CMD_ID_DESTROY_RINGS);
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+		/* Poll for the response flag (bit 31) */
+		return psp_wait_for(psp,
+				    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+				    0x80000000, 0x80000000, false);
+	}
+
+	/* VF path: the destroy command goes through C2PMSG_101 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+		     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+	/* Poll for the response flag (bit 31) */
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+			    0x80000000, 0x80000000, false);
+}
+
+/*
+ * Hand the kernel-mode ring (psp->km_ring) over to the PSP.
+ *
+ * SR-IOV VF: stop the ring first, program the ring address into
+ * C2PMSG_102/103, issue GFX_CTRL_CMD_ID_INIT_GPCOM_RING through C2PMSG_101
+ * and wait for the response in that register.
+ *
+ * Otherwise: wait for bit 31 of C2PMSG_64, program the ring address and
+ * size into C2PMSG_69/70/71, write @ring_type << 16 to C2PMSG_64 and wait
+ * for the response in that register.
+ *
+ * Returns the status of the final wait, or the error of the earlier
+ * stop/wait step that failed.
+ */
+static int psp_v15_0_0_ring_create(struct psp_context *psp,
+				   enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	unsigned int psp_ring_reg = 0;
+	struct psp_ring *ring = &psp->km_ring;
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev)) {
+		ret = psp_v15_0_0_ring_stop(psp, ring_type);
+		if (ret) {
+			/* report the helper that was actually called */
+			drm_err(adev_to_drm(adev), "psp_v15_0_0_ring_stop failed!\n");
+			return ret;
+		}
+
+		/* Write low address of the ring to C2PMSG_102 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_103 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103, psp_ring_reg);
+
+		/* Write the ring initialization command to C2PMSG_101 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+			     GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_101 */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+				   0x80000000, 0x8000FFFF, false);
+
+	} else {
+		/* Wait for sOS ready for ring creation */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+				   0x80000000, 0x80000000, false);
+		if (ret) {
+			drm_err(adev_to_drm(adev),
+				"Failed to wait for trust OS ready for ring creation\n");
+			return ret;
+		}
+
+		/* Write low address of the ring to C2PMSG_69 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_70 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg);
+		/* Write size of ring to C2PMSG_71 */
+		psp_ring_reg = ring->ring_size;
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg);
+		/* Write the ring initialization command to C2PMSG_64 */
+		psp_ring_reg = ring_type;
+		psp_ring_reg = psp_ring_reg << 16;
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_64 */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+				   0x80000000, 0x8000FFFF, false);
+	}
+
+	return ret;
+}
+
+/*
+ * Stop the PSP ring and release the buffer backing psp->km_ring.
+ *
+ * A failed stop is logged against the owning DRM device but does not
+ * prevent the buffer from being freed; the stop status is what is returned.
+ */
+static int psp_v15_0_0_ring_destroy(struct psp_context *psp,
+				    enum psp_ring_type ring_type)
+{
+	struct psp_ring *ring = &psp->km_ring;
+	struct amdgpu_device *adev = psp->adev;
+	int ret;
+
+	ret = psp_v15_0_0_ring_stop(psp, ring_type);
+	if (ret)
+		drm_err(adev_to_drm(adev), "Fail to stop psp ring\n");
+
+	amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+			      &ring->ring_mem_mc_addr,
+			      (void **)&ring->ring_mem);
+
+	return ret;
+}
+
+/*
+ * Read the ring write pointer: C2PMSG_102 for an SR-IOV VF, C2PMSG_67
+ * otherwise.
+ */
+static uint32_t psp_v15_0_0_ring_get_wptr(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev))
+		return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102);
+
+	return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67);
+}
+
+/*
+ * Publish a new ring write pointer.
+ *
+ * Outside SR-IOV the value is written to C2PMSG_67.  An SR-IOV VF writes
+ * it to C2PMSG_102 and then writes GFX_CTRL_CMD_ID_CONSUME_CMD to
+ * C2PMSG_101.
+ */
+static void psp_v15_0_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value);
+		return;
+	}
+
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value);
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+		     GFX_CTRL_CMD_ID_CONSUME_CMD);
+}
+
+/*
+ * PSP callback table for v15.0.0.  Only microcode init and the ring
+ * create/stop/destroy/wptr hooks are filled in here; every other member
+ * of struct psp_funcs is left zero-initialized.
+ */
+static const struct psp_funcs psp_v15_0_0_funcs = {
+ .init_microcode = psp_v15_0_0_init_microcode,
+ .ring_create = psp_v15_0_0_ring_create,
+ .ring_stop = psp_v15_0_0_ring_stop,
+ .ring_destroy = psp_v15_0_0_ring_destroy,
+ .ring_get_wptr = psp_v15_0_0_ring_get_wptr,
+ .ring_set_wptr = psp_v15_0_0_ring_set_wptr,
+};
+
+/* Point @psp at the v15.0.0 callback table (psp_v15_0_0_funcs). */
+void psp_v15_0_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v15_0_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.h
new file mode 100644
index 000000000000..ebd612103526
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V15_0_0_H__
+#define __PSP_V15_0_0_H__
+
+#include "amdgpu_psp.h"
+
+/* Sets psp->funcs to the PSP v15.0.0 callback table. */
+void psp_v15_0_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.c
new file mode 100644
index 000000000000..5249f5bd2a10
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v15_0_8.h"
+
+#include "mp/mp_15_0_8_offset.h"
+#include "mp/mp_15_0_8_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_15_0_8_toc.bin");
+
+/*
+ * Fill a local prefix buffer through amdgpu_ucode_ip_version_decode() for
+ * MP0_HWIP and pass it to psp_init_toc_microcode().  The status of that
+ * call is returned unchanged.
+ */
+static int psp_v15_0_8_init_microcode(struct psp_context *psp)
+{
+	char fw_prefix[30];
+
+	amdgpu_ucode_ip_version_decode(psp->adev, MP0_HWIP, fw_prefix,
+				       sizeof(fw_prefix));
+
+	return psp_init_toc_microcode(psp, fw_prefix);
+}
+
+/*
+ * Send the ring-destroy command to the PSP and wait for its response.
+ *
+ * An SR-IOV VF writes GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING to C2PMSG_101;
+ * every other configuration writes GFX_CTRL_CMD_ID_DESTROY_RINGS to
+ * C2PMSG_64.  The same register is then polled for the response flag
+ * (bit 31).  @ring_type is not used by this implementation.
+ *
+ * Returns the result of psp_wait_for().
+ */
+static int psp_v15_0_8_ring_stop(struct psp_context *psp,
+				 enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		/* Non-VF path: the destroy command goes through C2PMSG_64 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64,
+			     GFX_CTRL_CMD_ID_DESTROY_RINGS);
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+		/* Poll for the response flag (bit 31) */
+		return psp_wait_for(psp,
+				    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+				    0x80000000, 0x80000000, false);
+	}
+
+	/* VF path: the destroy command goes through C2PMSG_101 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+		     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+	/* Poll for the response flag (bit 31) */
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+			    0x80000000, 0x80000000, false);
+}
+
+/*
+ * Hand the kernel-mode ring (psp->km_ring) over to the PSP.
+ *
+ * SR-IOV VF: stop the ring first, program the ring address into
+ * C2PMSG_102/103, issue GFX_CTRL_CMD_ID_INIT_GPCOM_RING through C2PMSG_101
+ * and wait for the response in that register.
+ *
+ * Otherwise: wait for bit 31 of C2PMSG_64, program the ring address and
+ * size into C2PMSG_69/70/71, write @ring_type << 16 to C2PMSG_64 and wait
+ * for the response in that register.
+ *
+ * Returns the status of the final wait, or the error of the earlier
+ * stop/wait step that failed.
+ */
+static int psp_v15_0_8_ring_create(struct psp_context *psp,
+				   enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	unsigned int psp_ring_reg = 0;
+	struct psp_ring *ring = &psp->km_ring;
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev)) {
+		ret = psp_v15_0_8_ring_stop(psp, ring_type);
+		if (ret) {
+			/* report the helper that was actually called */
+			drm_err(adev_to_drm(adev), "psp_v15_0_8_ring_stop failed!\n");
+			return ret;
+		}
+
+		/* Write low address of the ring to C2PMSG_102 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_103 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103, psp_ring_reg);
+
+		/* Write the ring initialization command to C2PMSG_101 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+			     GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_101 */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+				   0x80000000, 0x8000FFFF, false);
+
+	} else {
+		/* Wait for sOS ready for ring creation */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+				   0x80000000, 0x80000000, false);
+		if (ret) {
+			drm_err(adev_to_drm(adev),
+				"Failed to wait for trust OS ready for ring creation\n");
+			return ret;
+		}
+
+		/* Write low address of the ring to C2PMSG_69 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_70 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg);
+		/* Write size of ring to C2PMSG_71 */
+		psp_ring_reg = ring->ring_size;
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg);
+		/* Write the ring initialization command to C2PMSG_64 */
+		psp_ring_reg = ring_type;
+		psp_ring_reg = psp_ring_reg << 16;
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_64 */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+				   0x80000000, 0x8000FFFF, false);
+	}
+
+	return ret;
+}
+
+/*
+ * Stop the PSP ring and release the buffer backing psp->km_ring.
+ *
+ * A failed stop is logged against the owning DRM device but does not
+ * prevent the buffer from being freed; the stop status is what is returned.
+ */
+static int psp_v15_0_8_ring_destroy(struct psp_context *psp,
+				    enum psp_ring_type ring_type)
+{
+	struct psp_ring *ring = &psp->km_ring;
+	struct amdgpu_device *adev = psp->adev;
+	int ret;
+
+	ret = psp_v15_0_8_ring_stop(psp, ring_type);
+	if (ret)
+		drm_err(adev_to_drm(adev), "Fail to stop psp ring\n");
+
+	amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+			      &ring->ring_mem_mc_addr,
+			      (void **)&ring->ring_mem);
+
+	return ret;
+}
+
+/*
+ * Read the ring write pointer: C2PMSG_102 for an SR-IOV VF, C2PMSG_67
+ * otherwise.
+ */
+static uint32_t psp_v15_0_8_ring_get_wptr(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev))
+		return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102);
+
+	return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67);
+}
+
+/*
+ * Publish a new ring write pointer.
+ *
+ * Outside SR-IOV the value is written to C2PMSG_67.  An SR-IOV VF writes
+ * it to C2PMSG_102 and then writes GFX_CTRL_CMD_ID_CONSUME_CMD to
+ * C2PMSG_101.
+ */
+static void psp_v15_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value);
+		return;
+	}
+
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value);
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+		     GFX_CTRL_CMD_ID_CONSUME_CMD);
+}
+
+/*
+ * Translate ucode->ucode_id into the GFX_FW_TYPE_* value stored through
+ * @type.
+ *
+ * Returns 0 when the ID is listed below.  Returns -EINVAL for
+ * AMDGPU_UCODE_ID_MAXIMUM and for any ID without a case; *type is not
+ * written in that situation.
+ */
+static int psp_v15_0_8_get_fw_type(struct amdgpu_firmware_info *ucode,
+ enum psp_gfx_fw_type *type)
+{
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_CAP:
+ *type = GFX_FW_TYPE_CAP;
+ break;
+ case AMDGPU_UCODE_ID_SDMA0:
+ *type = GFX_FW_TYPE_SDMA0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA1:
+ *type = GFX_FW_TYPE_SDMA1;
+ break;
+ case AMDGPU_UCODE_ID_SDMA2:
+ *type = GFX_FW_TYPE_SDMA2;
+ break;
+ case AMDGPU_UCODE_ID_SDMA3:
+ *type = GFX_FW_TYPE_SDMA3;
+ break;
+ case AMDGPU_UCODE_ID_SDMA4:
+ *type = GFX_FW_TYPE_SDMA4;
+ break;
+ case AMDGPU_UCODE_ID_SDMA5:
+ *type = GFX_FW_TYPE_SDMA5;
+ break;
+ case AMDGPU_UCODE_ID_SDMA6:
+ *type = GFX_FW_TYPE_SDMA6;
+ break;
+ case AMDGPU_UCODE_ID_SDMA7:
+ *type = GFX_FW_TYPE_SDMA7;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES:
+ *type = GFX_FW_TYPE_RS64_MES;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ *type = GFX_FW_TYPE_RS64_MES_STACK;
+ break;
+ /* The MES1 ucode IDs are reported as the RS64 KIQ firmware types. */
+ case AMDGPU_UCODE_ID_CP_MES1:
+ *type = GFX_FW_TYPE_RS64_KIQ;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ *type = GFX_FW_TYPE_RS64_KIQ_STACK;
+ break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ *type = GFX_FW_TYPE_RLC_P;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ *type = GFX_FW_TYPE_RLC_V;
+ break;
+ case AMDGPU_UCODE_ID_RLC_G:
+ *type = GFX_FW_TYPE_RLC_G;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ *type = GFX_FW_TYPE_RLC_IRAM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ *type = GFX_FW_TYPE_RLC_DRAM_BOOT;
+ break;
+ case AMDGPU_UCODE_ID_RLC_IRAM_1:
+ *type = GFX_FW_TYPE_RLX6_UCODE_CORE1;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM_1:
+ *type = GFX_FW_TYPE_RLX6_DRAM_BOOT_CORE1;
+ break;
+ case AMDGPU_UCODE_ID_SMC:
+ *type = GFX_FW_TYPE_SMU;
+ break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ *type = GFX_FW_TYPE_PPTABLE;
+ break;
+ case AMDGPU_UCODE_ID_VCN:
+ *type = GFX_FW_TYPE_VCN;
+ break;
+ case AMDGPU_UCODE_ID_VCN1:
+ *type = GFX_FW_TYPE_VCN1;
+ break;
+ case AMDGPU_UCODE_ID_VCN0_RAM:
+ *type = GFX_FW_TYPE_VCN0_RAM;
+ break;
+ case AMDGPU_UCODE_ID_VCN1_RAM:
+ *type = GFX_FW_TYPE_VCN1_RAM;
+ break;
+ /*
+ * NOTE(review): TH0 and RS64 both resolve to GFX_FW_TYPE_SDMA0 while TH1
+ * below has a dedicated type - confirm this asymmetry is intended.
+ */
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ *type = GFX_FW_TYPE_SDMA0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH1;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ *type = GFX_FW_TYPE_IMU_I;
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ *type = GFX_FW_TYPE_IMU_D;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ *type = GFX_FW_TYPE_RS64_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P2_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P3_STACK;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ *type = GFX_FW_TYPE_UMSCH_UCODE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ *type = GFX_FW_TYPE_UMSCH_DATA;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ *type = GFX_FW_TYPE_UMSCH_CMD_BUFFER;
+ break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ *type = GFX_FW_TYPE_P2S_TABLE;
+ break;
+ /* The sentinel and every unlisted ID are rejected. */
+ case AMDGPU_UCODE_ID_MAXIMUM:
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * PSP callback table for v15.0.8: microcode init, the ring
+ * create/stop/destroy/wptr hooks and the ucode-ID to firmware-type lookup.
+ * Every other member of struct psp_funcs is left zero-initialized.
+ */
+static const struct psp_funcs psp_v15_0_8_funcs = {
+ .init_microcode = psp_v15_0_8_init_microcode,
+ .ring_create = psp_v15_0_8_ring_create,
+ .ring_stop = psp_v15_0_8_ring_stop,
+ .ring_destroy = psp_v15_0_8_ring_destroy,
+ .ring_get_wptr = psp_v15_0_8_ring_get_wptr,
+ .ring_set_wptr = psp_v15_0_8_ring_set_wptr,
+ .get_fw_type = psp_v15_0_8_get_fw_type,
+};
+
+/* Point @psp at the v15.0.8 callback table (psp_v15_0_8_funcs). */
+void psp_v15_0_8_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v15_0_8_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.h b/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.h
new file mode 100644
index 000000000000..6bb1bb517007
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v15_0_8.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V15_0_8_H__
+#define __PSP_V15_0_8_H__
+
+#include "amdgpu_psp.h"
+
+/* Sets psp->funcs to the PSP v15.0.8 callback table. */
+void psp_v15_0_8_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 833830bc3e2e..f5030efc6c80 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -315,7 +315,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, 0);
if (ret) {
- DRM_INFO("psp is not working correctly before mode1 reset!\n");
+ drm_info(adev_to_drm(adev), "psp is not working correctly before mode1 reset!\n");
return -EINVAL;
}
@@ -329,11 +329,11 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, 0);
if (ret) {
- DRM_INFO("psp mode 1 reset failed!\n");
+ drm_info(adev_to_drm(adev), "psp mode 1 reset failed!\n");
return -EINVAL;
}
- DRM_INFO("psp mode1 reset succeed \n");
+ drm_info(adev_to_drm(adev), "psp mode1 reset succeed\n");
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 8ddc4df06a1f..7811cbb1f7ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1031,7 +1031,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 20);
if (r) {
- DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ drm_err(adev_to_drm(adev), "dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1096,7 +1096,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = amdgpu_ib_get(adev, NULL, 256,
AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
goto err0;
}
@@ -1117,11 +1117,11 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out\n");
+ drm_err(adev_to_drm(adev), "IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "fence wait failed (%ld).\n", r);
goto err1;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 51101b0aa2fa..dbe5b8f109f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -931,7 +931,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 20);
if (r) {
- DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ drm_err(adev_to_drm(adev), "dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
@@ -995,7 +995,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
goto err0;
}
@@ -1016,11 +1016,11 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out\n");
+ drm_err(adev_to_drm(adev), "IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "fence wait failed (%ld).\n", r);
goto err1;
}
@@ -1325,8 +1325,8 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
ring->use_doorbell = true;
ring->me = i;
- DRM_INFO("use_doorbell being set to: [%s]\n",
- ring->use_doorbell?"true":"false");
+ drm_info(adev_to_drm(adev), "use_doorbell being set to: [%s]\n",
+ ring->use_doorbell?"true":"false");
ring->doorbell_index =
(adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 217040044987..eec659194718 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -54,6 +54,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_3.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_4.bin");
#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880
@@ -937,7 +938,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 5);
if (r) {
- DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ drm_err(adev_to_drm(adev), "dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1001,7 +1002,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
goto err0;
}
@@ -1022,11 +1023,11 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out\n");
+ drm_err(adev_to_drm(adev), "IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "fence wait failed (%ld).\n", r);
goto err1;
}
@@ -1268,6 +1269,17 @@ static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev)
}
}
+/* all sizes are in bytes */
+#define SDMA6_CSA_SIZE 32
+#define SDMA6_CSA_ALIGNMENT 4
+
+/*
+ * Report the SDMA v6 CSA requirements: stores SDMA6_CSA_SIZE and
+ * SDMA6_CSA_ALIGNMENT (both in bytes) into @csa_info.  @adev is not used.
+ */
+static void sdma_v6_0_get_csa_info(struct amdgpu_device *adev,
+ struct amdgpu_sdma_csa_info *csa_info)
+{
+ csa_info->size = SDMA6_CSA_SIZE;
+ csa_info->alignment = SDMA6_CSA_ALIGNMENT;
+}
+
static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1300,6 +1312,7 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
sdma_v6_0_set_irq_funcs(adev);
sdma_v6_0_set_mqd_funcs(adev);
sdma_v6_0_set_ras_funcs(adev);
+ adev->sdma.get_csa_info = &sdma_v6_0_get_csa_info;
return 0;
}
@@ -1586,7 +1599,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
- r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 2b81344dcd66..8d16ef257bcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -816,7 +816,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
- r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
if (r)
return r;
@@ -954,7 +954,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 5);
if (r) {
- DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ drm_err(adev_to_drm(adev), "dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1018,7 +1018,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
goto err0;
}
@@ -1039,11 +1039,11 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out\n");
+ drm_err(adev_to_drm(adev), "IB test timed out\n");
r = -ETIMEDOUT;
goto err1;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ drm_err(adev_to_drm(adev), "fence wait failed (%ld).\n", r);
goto err1;
}
@@ -1253,6 +1253,17 @@ static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
+/* all sizes are in bytes */
+#define SDMA7_CSA_SIZE 32
+#define SDMA7_CSA_ALIGNMENT 4
+
+/*
+ * Report the SDMA v7 CSA requirements: stores SDMA7_CSA_SIZE and
+ * SDMA7_CSA_ALIGNMENT (both in bytes) into @csa_info.  @adev is not used.
+ */
+static void sdma_v7_0_get_csa_info(struct amdgpu_device *adev,
+ struct amdgpu_sdma_csa_info *csa_info)
+{
+ csa_info->size = SDMA7_CSA_SIZE;
+ csa_info->alignment = SDMA7_CSA_ALIGNMENT;
+}
+
static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -1286,6 +1297,7 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
sdma_v7_0_set_vm_pte_funcs(adev);
sdma_v7_0_set_irq_funcs(adev);
sdma_v7_0_set_mqd_funcs(adev);
+ adev->sdma.get_csa_info = &sdma_v7_0_get_csa_info;
return 0;
}
@@ -1492,7 +1504,7 @@ static int sdma_v7_0_ring_preempt_ib(struct amdgpu_ring *ring)
ring->trail_seq += 1;
r = amdgpu_ring_alloc(ring, 10);
if (r) {
- DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
+ DRM_ERROR("ring %d failed to be allocated\n", ring->idx);
return r;
}
sdma_v7_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
new file mode 100644
index 000000000000..5bc45c3e00d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c
@@ -0,0 +1,1817 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_12_1_0_offset.h"
+#include "gc/gc_12_1_0_sh_mask.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "sdma_v7_1_0_pkt_open.h"
+#include "nbio_v4_3.h"
+#include "sdma_common.h"
+#include "sdma_v7_1.h"
+#include "v12_structs.h"
+#include "mes_userqueue.h"
+#include "soc_v1_0.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_7_1_0.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_SDMA_IDX_0_END 0x450
+#define SDMA1_HYP_DEC_REG_OFFSET 0x30
+
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS4_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS5_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS6_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UCODE_REV),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_CHICKEN_BITS),
+};
+
+static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
+ uint32_t inst_mask);
+
+/*
+ * Translate an SDMA0-relative register offset into the absolute offset for
+ * logical instance @instance. Offsets at or above SDMA0_SDMA_IDX_0_END use
+ * GC register segment 1 and the SDMA1_HYP_DEC_REG_OFFSET per-instance stride;
+ * all others use segment 0 and the SDMA1_REG_OFFSET stride.
+ */
+static u32 sdma_v7_1_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+	u32 phys_inst = GET_INST(SDMA0, instance);
+	int xcc = adev->sdma.instance[instance].xcc_id;
+	/* index of this engine among the SDMA instances of its XCC */
+	int idx_in_xcc = phys_inst % adev->sdma.num_inst_per_xcc;
+	bool second_range = internal_offset >= SDMA0_SDMA_IDX_0_END;
+	int stride = second_range ? SDMA1_HYP_DEC_REG_OFFSET : SDMA1_REG_OFFSET;
+	u32 base = adev->reg_offset[GC_HWIP][xcc][second_range ? 1 : 0];
+
+	/* idx_in_xcc == 0 contributes nothing, so it needs no special case */
+	return base + internal_offset + stride * idx_in_xcc;
+}
+
+static unsigned sdma_v7_1_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 1);
+ /* remember the ring offset of the dword that has to be patched later */
+ ret = ring->wptr & ring->buf_mask;
+ /* emit a placeholder dword at that offset; it is patched later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+/**
+ * sdma_v7_1_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Read the 64-bit value at ring->rptr_cpu_addr and return it shifted right by 2.
+ */
+static uint64_t sdma_v7_1_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = (u64 *)ring->rptr_cpu_addr;
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v7_1_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Return the value at ring->wptr_cpu_addr >> 2, or 0 when no doorbell is used.
+ */
+static uint64_t sdma_v7_1_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ u64 wptr = 0;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
+ }
+
+ return wptr >> 2;
+}
+
+/**
+ * sdma_v7_1_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Publish ring->wptr << 2: with a doorbell, store it at ring->wptr_cpu_addr
+ * and ring the doorbell; otherwise write the QUEUE0 RB_WPTR/RB_WPTR_HI registers.
+ */
+static void sdma_v7_1_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
+ ring->me,
+ regSDMA0_SDMA_QUEUE0_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
+ ring->me,
+ regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+/*
+ * Emit @count NOP dwords on @ring. When the owning SDMA instance has
+ * burst_nop set, the first dword additionally carries count - 1 in its
+ * header COUNT field; the remaining dwords are plain NOPs.
+ */
+static void sdma_v7_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma_instance *inst = amdgpu_sdma_get_instance_from_ring(ring);
+	uint32_t emitted = 0;
+
+	if (!count)
+		return;
+
+	if (inst && inst->burst_nop) {
+		amdgpu_ring_write(ring, ring->funcs->nop |
+				  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+		emitted = 1;
+	}
+
+	while (emitted < count) {
+		amdgpu_ring_write(ring, ring->funcs->nop);
+		emitted++;
+	}
+}
+
+/**
+ * sdma_v7_1_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Pad the ring with NOPs, then emit a 6-dword INDIRECT packet for @ib.
+ */
+static void sdma_v7_1_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+ /* An IB packet must end on an 8 DW boundary--the next dword
+ * must be on an 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below is a solution for x.
+ */
+ sdma_v7_1_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v7_1_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit a 6-dword GCR_REQ packet with base VA, limit VA and VMID all zero.
+ */
+static void sdma_v7_1_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+ SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2; this can be narrowed later if performance requires it */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(0));
+}
+
+
+/**
+ * sdma_v7_1_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: fence seq number
+ * @flags: fence flags
+ *
+ * Add a DMA fence packet to the ring to write the low 32 bits of the
+ * fence seq number to @addr, a second fence packet for the high 32 bits
+ * at @addr + 4 when AMDGPU_FENCE_FLAG_64BIT is set, and a DMA trap packet
+ * when AMDGPU_FENCE_FLAG_INT is set.
+ */
+static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Uncached (UC) */
+ /* the two low address bits must be zero */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* the two low address bits must be zero */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+ }
+}
+
+/**
+ * sdma_v7_1_inst_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * For every instance in @inst_mask, clear RB_ENABLE in QUEUE0_RB_CNTL and
+ * then IB_ENABLE in QUEUE0_IB_CNTL.
+ */
+static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev,
+				    uint32_t inst_mask)
+{
+	int inst;
+
+	for_each_inst(inst, inst_mask) {
+		u32 rb_reg = sdma_v7_1_get_reg_offset(adev, inst, regSDMA0_SDMA_QUEUE0_RB_CNTL);
+		u32 ib_reg = sdma_v7_1_get_reg_offset(adev, inst, regSDMA0_SDMA_QUEUE0_IB_CNTL);
+		u32 val;
+
+		/* ring buffer first ... */
+		val = RREG32_SOC15_IP(GC, rb_reg);
+		val = REG_SET_FIELD(val, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0);
+		WREG32_SOC15_IP(GC, rb_reg, val);
+
+		/* ... then indirect buffers */
+		val = RREG32_SOC15_IP(GC, ib_reg);
+		val = REG_SET_FIELD(val, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 0);
+		WREG32_SOC15_IP(GC, ib_reg, val);
+	}
+}
+
+/**
+ * sdma_v7_1_inst_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
+ * Placeholder: not implemented yet, currently does nothing.
+ */
+static void sdma_v7_1_inst_rlc_stop(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v7_1_inst_ctx_switch_enable - program the UTCL1 timeout of the dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: currently ignored; the same value is written either way
+ * @inst_mask: mask of dma engine instances to program
+ *
+ * Write 0x80 to SDMA_UTCL1_TIMEOUT on every instance in @inst_mask.
+ */
+static void sdma_v7_1_inst_ctx_switch_enable(struct amdgpu_device *adev,
+ bool enable, uint32_t inst_mask)
+{
+ int i;
+
+ for_each_inst(i, inst_mask) {
+ WREG32_SOC15_IP(GC,
+ sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_TIMEOUT), 0x80);
+ }
+}
+
+/**
+ * sdma_v7_1_inst_enable - halt or unhalt the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ * @inst_mask: mask of dma engine instances to be halted or unhalted
+ *
+ * Halt or unhalt the async dma engines. When disabling, the gfx and rlc
+ * queues are stopped first. On an SR-IOV VF the MCU_CNTL HALT bit is left
+ * untouched.
+ */
+static void sdma_v7_1_inst_enable(struct amdgpu_device *adev,
+ bool enable, uint32_t inst_mask)
+{
+ u32 mcu_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v7_1_inst_gfx_stop(adev, inst_mask);
+ sdma_v7_1_inst_rlc_stop(adev, inst_mask);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for_each_inst(i, inst_mask) {
+ mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
+ mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl);
+ }
+}
+
+/**
+ * sdma_v7_1_gfx_resume_instance - start/restart a certain sdma engine
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance index (not a mask)
+ * @restore: true to reprogram rptr/wptr from ring->wptr, false to start from 0
+ *
+ * Set up the gfx DMA ring buffer of instance @i, enable it and run the ring
+ * test. On restart (@restore), rptr and wptr are restored from ring->wptr.
+ * Return 0 for success, or the error from the ring test.
+ */
+static int sdma_v7_1_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u64 wptr_gpu_addr;
+ int r;
+
+ ring = &adev->sdma.instance[i].ring;
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), 0);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev))
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
+ else
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
+
+ /* before programming wptr to a smaller value, minor_ptr_update must be set first */
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ /*
+ * Shift the full 64-bit wptr before splitting it, so that the two
+ * top bits of the low dword carry into the HI register. This
+ * matches the restore path above and sdma_v7_1_ring_set_wptr().
+ */
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
+
+ doorbell = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v7_1_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr is programmed */
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_SDMA_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ /* NOTE(review): this mask also clears bits 31:24 - confirm that is intended */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
+ ring->sched.ready = true;
+
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */
+ /* both helpers take an instance mask, so convert the index */
+ sdma_v7_1_inst_ctx_switch_enable(adev, true, BIT(i));
+ sdma_v7_1_inst_enable(adev, true, BIT(i));
+ }
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ ring->sched.ready = false;
+
+ return r;
+}
+
+/**
+ * sdma_v7_1_inst_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Resume the gfx ring of every instance in @inst_mask, starting each one
+ * from a zeroed wptr.
+ * Returns 0 for success, or the error of the first instance that fails.
+ */
+static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev,
+				     uint32_t inst_mask)
+{
+	int inst;
+
+	for_each_inst(inst, inst_mask) {
+		int ret = sdma_v7_1_gfx_resume_instance(adev, inst, false);
+
+		/* stop at the first instance that fails to come up */
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * sdma_v7_1_inst_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Placeholder for the compute DMA queues: currently does nothing.
+ * Always returns 0.
+ */
+static int sdma_v7_1_inst_rlc_resume(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ return 0;
+}
+
+/* Free the per-instance microcode buffer object of every instance in @inst_mask. */
+static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev,
+					     uint32_t inst_mask)
+{
+	int idx;
+
+	for_each_inst(idx, inst_mask) {
+		struct amdgpu_sdma_instance *inst = &adev->sdma.instance[idx];
+
+		amdgpu_bo_free_kernel(&inst->sdma_fw_obj,
+				      &inst->sdma_fw_gpu_addr,
+				      (void **)&inst->sdma_fw_ptr);
+	}
+}
+
+/**
+ * sdma_v7_1_inst_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to be enabled
+ *
+ * Halt the engines, copy the ucode image of instance 0's firmware into a
+ * VRAM buffer for each instance in @inst_mask, prime the instruction cache
+ * and wait for ICACHE_PRIMED and UCODE_INIT_DONE.
+ * Returns 0 for success, -EINVAL if the ucode is not available or the init
+ * wait times out, or the error from the buffer allocation. Buffers that were
+ * already created are not freed here on failure.
+ */
+static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev,
+ uint32_t inst_mask)
+{
+ const struct sdma_firmware_header_v3_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ uint32_t tmp, sdma_status, ic_op_cntl;
+ int i, r, j;
+
+ /* halt the MEs */
+ sdma_v7_1_inst_enable(adev, false, inst_mask);
+
+ if (!adev->sdma.instance[0].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->ucode_size_bytes);
+
+ for_each_inst(i, inst_mask) {
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->sdma.instance[i].sdma_fw_obj,
+ &adev->sdma.instance[i].sdma_fw_gpu_addr,
+ (void **)&adev->sdma.instance[i].sdma_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
+ amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);
+
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL));
+ tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_CNTL, GPA, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_LO),
+ lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_HI),
+ upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
+
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
+ tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL), tmp);
+
+ /* Wait for sdma ucode init complete */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ ic_op_cntl = RREG32_SOC15_IP(GC,
+ sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
+ sdma_status = RREG32_SOC15_IP(GC,
+ sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
+ if ((REG_GET_FIELD(ic_op_cntl, SDMA0_SDMA_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
+ (REG_GET_FIELD(sdma_status, SDMA0_SDMA_STATUS_REG, UCODE_INIT_DONE) == 1))
+ break;
+ udelay(1);
+ }
+
+ if (j >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to init sdma ucode\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Stop the gfx rings, then for each instance set HALT and RESET in MCU_CNTL,
+ * clear QUEUE0_PREEMPT and set then clear the instance's bit in
+ * GRBM_SOFT_RESET, and finally restart the instances through
+ * sdma_v7_1_inst_start(), whose result is returned.
+ */
+static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t inst_mask;
+ u32 tmp;
+ int i;
+
+ inst_mask = GENMASK(NUM_XCC(adev->sdma.sdma_mask) - 1, 0);
+ sdma_v7_1_inst_gfx_stop(adev, inst_mask);
+
+ for_each_inst(i, inst_mask) {
+ //tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE));
+ //tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK;
+ //WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp);
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
+ tmp |= SDMA0_SDMA_MCU_CNTL__HALT_MASK;
+ tmp |= SDMA0_SDMA_MCU_CNTL__RESET_MASK;
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_PREEMPT), 0);
+
+ udelay(100);
+
+ /* NOTE(review): GRBM_SOFT_RESET is always accessed on GC instance 0 - confirm for multi-XCC parts */
+ tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ /* value read back is not used; presumably a posting read - TODO confirm */
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
+ tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+ udelay(100);
+ }
+
+ return sdma_v7_1_inst_start(adev, inst_mask);
+}
+
+/*
+ * Run an IB test with a one second timeout on every SDMA ring.
+ * Returns true as soon as one test fails, false when all of them pass.
+ */
+static bool sdma_v7_1_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	long timeout = msecs_to_jiffies(1000);
+	int idx;
+
+	for (idx = 0; idx < adev->sdma.num_instances; idx++) {
+		if (amdgpu_ring_test_ib(&adev->sdma.instance[idx].ring, timeout))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Reset one SDMA ring: call amdgpu_mes_reset_legacy_queue() for it, reprogram
+ * its instance with rptr/wptr restored, then finish through the ring reset
+ * helpers. Returns -EINVAL if ring->me is not a valid instance, otherwise the
+ * first error encountered or the result of amdgpu_ring_reset_helper_end().
+ */
+static int sdma_v7_1_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
+ if (r)
+ return r;
+
+ r = sdma_v7_1_gfx_resume_instance(adev, ring->me, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/**
+ * sdma_v7_1_inst_start - bring up the selected async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_mask: mask of dma engine instances to start
+ *
+ * On an SR-IOV VF the engines are stopped and the gfx rings resumed without
+ * loading microcode. Otherwise the microcode is loaded first when direct
+ * firmware loading is in use, the engines are unhalted and the gfx rings and
+ * rlc queues are resumed.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
+				uint32_t inst_mask)
+{
+	int ret;
+
+	if (amdgpu_sriov_vf(adev)) {
+		sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
+		sdma_v7_1_inst_enable(adev, false, inst_mask);
+
+		/* set RB registers */
+		return sdma_v7_1_inst_gfx_resume(adev, inst_mask);
+	}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		ret = sdma_v7_1_inst_load_microcode(adev, inst_mask);
+		if (ret) {
+			/* drop whatever ucode buffers were created before the failure */
+			sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
+			return ret;
+		}
+
+		if (amdgpu_emu_mode == 1)
+			msleep(1000);
+	}
+
+	/* unhalt the MEs */
+	sdma_v7_1_inst_enable(adev, true, inst_mask);
+	/* enable sdma ring preemption */
+	sdma_v7_1_inst_ctx_switch_enable(adev, true, inst_mask);
+
+	/* start the gfx rings and rlc compute queues */
+	ret = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
+	if (ret)
+		return ret;
+
+	return sdma_v7_1_inst_rlc_resume(adev, inst_mask);
+}
+
+/*
+ * Fill the v12 SDMA MQD @mqd from @prop: ring buffer control and base, wptr
+ * poll and rptr write-back addresses, doorbell and CSA address. IB_CNTL is
+ * copied from the current value of instance 0's QUEUE0_IB_CNTL register.
+ * Always returns 0.
+ */
+static int sdma_v7_1_mqd_init(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v12_sdma_mqd *m = mqd;
+ uint64_t wb_gpu_addr;
+
+ m->sdmax_rlcx_rb_cntl =
+ order_base_2(prop->queue_size / 4) << SDMA0_SDMA_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ 1 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 4 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_SDMA_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+ m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, 0,
+ regSDMA0_SDMA_QUEUE0_IB_CNTL));
+
+ m->sdmax_rlcx_doorbell_offset =
+ prop->doorbell_index << SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
+
+ m->sdmax_rlcx_doorbell_log = 0;
+ m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_SDMA_QUEUE0_RB_AQL_CNTL_DEFAULT;
+ m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_SDMA_QUEUE0_DUMMY_REG_DEFAULT;
+
+ m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+ m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+ return 0;
+}
+
+/* Register the SDMA MQD initializer and MQD size for the DMA hardware IP. */
+static void sdma_v7_1_set_mqd_funcs(struct amdgpu_device *adev)
+{
+	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_1_mqd_init;
+	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
+}
+
+/**
+ * sdma_v7_1_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by submitting a linear write packet that stores
+ * 0xDEADBEEF into a write-back slot, then polling that slot from the CPU.
+ * Returns 0 for success, -ETIMEDOUT if the value never shows up, or the
+ * error from slot/ring allocation.
+ */
+static int sdma_v7_1_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned int wb_slot;
+	unsigned int usec;
+	u64 wb_gpu_addr;
+	int r;
+
+	r = amdgpu_device_wb_get(adev, &wb_slot);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		return r;
+	}
+
+	/* seed the slot with a pattern the engine has to overwrite */
+	wb_gpu_addr = adev->wb.gpu_addr + (wb_slot * 4);
+	adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
+
+	r = amdgpu_ring_alloc(ring, 5);
+	if (r) {
+		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+		goto out_free_wb;
+	}
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+	amdgpu_ring_write(ring, lower_32_bits(wb_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(wb_gpu_addr));
+	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+	amdgpu_ring_write(ring, 0xDEADBEEF);
+	amdgpu_ring_commit(ring);
+
+	/* assume timeout until the expected value is observed */
+	r = -ETIMEDOUT;
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		if (le32_to_cpu(adev->wb.wb[wb_slot]) == 0xDEADBEEF) {
+			r = 0;
+			break;
+		}
+		if (amdgpu_emu_mode == 1)
+			msleep(1);
+		else
+			udelay(1);
+	}
+
+out_free_wb:
+	amdgpu_device_wb_free(adev, wb_slot);
+
+	return r;
+}
+
+/**
+ * sdma_v7_1_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Schedule a small IB that writes 0xDEADBEEF into a write-back slot, wait
+ * for its fence and check the slot contents.
+ * Returns 0 on success, -ETIMEDOUT if the fence wait times out, -EINVAL if
+ * the slot does not hold the expected value, or another error on failure.
+ */
+static int sdma_v7_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct dma_fence *fence = NULL;
+	struct amdgpu_ib ib;
+	unsigned int wb_slot;
+	u64 wb_gpu_addr;
+	long r;
+	int dw;
+
+	memset(&ib, 0, sizeof(ib));
+
+	r = amdgpu_device_wb_get(adev, &wb_slot);
+	if (r) {
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+		return r;
+	}
+
+	/* seed the slot with a pattern the IB has to overwrite */
+	wb_gpu_addr = adev->wb.gpu_addr + (wb_slot * 4);
+	adev->wb.wb[wb_slot] = cpu_to_le32(0xCAFEDEAD);
+
+	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+		goto out_free_wb;
+	}
+
+	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+		    SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+	ib.ptr[1] = lower_32_bits(wb_gpu_addr);
+	ib.ptr[2] = upper_32_bits(wb_gpu_addr);
+	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+	ib.ptr[4] = 0xDEADBEEF;
+	/* fill the IB up to 8 dwords with NOPs */
+	for (dw = 5; dw < 8; dw++)
+		ib.ptr[dw] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+	ib.length_dw = 8;
+
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &fence);
+	if (r)
+		goto out_free_ib;
+
+	r = dma_fence_wait_timeout(fence, false, timeout);
+	if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+		goto out_free_ib;
+	}
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
+		goto out_free_ib;
+	}
+
+	r = (le32_to_cpu(adev->wb.wb[wb_slot]) == 0xDEADBEEF) ? 0 : -EINVAL;
+
+out_free_ib:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(fence);
+out_free_wb:
+	amdgpu_device_wb_free(adev, wb_slot);
+	return r;
+}
+
+
+
+/**
+ * sdma_v7_1_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Append a 7-dword linear copy packet that copies @count 8-byte entries
+ * from @src to @pe.
+ */
+static void sdma_v7_1_vm_copy_pte(struct amdgpu_ib *ib,
+				  uint64_t pe, uint64_t src,
+				  unsigned count)
+{
+	const unsigned int nbytes = count * 8;
+	uint32_t dw = ib->length_dw;
+
+	ib->ptr[dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+			SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+	ib->ptr[dw++] = nbytes - 1;
+	ib->ptr[dw++] = 0; /* src/dst endian swap */
+	ib->ptr[dw++] = lower_32_bits(src);
+	ib->ptr[dw++] = upper_32_bits(src);
+	ib->ptr[dw++] = lower_32_bits(pe);
+	ib->ptr[dw++] = upper_32_bits(pe);
+
+	ib->length_dw = dw;
+}
+
+/**
+ * sdma_v7_1_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Append a linear write packet whose payload is @count 64-bit values,
+ * starting at @value and advancing by @incr for each entry.
+ */
+static void sdma_v7_1_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+				   uint64_t value, unsigned count,
+				   uint32_t incr)
+{
+	unsigned int remaining = count * 2; /* payload size in dwords */
+
+	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+	ib->ptr[ib->length_dw++] = remaining - 1;
+
+	while (remaining > 0) {
+		ib->ptr[ib->length_dw++] = lower_32_bits(value);
+		ib->ptr[ib->length_dw++] = upper_32_bits(value);
+		value += incr;
+		remaining -= 2;
+	}
+}
+
+/**
+ * sdma_v7_1_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Append a 10-dword PTEPDE packet to @ib. The MTYPE/SNOOP/SCOPE header
+ * bits depend on the amdgpu_mtype_local setting.
+ */
+static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib,
+				     uint64_t pe,
+				     uint64_t addr, unsigned count,
+				     uint32_t incr, uint64_t flags)
+{
+	uint32_t dw = ib->length_dw;
+	/* for physically contiguous pages (vram) */
+	u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
+
+	if (!amdgpu_mtype_local) {
+		header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2);
+		header |= SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1);
+		header |= SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3);
+	} else {
+		header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3);
+	}
+
+	ib->ptr[dw++] = header;
+	ib->ptr[dw++] = lower_32_bits(pe); /* dst addr */
+	ib->ptr[dw++] = upper_32_bits(pe);
+	ib->ptr[dw++] = lower_32_bits(flags); /* mask */
+	ib->ptr[dw++] = upper_32_bits(flags);
+	ib->ptr[dw++] = lower_32_bits(addr); /* value */
+	ib->ptr[dw++] = upper_32_bits(addr);
+	ib->ptr[dw++] = incr; /* increment size */
+	ib->ptr[dw++] = 0;
+	ib->ptr[dw++] = count - 1; /* number of entries */
+
+	ib->length_dw = dw;
+}
+
+/**
+ * sdma_v7_1_ring_pad_ib - pad the IB
+ *
+ * @ring: amdgpu ring pointer
+ * @ib: indirect buffer to fill with padding
+ *
+ * Pad the IB with NOPs to a boundary multiple of 8. When the SDMA instance
+ * has burst_nop set, the first padding dword also carries the count of the
+ * padding dwords that follow it.
+ */
+static void sdma_v7_1_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+	const u32 nop = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
+	const u32 pad_count = (-ib->length_dw) & 0x7;
+	u32 n;
+
+	if (!pad_count)
+		return;
+
+	/* first padding dword: burst header if supported, plain NOP otherwise */
+	if (sdma && sdma->burst_nop)
+		ib->ptr[ib->length_dw++] =
+			nop | SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+	else
+		ib->ptr[ib->length_dw++] = nop;
+
+	/* remaining padding dwords are always plain NOP headers */
+	for (n = 1; n < pad_count; n++)
+		ib->ptr[ib->length_dw++] = nop;
+}
+
+/**
+ * sdma_v7_1_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emit a POLL_REGMEM packet that polls the ring's fence memory until it
+ * equals the last synced sequence number, so that all previous operations
+ * are completed before later commands run.
+ */
+static void sdma_v7_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	const uint64_t fence_addr = ring->fence_drv.gpu_addr;
+	const uint32_t wait_seq = ring->fence_drv.sync_seq;
+	const uint32_t header =
+		SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+		SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+		SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1);
+
+	/* wait for idle */
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, lower_32_bits(fence_addr) & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(fence_addr));
+	amdgpu_ring_write(ring, wait_seq); /* reference */
+	amdgpu_ring_write(ring, 0xffffffff); /* mask */
+	/* retry count, poll interval */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4));
+}
+
+/**
+ * sdma_v7_1_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: page directory address, forwarded unchanged
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA. This is a thin wrapper that forwards all arguments to
+ * amdgpu_gmc_emit_flush_gpu_tlb().
+ */
+static void sdma_v7_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+/*
+ * Emit a register write of @val to @reg on the ring.
+ *
+ * The SRBM WRITE command is not supported on SDMA v7; the register WRITE
+ * command, which shares the SRBM WRITE opcode, is used instead. The
+ * normalized register offset is shifted left by 2 before being emitted.
+ */
+static void sdma_v7_1_ring_emit_wreg(struct amdgpu_ring *ring,
+				     uint32_t reg, uint32_t val)
+{
+	const uint32_t header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE);
+
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a POLL_REGMEM packet that polls @reg until (value & @mask) equals
+ * @val, using a retry count of 0xfff and a poll interval of 10.
+ */
+static void sdma_v7_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+					 uint32_t val, uint32_t mask)
+{
+	const uint32_t header =
+		SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+		SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3); /* equal */
+	const uint32_t poll_cfg =
+		SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+		SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10);
+
+	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val); /* reference */
+	amdgpu_ring_write(ring, mask); /* mask */
+	amdgpu_ring_write(ring, poll_cfg);
+}
+
+/*
+ * Write @ref to @reg0, then wait until (@reg1 & @mask) == @mask.
+ * The three packets are emitted in this exact order.
+ */
+static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); /* val 0 / mask 0: any value of reg0 compares equal */
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+/*
+ * Early init: load the SDMA firmware description and, if that succeeds,
+ * install the ring, buffer, VM PTE, IRQ and MQD callback tables.
+ * Returns 0 on success or the error from amdgpu_sdma_init_microcode().
+ */
+static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r = amdgpu_sdma_init_microcode(adev, 0, true);
+
+	if (!r) {
+		sdma_v7_1_set_ring_funcs(adev);
+		sdma_v7_1_set_buffer_funcs(adev);
+		sdma_v7_1_set_vm_pte_funcs(adev);
+		sdma_v7_1_set_irq_funcs(adev);
+		sdma_v7_1_set_mqd_funcs(adev);
+	} else {
+		DRM_ERROR("Failed to init sdma firmware!\n");
+	}
+
+	return r;
+}
+
+/**
+ * sdma_v7_1_sw_init - software-side setup for the SDMA v7.1 block
+ *
+ * @ip_block: IP block being initialized
+ *
+ * Registers the SDMA trap interrupt source, initializes one ring per SDMA
+ * instance, sets up the supported reset mask and its sysfs entry, and
+ * allocates the register dump buffer used by the IP state dump callbacks.
+ *
+ * Returns 0 on success or a negative error code. Failing to allocate the
+ * dump buffer is only logged and does not fail initialization.
+ */
+static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_ring *ring;
+	int r, i;
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
+	uint32_t *ptr;
+	u32 xcc_id;
+
+	/* SDMA trap event */
+	r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GFX,
+			      GFX_12_1_0__SRCID__SDMA_TRAP,
+			      &adev->sdma.trap_irq);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
+		ring->ring_obj = NULL;
+		ring->use_doorbell = true;
+		ring->me = i;
+
+		/* find the logical XCC whose physical instance owns this SDMA */
+		for (xcc_id = 0; xcc_id < fls(adev->gfx.xcc_mask); xcc_id++) {
+			if (adev->sdma.instance[i].xcc_id == GET_INST(GC, xcc_id))
+				break;
+		}
+
+		DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n",
+			  xcc_id, GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc,
+			  ring->use_doorbell?"true":"false");
+
+		ring->doorbell_index =
+			(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
+
+		ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+		/* bound the write so an unexpected id cannot overrun ring->name */
+		snprintf(ring->name, sizeof(ring->name), "sdma%d.%d", xcc_id,
+			 GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc);
+		r = amdgpu_ring_init(adev, ring, 1024,
+				     &adev->sdma.trap_irq,
+				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+				     AMDGPU_RING_PRIO_DEFAULT, NULL);
+		if (r)
+			return r;
+	}
+
+	adev->sdma.supported_reset =
+		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+	if (!amdgpu_sriov_vf(adev) &&
+	    !adev->debug_disable_gpu_ring_reset)
+		adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+	if (r)
+		return r;
+	/* Allocate memory for SDMA IP Dump buffer */
+	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+	if (ptr)
+		adev->sdma.ip_dump = ptr;
+	else
+		DRM_ERROR("Failed to allocate memory for SDMA IP Dump\n");
+
+#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
+	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+#endif
+
+	return r;
+}
+
+/*
+ * Software teardown: finalize every SDMA ring, remove the reset-mask sysfs
+ * entry, destroy the instance contexts, free the microcode buffers when
+ * firmware was loaded directly, and release the IP dump buffer.
+ * Always returns 0.
+ */
+static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst = 0;
+
+	while (inst < adev->sdma.num_instances) {
+		amdgpu_ring_fini(&adev->sdma.instance[inst].ring);
+		inst++;
+	}
+
+	amdgpu_sdma_sysfs_reset_mask_fini(adev);
+	amdgpu_sdma_destroy_inst_ctx(adev, true);
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		sdma_v7_1_inst_free_ucode_buffer(adev, adev->sdma.sdma_mask);
+	}
+
+	kfree(adev->sdma.ip_dump);
+
+	return 0;
+}
+
+/* Start all SDMA instances: builds a mask with one bit per instance. */
+static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const uint32_t all_inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
+
+	return sdma_v7_1_inst_start(adev, all_inst_mask);
+}
+
+/*
+ * Stop the SDMA engines: disable context switching, then halt every
+ * instance in sdma_mask. Nothing is done under SR-IOV VF.
+ * Always returns 0.
+ */
+static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (!amdgpu_sriov_vf(adev)) {
+		sdma_v7_1_inst_ctx_switch_enable(adev, false, adev->sdma.sdma_mask);
+		sdma_v7_1_inst_enable(adev, false, adev->sdma.sdma_mask);
+	}
+
+	return 0;
+}
+
+/* Suspend is implemented as a plain hardware teardown (hw_fini). */
+static int sdma_v7_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v7_1_hw_fini(ip_block);
+}
+
+/* Resume is implemented as a full hardware init (hw_init). */
+static int sdma_v7_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v7_1_hw_init(ip_block);
+}
+
+/*
+ * Return true only if every SDMA instance reports the IDLE bit in its
+ * status register; stops at the first busy instance.
+ */
+static bool sdma_v7_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 inst = 0;
+
+	while (inst < adev->sdma.num_instances) {
+		u32 status = RREG32(sdma_v7_1_get_reg_offset(adev, inst,
+						regSDMA0_SDMA_STATUS_REG));
+
+		if ((status & SDMA0_SDMA_STATUS_REG__IDLE_MASK) == 0)
+			return false;
+		inst++;
+	}
+
+	return true;
+}
+
+/*
+ * Poll until all SDMA instances are idle.
+ *
+ * Checks sdma_v7_1_is_idle() up to adev->usec_timeout times with a 1us
+ * delay between attempts. The idle check is shared with is_idle rather
+ * than duplicated through a per-instance scratch array on the stack.
+ *
+ * Returns 0 once every instance is idle, -ETIMEDOUT otherwise.
+ */
+static int sdma_v7_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned int i;
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (sdma_v7_1_is_idle(ip_block))
+			return 0;
+		udelay(1);
+	}
+	return -ETIMEDOUT;
+}
+
+/*
+ * Preempt the IB currently running on @ring.
+ *
+ * Clears the ring's preempt cond_exec flag, emits a trailing fence, sets
+ * the queue PREEMPT register and polls the trailing fence for up to
+ * adev->usec_timeout microseconds. The PREEMPT register and the cond_exec
+ * flag are restored before returning, including when the ring allocation
+ * for the trailing fence fails.
+ *
+ * Returns 0 on success, the amdgpu_ring_alloc() error if the fence cannot
+ * be emitted, or -EINVAL if the trailing fence never signals.
+ */
+static int sdma_v7_1_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	int i, r = 0;
+	struct amdgpu_device *adev = ring->adev;
+	u32 index = 0;
+	u64 sdma_gfx_preempt;
+
+	amdgpu_sdma_get_index_from_ring(ring, &index);
+	sdma_gfx_preempt =
+		sdma_v7_1_get_reg_offset(adev, index, regSDMA0_SDMA_QUEUE0_PREEMPT);
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	/* emit the trailing fence */
+	ring->trail_seq += 1;
+	r = amdgpu_ring_alloc(ring, 10);
+	if (r) {
+		DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
+		/* do not leave the preemption condition asserted on failure */
+		amdgpu_ring_set_preempt_cond_exec(ring, true);
+		return r;
+	}
+	sdma_v7_1_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+				  ring->trail_seq, 0);
+	amdgpu_ring_commit(ring);
+
+	/* assert IB preemption */
+	WREG32(sdma_gfx_preempt, 1);
+
+	/* poll the trailing fence */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->trail_seq ==
+		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		r = -EINVAL;
+		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+	}
+
+	/* deassert IB preemption */
+	WREG32(sdma_gfx_preempt, 0);
+
+	/* deassert the preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+	return r;
+}
+
+/*
+ * Enable or disable the trap interrupt by updating TRAP_ENABLE in the
+ * SDMA_CNTL register; @type is used as the instance index for the register
+ * offset lookup. Always returns 0.
+ */
+static int sdma_v7_1_set_trap_irq_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	const u32 reg_offset =
+		sdma_v7_1_get_reg_offset(adev, type, regSDMA0_SDMA_CNTL);
+	u32 sdma_cntl = RREG32(reg_offset);
+
+	if (state == AMDGPU_IRQ_STATE_ENABLE)
+		sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL,
+					  TRAP_ENABLE, 1);
+	else
+		sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL,
+					  TRAP_ENABLE, 0);
+	WREG32(reg_offset, sdma_cntl);
+
+	return 0;
+}
+
+/*
+ * Handle an SDMA trap interrupt.
+ *
+ * If MES is enabled and the source data carries the MES queue flag, the
+ * fence of the matching MES queue is processed. Otherwise the IV entry is
+ * mapped to an SDMA instance and, for the GFX client and queue 0, that
+ * instance's ring fences are processed.
+ *
+ * Returns 0, or -EINVAL if no SDMA instance matches the ring id.
+ */
+static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	uint32_t mes_queue_id = entry->src_data[0];
+	int phys_inst, idx, queue, xcc_id = 0;
+
+	DRM_DEBUG("IH: SDMA trap\n");
+
+	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+		struct amdgpu_mes_queue *mes_queue;
+
+		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+		spin_lock(&adev->mes.queue_id_lock);
+		mes_queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+		if (mes_queue) {
+			DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+			amdgpu_fence_process(mes_queue->ring);
+		}
+		spin_unlock(&adev->mes.queue_id_lock);
+		return 0;
+	}
+
+	/* low nibble of ring_id: queue, high nibble: instance within the XCC */
+	queue = entry->ring_id & 0xf;
+	if (adev->gfx.funcs && adev->gfx.funcs->ih_node_to_logical_xcc)
+		xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id);
+	else
+		dev_warn(adev->dev, "IH: SDMA may get wrong xcc id as gfx function not available\n");
+
+	phys_inst = ((entry->ring_id & 0xf0) >> 4) +
+		    GET_INST(GC, xcc_id) * adev->sdma.num_inst_per_xcc;
+
+	/* translate the physical instance into the driver's instance index */
+	for (idx = 0; idx < adev->sdma.num_instances; idx++) {
+		if (phys_inst == GET_INST(SDMA0, idx))
+			break;
+	}
+	if (idx >= adev->sdma.num_instances) {
+		DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
+		return -EINVAL;
+	}
+
+	if (entry->client_id == SOC_V1_0_IH_CLIENTID_GFX && queue == 0)
+		amdgpu_fence_process(&adev->sdma.instance[idx].ring);
+
+	return 0;
+}
+
+/* Stub handler: ignores the illegal-instruction IV entry and returns 0. */
+static int sdma_v7_1_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+/* Stub: the requested clockgating state is ignored; always returns 0. */
+static int sdma_v7_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+/* Stub: the requested powergating state is ignored; always returns 0. */
+static int sdma_v7_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+/* Stub: leaves @flags untouched. */
+static void sdma_v7_1_get_clockgating_state(struct amdgpu_ip_block *ip_block,
+ u64 *flags)
+{
+}
+
+/*
+ * Print the register values captured in adev->sdma.ip_dump, one section
+ * per SDMA instance. Does nothing if no dump buffer was allocated.
+ */
+static void sdma_v7_1_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
+	uint32_t reg;
+	int inst;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		/* each instance owns reg_count consecutive slots in the dump */
+		const uint32_t *regs = &adev->sdma.ip_dump[inst * reg_count];
+
+		drm_printf(p, "\nInstance:%d\n", inst);
+
+		for (reg = 0; reg < reg_count; reg++)
+			drm_printf(p, "%-50s \t 0x%08x\n",
+				   sdma_reg_list_7_1[reg].reg_name, regs[reg]);
+	}
+}
+
+/*
+ * Capture every register in sdma_reg_list_7_1 for each SDMA instance into
+ * adev->sdma.ip_dump. The reads are bracketed by amdgpu_gfx_off_ctrl()
+ * calls (false before, true after). Does nothing without a dump buffer.
+ */
+static void sdma_v7_1_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
+	uint32_t reg;
+	int inst;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	amdgpu_gfx_off_ctrl(adev, false);
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		/* each instance owns reg_count consecutive slots in the dump */
+		uint32_t *regs = &adev->sdma.ip_dump[inst * reg_count];
+
+		for (reg = 0; reg < reg_count; reg++)
+			regs[reg] = RREG32(sdma_v7_1_get_reg_offset(adev, inst,
+					sdma_reg_list_7_1[reg].reg_offset));
+	}
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
+/*
+ * IP-block callback table for SDMA v7.1. The soft_reset and
+ * check_soft_reset handlers are defined elsewhere in this file.
+ */
+const struct amd_ip_funcs sdma_v7_1_ip_funcs = {
+ .name = "sdma_v7_1",
+ .early_init = sdma_v7_1_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v7_1_sw_init,
+ .sw_fini = sdma_v7_1_sw_fini,
+ .hw_init = sdma_v7_1_hw_init,
+ .hw_fini = sdma_v7_1_hw_fini,
+ .suspend = sdma_v7_1_suspend,
+ .resume = sdma_v7_1_resume,
+ .is_idle = sdma_v7_1_is_idle,
+ .wait_for_idle = sdma_v7_1_wait_for_idle,
+ .soft_reset = sdma_v7_1_soft_reset,
+ .check_soft_reset = sdma_v7_1_check_soft_reset,
+ .set_clockgating_state = sdma_v7_1_set_clockgating_state,
+ .set_powergating_state = sdma_v7_1_set_powergating_state,
+ .get_clockgating_state = sdma_v7_1_get_clockgating_state,
+ .dump_ip_state = sdma_v7_1_dump_ip_state,
+ .print_ip_state = sdma_v7_1_print_ip_state,
+};
+
+/*
+ * Ring callback table installed on every SDMA v7.1 ring by
+ * sdma_v7_1_set_ring_funcs(). emit_frame_size and emit_ib_size are sums of
+ * the per-packet dword counts listed next to each term.
+ */
+static const struct amdgpu_ring_funcs sdma_v7_1_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v7_1_ring_get_rptr,
+ .get_wptr = sdma_v7_1_ring_get_wptr,
+ .set_wptr = sdma_v7_1_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v7_1_ring_init_cond_exec */
+ 6 + /* sdma_v7_1_ring_emit_pipeline_sync */
+ /* sdma_v7_1_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v7_1_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v7_1_ring_emit_ib */
+ .emit_ib = sdma_v7_1_ring_emit_ib,
+ .emit_mem_sync = sdma_v7_1_ring_emit_mem_sync,
+ .emit_fence = sdma_v7_1_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v7_1_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v7_1_ring_emit_vm_flush,
+ .test_ring = sdma_v7_1_ring_test_ring,
+ .test_ib = sdma_v7_1_ring_test_ib,
+ .insert_nop = sdma_v7_1_ring_insert_nop,
+ .pad_ib = sdma_v7_1_ring_pad_ib,
+ .emit_wreg = sdma_v7_1_ring_emit_wreg,
+ .emit_reg_wait = sdma_v7_1_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v7_1_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v7_1_ring_init_cond_exec,
+ .preempt_ib = sdma_v7_1_ring_preempt_ib,
+ .reset = sdma_v7_1_reset_queue,
+};
+
+/*
+ * Install the ring callbacks on every SDMA instance and record which XCC
+ * each instance belongs to.
+ */
+static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		struct amdgpu_sdma_instance *inst = &adev->sdma.instance[i];
+		int dev_inst = GET_INST(SDMA0, i);
+
+		inst->ring.funcs = &sdma_v7_1_ring_funcs;
+		inst->ring.me = i;
+
+		/* XCC to which SDMA belongs depends on physical instance */
+		inst->xcc_id = dev_inst / adev->sdma.num_inst_per_xcc;
+	}
+}
+
+/* Trap interrupt source: state control plus the trap handler. */
+static const struct amdgpu_irq_src_funcs sdma_v7_1_trap_irq_funcs = {
+ .set = sdma_v7_1_set_trap_irq_state,
+ .process = sdma_v7_1_process_trap_irq,
+};
+
+/* Illegal-instruction interrupt source: handler only, no .set callback. */
+static const struct amdgpu_irq_src_funcs sdma_v7_1_illegal_inst_irq_funcs = {
+ .process = sdma_v7_1_process_illegal_inst_irq,
+};
+
+/*
+ * Hook up the trap and illegal-instruction interrupt sources. The trap
+ * source gets one type per SDMA instance, offset by
+ * AMDGPU_SDMA_IRQ_INSTANCE0.
+ */
+static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+	adev->sdma.trap_irq.funcs = &sdma_v7_1_trap_irq_funcs;
+	adev->sdma.trap_irq.num_types =
+		AMDGPU_SDMA_IRQ_INSTANCE0 + adev->sdma.num_instances;
+
+	adev->sdma.illegal_inst_irq.funcs = &sdma_v7_1_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v7_1_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill with commands
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @copy_flags: copy flags for the buffers
+ *
+ * Append a 7-dword linear copy packet to @ib. The TMZ header bit is set
+ * when @copy_flags contains AMDGPU_COPY_FLAGS_TMZ.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v7_1_emit_copy_buffer(struct amdgpu_ib *ib,
+				       uint64_t src_offset,
+				       uint64_t dst_offset,
+				       uint32_t byte_count,
+				       uint32_t copy_flags)
+{
+	uint32_t dw = ib->length_dw;
+
+	ib->ptr[dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+		SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
+	ib->ptr[dw++] = byte_count - 1;
+	ib->ptr[dw++] = 0; /* src/dst endian swap */
+	ib->ptr[dw++] = lower_32_bits(src_offset);
+	ib->ptr[dw++] = upper_32_bits(src_offset);
+	ib->ptr[dw++] = lower_32_bits(dst_offset);
+	ib->ptr[dw++] = upper_32_bits(dst_offset);
+
+	ib->length_dw = dw;
+}
+
+/**
+ * sdma_v7_1_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Append a 5-dword constant-fill packet to @ib.
+ */
+static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib,
+				       uint32_t src_data,
+				       uint64_t dst_offset,
+				       uint32_t byte_count)
+{
+	uint32_t dw = ib->length_dw;
+
+	ib->ptr[dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
+	ib->ptr[dw++] = lower_32_bits(dst_offset);
+	ib->ptr[dw++] = upper_32_bits(dst_offset);
+	ib->ptr[dw++] = src_data;
+	ib->ptr[dw++] = byte_count - 1;
+
+	ib->length_dw = dw;
+}
+
+/*
+ * Buffer copy/fill callbacks and their limits. Note that copy_num_dw (8)
+ * is larger than the 7 dwords sdma_v7_1_emit_copy_buffer() writes.
+ */
+static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 8,
+ .emit_copy_buffer = sdma_v7_1_emit_copy_buffer,
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v7_1_emit_fill_buffer,
+};
+
+/* Use the ring of SDMA instance 0 and the v7.1 callbacks for buffer moves. */
+static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev)
+{
+	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+	adev->mman.buffer_funcs = &sdma_v7_1_buffer_funcs;
+}
+
+/*
+ * Page-table update callbacks. Note that copy_pte_num_dw (8) is larger
+ * than the 7 dwords sdma_v7_1_vm_copy_pte() writes.
+ */
+static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = {
+ .copy_pte_num_dw = 8,
+ .copy_pte = sdma_v7_1_vm_copy_pte,
+ .write_pte = sdma_v7_1_vm_write_pte,
+ .set_pte_pde = sdma_v7_1_vm_set_pte_pde,
+};
+
+/*
+ * Install the page-table update callbacks and register the scheduler of
+ * every SDMA ring as a VM PTE scheduler.
+ */
+static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+	unsigned int inst = 0;
+
+	adev->vm_manager.vm_pte_funcs = &sdma_v7_1_vm_pte_funcs;
+
+	while (inst < adev->sdma.num_instances) {
+		adev->vm_manager.vm_pte_scheds[inst] =
+			&adev->sdma.instance[inst].ring.sched;
+		inst++;
+	}
+
+	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+/* IP block descriptor: SDMA version 7.1.0 bound to sdma_v7_1_ip_funcs. */
+const struct amdgpu_ip_block_version sdma_v7_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 7,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &sdma_v7_1_ip_funcs,
+};
+
+/*
+ * XCP resume hook: start the SDMA instances selected by @inst_mask.
+ * @handle is the amdgpu_device pointer. Returns the result of
+ * sdma_v7_1_inst_start().
+ */
+static int sdma_v7_1_xcp_resume(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = handle;
+
+	return sdma_v7_1_inst_start(adev, inst_mask);
+}
+
+/*
+ * XCP suspend hook: disable context switching and then halt the SDMA
+ * instances selected by @inst_mask. @handle is the amdgpu_device pointer.
+ * Always returns 0.
+ */
+static int sdma_v7_1_xcp_suspend(void *handle, uint32_t inst_mask)
+{
+	struct amdgpu_device *adev = handle;
+
+	sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
+	sdma_v7_1_inst_enable(adev, false, inst_mask);
+
+	return 0;
+}
+
+/* Suspend/resume hooks taking an instance mask; declared in sdma_v7_1.h. */
+struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs = {
+ .suspend = &sdma_v7_1_xcp_suspend,
+ .resume = &sdma_v7_1_xcp_resume
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.h b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.h
new file mode 100644
index 000000000000..1a07ef09a103
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V7_1_H__
+#define __SDMA_V7_1_H__
+
+extern const struct amd_ip_funcs sdma_v7_1_ip_funcs; /* IP callback table */
+extern const struct amdgpu_ip_block_version sdma_v7_1_ip_block; /* version 7.1.0 descriptor */
+extern struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs; /* suspend/resume hooks taking an instance mask */
+
+#endif /* __SDMA_V7_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_1_0_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1_0_pkt_open.h
new file mode 100644
index 000000000000..b051e4f92088
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_1_0_pkt_open.h
@@ -0,0 +1,5673 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SDMA_V7_1_0_PKT_OPEN_H_
+#define __SDMA_V7_1_0_PKT_OPEN_H_
+/* Packet opcode (SDMA_OP_*) and sub-opcode (SDMA_SUBOP_*) values. Sub-opcode numbers repeat across groups (e.g. COPY_LINEAR and WRITE_LINEAR are both 0), so presumably each is only meaningful together with its opcode. */
+#define SDMA_OP_NOP 0
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_OP_GPUVM_INV 16
+#define SDMA_OP_GCR_REQ 17
+#define SDMA_OP_DUMMY_TRAP 32
+#define SDMA_SUBOP_TIMESTAMP_SET 0
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_COPY_DIRTY_PAGE 7
+#define SDMA_SUBOP_COPY_LINEAR_PHY 8
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_LARGE 36
+#define SDMA_SUBOP_COPY_LINEAR_BC 16
+#define SDMA_SUBOP_COPY_TILED_BC 17
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22
+#define SDMA_SUBOP_WRITE_LINEAR 0
+#define SDMA_SUBOP_WRITE_TILED 1
+#define SDMA_SUBOP_WRITE_TILED_BC 17
+#define SDMA_SUBOP_PTEPDE_GEN 0
+#define SDMA_SUBOP_PTEPDE_COPY 1
+#define SDMA_SUBOP_PTEPDE_RMW 2
+#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3
+#define SDMA_SUBOP_MEM_INCR 1
+#define SDMA_SUBOP_DATA_FILL_MULTI 1
+#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
+#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
+#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define SDMA_SUBOP_VM_INVALIDATION 4
+#define HEADER_AGENT_DISPATCH 4
+#define HEADER_BARRIER 5
+#define SDMA_OP_AQL_COPY 0
+#define SDMA_OP_AQL_BARRIER_OR 0
+/* SDMA_GCR_* bits: single-bit flags, plus 2-bit encoders SEQ -> [17:16], GL2_RANGE -> [12:11], GL1_RANGE -> [3:2], GLI_INV -> [1:0]. */
+#define SDMA_GCR_RANGE_IS_PA (1 << 18)
+#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16)
+#define SDMA_GCR_GL2_WB (1 << 15)
+#define SDMA_GCR_GL2_INV (1 << 14)
+#define SDMA_GCR_GL2_DISCARD (1 << 13)
+#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11)
+#define SDMA_GCR_GL2_US (1 << 10)
+#define SDMA_GCR_GL1_INV (1 << 9)
+#define SDMA_GCR_GLV_INV (1 << 8)
+#define SDMA_GCR_GLK_INV (1 << 7)
+#define SDMA_GCR_GLK_WB (1 << 6)
+#define SDMA_GCR_GLM_INV (1 << 5)
+#define SDMA_GCR_GLM_WB (1 << 4)
+#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2)
+#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0)
+/* SDMA_DCC_* encoders: DATA_FORMAT -> [5:0], NUM_TYPE -> [11:9], READ_CM -> [17:16], WRITE_CM -> [19:18], MAX_COM -> [25:24], MAX_UCOM -> [26]. */
+#define SDMA_DCC_DATA_FORMAT(x) ((x) & 0x3f)
+#define SDMA_DCC_NUM_TYPE(x) (((x) & 0x7) << 9)
+#define SDMA_DCC_READ_CM(x) (((x) & 0x3) << 16)
+#define SDMA_DCC_WRITE_CM(x) (((x) & 0x3) << 18)
+#define SDMA_DCC_MAX_COM(x) (((x) & 0x3) << 24)
+#define SDMA_DCC_MAX_UCOM(x) (((x) & 0x1) << 26)
+
+/*
+** Definitions for the SDMA_PKT_COPY_LINEAR packet (words at offsets 0-6). For each field, NAME(x) is ((x) & _mask) << _shift: _mask is the unshifted width mask, _shift the bit position within the word.
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/* encrypt field: bit [16] */
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift)
+
+/* tmz field: bit [18] */
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift)
+
+/* npd field: bit [28] */
+#define SDMA_PKT_COPY_LINEAR_HEADER_npd_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_npd_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_npd_shift 28
+#define SDMA_PKT_COPY_LINEAR_HEADER_NPD(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_npd_mask) << SDMA_PKT_COPY_LINEAR_HEADER_npd_shift)
+
+/* backwards field: bit [25] */
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25
+#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift)
+
+/* broadcast field: bit [27] */
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/* COUNT word (offset 1) */
+/* count field: bits [29:0] */
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/* PARAMETER word (offset 2) */
+/* dst_sw field: bits [17:16] */
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/* dst_cache_policy field: bits [20:18] */
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift)
+
+/* src_sw field: bits [25:24] */
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/* src_cache_policy field: bits [28:26] */
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/* SRC_ADDR_LO word (offset 3) */
+/* src_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word (offset 4) */
+/* src_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DST_ADDR_LO word (offset 5) */
+/* dst_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (offset 6) */
+/* dst_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_COPY_LINEAR_BC packet (words at offsets 0-6). For each field, NAME(x) is ((x) & _mask) << _shift: _mask is the unshifted width mask, _shift the bit position within the word.
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift)
+
+/* COUNT word (offset 1) */
+/* count field: bits [21:0] */
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift)
+
+/* PARAMETER word (offset 2) */
+/* dst_sw field: bits [17:16] */
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift)
+
+/* dst_ha field: bit [19] */
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 19
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift)
+
+/* src_sw field: bits [25:24] */
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift)
+
+/* src_ha field: bit [27] */
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 27
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift)
+
+/* SRC_ADDR_LO word (offset 3) */
+/* src_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word (offset 4) */
+/* src_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DST_ADDR_LO word (offset 5) */
+/* dst_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (offset 6) */
+/* dst_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for the SDMA_PKT_COPY_DIRTY_PAGE packet (words at offsets 0-6). For each field, NAME(x) is ((x) & _mask) << _shift: _mask is the unshifted width mask, _shift the bit position within the word.
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift)
+
+/* tmz field: bit [18] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift)
+
+/* cpv field: bit [19] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_cpv_shift)
+
+/* all field: bit [31]. NOTE(review): the mask is a plain int constant, so an int argument is shifted into the sign bit; pass an unsigned value. */
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift)
+
+/* COUNT word (offset 1) */
+/* count field: bits [21:0] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift)
+
+/* PARAMETER word (offset 2) */
+/* dst_mtype field: bits [5:3] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift)
+
+/* dst_l2_policy field: bits [7:6] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift)
+
+/* dst_llc field: bit [8] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_llc_shift)
+
+/* src_mtype field: bits [13:11] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift)
+
+/* src_l2_policy field: bits [15:14] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift)
+
+/* src_llc field: bit [16] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift 16
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_llc_shift)
+
+/* dst_sw field: bits [18:17] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 17
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift)
+
+/* dst_gcc field: bit [19] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift)
+
+/* dst_sys field: bit [20] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift)
+
+/* dst_snoop field: bit [22] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift)
+
+/* dst_gpa field: bit [23] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift)
+
+/* src_sw field: bits [25:24] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift)
+
+/* src_sys field: bit [28] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift)
+
+/* src_snoop field: bit [30] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift)
+
+/* src_gpa field: bit [31]. NOTE(review): the mask is a plain int constant, so an int argument is shifted into the sign bit; pass an unsigned value. */
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift)
+
+/* SRC_ADDR_LO word (offset 3) */
+/* src_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word (offset 4) */
+/* src_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DST_ADDR_LO word (offset 5) */
+/* dst_addr_31_0 field: bits [31:0] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (offset 6) */
+/* dst_addr_63_32 field: bits [31:0] */
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_cpv_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift)
+
+/*define for addr_pair_num field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_ADDR_PAIR_NUM(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_addr_pair_num_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift)
+
+/*define for dst_llc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_llc_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift)
+
+/*define for src_llc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift 16
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_LLC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_llc_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 17
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift)
+
+/*define for dst_log field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift)
+
+/* SRC_ADDR_LO word of the COPY_PHYSICAL_LINEAR packet; its field carries _offset 3 (apparently the word's index within the packet - confirm). */
+/* src_addr_31_0: the whole word, bits [31:0]; SRC_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word of the COPY_PHYSICAL_LINEAR packet; its field carries _offset 4 (apparently the word's index within the packet - confirm). */
+/* src_addr_63_32: the whole word, bits [31:0]; SRC_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DST_ADDR_LO word of the COPY_PHYSICAL_LINEAR packet; its field carries _offset 5 (apparently the word's index within the packet - confirm). */
+/* dst_addr_31_0: the whole word, bits [31:0]; DST_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word of the COPY_PHYSICAL_LINEAR packet; its field carries _offset 6 (apparently the word's index within the packet - confirm). */
+/* dst_addr_63_32: the whole word, bits [31:0]; DST_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+*/
+
+/* HEADER word of the COPY_BROADCAST_LINEAR packet; every field below carries _offset 0 (apparently the word's index within the packet - confirm). Each field has an unshifted _mask, a _shift, and an UPPERCASE(x) macro that masks x and shifts it into place. */
+/* op: bits [7:0]; OP(x) keeps the low 8 bits of x. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/* sub_op: bits [15:8]; SUB_OP(x) keeps the low 8 bits of x and shifts them left by 8. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/* encrypt: single bit 16; ENCRYPT(x) keeps bit 0 of x and shifts it left by 16. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift)
+
+/* tmz: single bit 18; TMZ(x) keeps bit 0 of x and shifts it left by 18. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift)
+
+/* cpv: single bit 19; CPV(x) keeps bit 0 of x and shifts it left by 19. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_cpv_shift)
+
+/* broadcast: single bit 27; BROADCAST(x) keeps bit 0 of x and shifts it left by 27. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/* COUNT word of the COPY_BROADCAST_LINEAR packet; its field carries _offset 1 (apparently the word's index within the packet - confirm). */
+/* count: bits [29:0]; COUNT(x) keeps the low 30 bits of x, unshifted, so the top two bits of the word stay clear. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/* PARAMETER word of the COPY_BROADCAST_LINEAR packet; every field below carries _offset 2 (apparently the word's index within the packet - confirm). The three sw/cache_policy pairs share one layout: a 2-bit sw field followed directly by a 3-bit cache_policy field. */
+/* dst2_sw: bits [9:8]; DST2_SW(x) keeps the low 2 bits of x and shifts them left by 8. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/* dst2_cache_policy: bits [12:10]; DST2_CACHE_POLICY(x) keeps the low 3 bits of x and shifts them left by 10. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift 10
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_cache_policy_shift)
+
+/* dst1_sw: bits [17:16]; DST1_SW(x) keeps the low 2 bits of x and shifts them left by 16. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/* dst1_cache_policy: bits [20:18]; DST1_CACHE_POLICY(x) keeps the low 3 bits of x and shifts them left by 18. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_cache_policy_shift)
+
+/* src_sw: bits [25:24]; SRC_SW(x) keeps the low 2 bits of x and shifts them left by 24. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/* src_cache_policy: bits [28:26]; SRC_CACHE_POLICY(x) keeps the low 3 bits of x and shifts them left by 26. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/* SRC_ADDR_LO word of the COPY_BROADCAST_LINEAR packet; its field carries _offset 3 (apparently the word's index within the packet - confirm). */
+/* src_addr_31_0: the whole word, bits [31:0]; SRC_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word of the COPY_BROADCAST_LINEAR packet; its field carries _offset 4 (apparently the word's index within the packet - confirm). */
+/* src_addr_63_32: the whole word, bits [31:0]; SRC_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DST1_ADDR_LO word of the COPY_BROADCAST_LINEAR packet; its field carries _offset 5 (apparently the word's index within the packet - confirm). */
+/* dst1_addr_31_0: the whole word, bits [31:0]; DST1_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/* DST1_ADDR_HI word of the COPY_BROADCAST_LINEAR packet; its field carries _offset 6 (apparently the word's index within the packet - confirm). */
+/* dst1_addr_63_32: the whole word, bits [31:0]; DST1_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/* DST2_ADDR_LO word of the COPY_BROADCAST_LINEAR packet; its field carries _offset 7 (apparently the word's index within the packet - confirm). */
+/* dst2_addr_31_0: the whole word, bits [31:0]; DST2_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/* DST2_ADDR_HI word of the COPY_BROADCAST_LINEAR packet; its field carries _offset 8 (apparently the word's index within the packet - confirm). */
+/* dst2_addr_63_32: the whole word, bits [31:0]; DST2_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+*/
+
+/* HEADER word of the COPY_LINEAR_SUBWIN packet; every field below carries _offset 0 (apparently the word's index within the packet - confirm). Each field has an unshifted _mask, a _shift, and an UPPERCASE(x) macro that masks x and shifts it into place. */
+/* op: bits [7:0]; OP(x) keeps the low 8 bits of x. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/* sub_op: bits [15:8]; SUB_OP(x) keeps the low 8 bits of x and shifts them left by 8. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/* tmz: single bit 18; TMZ(x) keeps bit 0 of x and shifts it left by 18. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift)
+
+/* cpv: single bit 19; CPV(x) keeps bit 0 of x and shifts it left by 19. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_cpv_shift)
+
+/* elementsize: bits [31:29]; ELEMENTSIZE(x) keeps the low 3 bits of x and shifts them left by 29. NOTE(review): the mask is a plain int constant, so a signed int argument of 4 or more shifts into the sign bit; callers presumably pass an unsigned 32-bit value - confirm. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/* SRC_ADDR_LO word of the COPY_LINEAR_SUBWIN packet; its field carries _offset 1 (apparently the word's index within the packet - confirm). */
+/* src_addr_31_0: the whole word, bits [31:0]; SRC_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word of the COPY_LINEAR_SUBWIN packet; its field carries _offset 2 (apparently the word's index within the packet - confirm). */
+/* src_addr_63_32: the whole word, bits [31:0]; SRC_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DW_3 word of the COPY_LINEAR_SUBWIN packet; both fields below carry _offset 3 (apparently the word's index within the packet - confirm). Bits [15:14] and [31:30] are not covered by either field. */
+/* src_x: bits [13:0]; SRC_X(x) keeps the low 14 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/* src_y: bits [29:16]; SRC_Y(x) keeps the low 14 bits of x and shifts them left by 16. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/* DW_4 word of the COPY_LINEAR_SUBWIN packet; both fields below carry _offset 4 (apparently the word's index within the packet - confirm). The two fields together fill all 32 bits. */
+/* src_z: bits [12:0]; SRC_Z(x) keeps the low 13 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/* src_pitch: bits [31:13]; SRC_PITCH(x) keeps the low 19 bits of x and shifts them left by 13. NOTE(review): the mask is a plain int constant, so a signed int argument with bit 18 set shifts into the sign bit; callers presumably pass an unsigned 32-bit value - confirm. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/* DW_5 word of the COPY_LINEAR_SUBWIN packet; its field carries _offset 5 (apparently the word's index within the packet - confirm). */
+/* src_slice_pitch: bits [27:0]; SRC_SLICE_PITCH(x) keeps the low 28 bits of x, unshifted, so the top four bits of the word stay clear. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/* DST_ADDR_LO word of the COPY_LINEAR_SUBWIN packet; its field carries _offset 6 (apparently the word's index within the packet - confirm). */
+/* dst_addr_31_0: the whole word, bits [31:0]; DST_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word of the COPY_LINEAR_SUBWIN packet; its field carries _offset 7 (apparently the word's index within the packet - confirm). */
+/* dst_addr_63_32: the whole word, bits [31:0]; DST_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* DW_8 word of the COPY_LINEAR_SUBWIN packet; both fields below carry _offset 8 (apparently the word's index within the packet - confirm). Same bit layout as the DW_3 src_x/src_y pair. */
+/* dst_x: bits [13:0]; DST_X(x) keeps the low 14 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/* dst_y: bits [29:16]; DST_Y(x) keeps the low 14 bits of x and shifts them left by 16. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/* DW_9 word of the COPY_LINEAR_SUBWIN packet; both fields below carry _offset 9 (apparently the word's index within the packet - confirm). Same bit layout as the DW_4 src_z/src_pitch pair. */
+/* dst_z: bits [12:0]; DST_Z(x) keeps the low 13 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/* dst_pitch: bits [31:13]; DST_PITCH(x) keeps the low 19 bits of x and shifts them left by 13. NOTE(review): the mask is a plain int constant, so a signed int argument with bit 18 set shifts into the sign bit; callers presumably pass an unsigned 32-bit value - confirm. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/* DW_10 word of the COPY_LINEAR_SUBWIN packet; its field carries _offset 10 (apparently the word's index within the packet - confirm). */
+/* dst_slice_pitch: bits [27:0]; DST_SLICE_PITCH(x) keeps the low 28 bits of x, unshifted, so the top four bits of the word stay clear. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/* DW_11 word of the COPY_LINEAR_SUBWIN packet; both fields below carry _offset 11 (apparently the word's index within the packet - confirm). Same bit layout as the DW_3 src_x/src_y pair. */
+/* rect_x: bits [13:0]; RECT_X(x) keeps the low 14 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/* rect_y: bits [29:16]; RECT_Y(x) keeps the low 14 bits of x and shifts them left by 16. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/* DW_12 word of the COPY_LINEAR_SUBWIN packet; every field below carries _offset 12 (apparently the word's index within the packet - confirm). It packs rect_z in the low bits with the dst/src sw and cache_policy fields above it. */
+/* rect_z: bits [12:0]; RECT_Z(x) keeps the low 13 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/* dst_sw: bits [17:16]; DST_SW(x) keeps the low 2 bits of x and shifts them left by 16. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/* dst_cache_policy: bits [20:18]; DST_CACHE_POLICY(x) keeps the low 3 bits of x and shifts them left by 18. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_cache_policy_shift)
+
+/* src_sw: bits [25:24]; SRC_SW(x) keeps the low 2 bits of x and shifts them left by 24. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+/* src_cache_policy: bits [28:26]; SRC_CACHE_POLICY(x) keeps the low 3 bits of x and shifts them left by 26. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_cache_policy_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE packet
+*/
+
+/* HEADER word of the COPY_LINEAR_SUBWIN_LARGE packet; every field below carries _offset 0 (apparently the word's index within the packet - confirm). Each field has an unshifted _mask, a _shift, and an UPPERCASE(x) macro that masks x and shifts it into place. */
+/* op: bits [7:0]; OP(x) keeps the low 8 bits of x. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_op_shift)
+
+/* sub_op: bits [15:8]; SUB_OP(x) keeps the low 8 bits of x and shifts them left by 8. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_sub_op_shift)
+
+/* tmz: single bit 18; TMZ(x) keeps bit 0 of x and shifts it left by 18. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_tmz_shift)
+
+/* cpv: single bit 19; CPV(x) keeps bit 0 of x and shifts it left by 19. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_HEADER_cpv_shift)
+
+/* SRC_ADDR_LO word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 1 (apparently the word's index within the packet - confirm). */
+/* src_addr_31_0: the whole word, bits [31:0]; SRC_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/* SRC_ADDR_HI word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 2 (apparently the word's index within the packet - confirm). */
+/* src_addr_63_32: the whole word, bits [31:0]; SRC_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/* DW_3 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 3 (apparently the word's index within the packet - confirm). */
+/* src_x: the whole word, bits [31:0]; SRC_X(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_3_src_x_shift)
+
+/* DW_4 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 4 (apparently the word's index within the packet - confirm). */
+/* src_y: the whole word, bits [31:0]; SRC_Y(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_4_src_y_shift)
+
+/* DW_5 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 5 (apparently the word's index within the packet - confirm). */
+/* src_z: the whole word, bits [31:0]; SRC_Z(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_5_src_z_shift)
+
+/* DW_6 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 6 (apparently the word's index within the packet - confirm). */
+/* src_pitch: the whole word, bits [31:0]; SRC_PITCH(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_6_src_pitch_shift)
+
+/* DW_7 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 7 (apparently the word's index within the packet - confirm). */
+/* src_slice_pitch_31_0: the whole word, bits [31:0]; SRC_SLICE_PITCH_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_SRC_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_7_src_slice_pitch_31_0_shift)
+
+/* DW_8 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 8 (apparently the word's index within the packet - confirm). */
+/* src_slice_pitch_47_32: bits [15:0]; SRC_SLICE_PITCH_47_32(x) keeps the low 16 bits of x, unshifted (the caller supplies the already-extracted upper part), so the top half of the word stays clear. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask 0x0000FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_SRC_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_8_src_slice_pitch_47_32_shift)
+
+/* DST_ADDR_LO word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 9 (apparently the word's index within the packet - confirm). */
+/* dst_addr_31_0: the whole word, bits [31:0]; DST_ADDR_31_0(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 10 (apparently the word's index within the packet - confirm). */
+/* dst_addr_63_32: the whole word, bits [31:0]; DST_ADDR_63_32(x) keeps the low 32 bits of x, unshifted (the caller supplies the already-extracted upper half). */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* DW_11 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 11 (apparently the word's index within the packet - confirm). */
+/* dst_x: the whole word, bits [31:0]; DST_X(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_11_dst_x_shift)
+
+/* DW_12 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 12 (apparently the word's index within the packet - confirm). */
+/* dst_y: the whole word, bits [31:0]; DST_Y(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_12_dst_y_shift)
+
+/* DW_13 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 13 (apparently the word's index within the packet - confirm). */
+/* dst_z: the whole word, bits [31:0]; DST_Z(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_offset 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_13_dst_z_shift)
+
+/* DW_14 word of the COPY_LINEAR_SUBWIN_LARGE packet; its field carries _offset 14 (apparently the word's index within the packet - confirm). */
+/* dst_pitch: the whole word, bits [31:0]; DST_PITCH(x) keeps the low 32 bits of x, unshifted. */
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_offset 14
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_14_dst_pitch_shift)
+
+/*define for DW_15 word (offset 15)*/
+/*define for dst_slice_pitch_31_0 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_offset 15
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_DST_SLICE_PITCH_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_15_dst_slice_pitch_31_0_shift)
+
+/*define for DW_16 word (offset 16)*/
+/*define for dst_slice_pitch_47_32 field: bits 15:0 (16 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask 0x0000FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SLICE_PITCH_47_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_slice_pitch_47_32_shift)
+
+/*define for dst_sw field: bits 17:16 (2 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_sw_shift)
+
+/*define for dst_policy field: bits 20:18 (3 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_DST_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_dst_policy_shift)
+
+/*define for src_sw field: bits 25:24 (2 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_sw_shift)
+
+/*define for src_policy field: bits 28:26 (3 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_offset 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift 26
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_SRC_POLICY(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_16_src_policy_shift)
+
+/*define for DW_17 word (offset 17)*/
+/*define for rect_x field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_offset 17
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_17_rect_x_shift)
+
+/*define for DW_18 word (offset 18)*/
+/*define for rect_y field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_offset 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_18_rect_y_shift)
+
+/*define for DW_19 word (offset 19)*/
+/*define for rect_z field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_offset 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_LARGE_DW_19_rect_z_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits 7:0 (8 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field: bits 15:8 (8 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for elementsize field: bits 31:29 (3 bits wide); the mask is unsigned so values >= 4 never shift into the sign bit of a signed int*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007U
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word (offset 1)*/
+/*define for src_addr_31_0 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (offset 2)*/
+/*define for src_addr_63_32 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for src_x field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift)
+
+/*define for src_y field: bits 29:16 (14 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word (offset 4)*/
+/*define for src_z field: bits 10:0 (11 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift)
+
+/*define for src_pitch field: bits 26:13 (14 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for src_slice_pitch field: bits 27:0 (28 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word (offset 6)*/
+/*define for dst_addr_31_0 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (offset 7)*/
+/*define for dst_addr_63_32 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word (offset 8)*/
+/*define for dst_x field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift)
+
+/*define for dst_y field: bits 29:16 (14 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift)
+
+/*define for DW_9 word (offset 9)*/
+/*define for dst_z field: bits 10:0 (11 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift)
+
+/*define for dst_pitch field: bits 26:13 (14 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word (offset 10)*/
+/*define for dst_slice_pitch field: bits 27:0 (28 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word (offset 11)*/
+/*define for rect_x field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift)
+
+/*define for rect_y field: bits 29:16 (14 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift)
+
+/*define for DW_12 word (offset 12)*/
+/*define for rect_z field: bits 10:0 (11 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift)
+
+/*define for dst_sw field: bits 17:16 (2 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift)
+
+/*define for dst_ha field: bit 19 (1 bit wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 19
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift)
+
+/*define for src_sw field: bits 25:24 (2 bits wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift)
+
+/*define for src_ha field: bit 27 (1 bit wide)*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 27
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits 7:0 (8 bits wide)*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field: bits 15:8 (8 bits wide)*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field: bit 16 (1 bit wide)*/
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field: bit 18 (1 bit wide)*/
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift)
+
+/*define for cpv field: bit 19 (1 bit wide)*/
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_HEADER_cpv_shift)
+
+/*define for detile field: bit 31 (1 bit wide); the mask is unsigned so DETILE(1) never shifts a 1 into the sign bit of a signed int*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001U
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word (offset 1)*/
+/*define for tiled_addr_31_0 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word (offset 2)*/
+/*define for tiled_addr_63_32 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for width field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift)
+
+/*define for DW_4 word (offset 4)*/
+/*define for height field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift)
+
+/*define for depth field: bits 28:16 (13 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for element_size field: bits 2:0 (3 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field: bits 7:3 (5 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field: bits 10:9 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field: bits 19:16 (4 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word (offset 6)*/
+/*define for x field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field: bits 29:16 (14 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word (offset 7)*/
+/*define for z field: bits 12:0 (13 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field: bits 17:16 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for linear_cache_policy field: bits 20:18 (3 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cache_policy_shift)
+
+/*define for tile_sw field: bits 25:24 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for tile_cache_policy field: bits 28:26 (3 bits wide)*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word (offset 8)*/
+/*define for linear_addr_31_0 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word (offset 9)*/
+/*define for linear_addr_63_32 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word (offset 10)*/
+/*define for linear_pitch field: bits 18:0 (19 bits wide)*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word (offset 11)*/
+/*define for linear_slice_pitch field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word (offset 12)*/
+/*define for count field: bits 29:0 (30 bits wide)*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_BC packet
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits 7:0 (8 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field: bits 15:8 (8 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift)
+
+/*define for detile field: bit 31 (1 bit wide); the mask is unsigned so DETILE(1) never shifts a 1 into the sign bit of a signed int*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001U
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word (offset 1)*/
+/*define for tiled_addr_31_0 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word (offset 2)*/
+/*define for tiled_addr_63_32 field: bits 31:0 (32 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for width field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word (offset 4)*/
+/*define for height field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift)
+
+/*define for depth field: bits 26:16 (11 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for element_size field: bits 2:0 (3 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field: bits 6:3 (4 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field: bits 10:8 (3 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field: bits 13:11 (3 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field: bits 16:15 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field: bits 19:18 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field: bits 22:21 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field: bits 25:24 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field: bits 30:26 (5 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word (offset 6)*/
+/*define for x field: bits 13:0 (14 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift)
+
+/*define for y field: bits 29:16 (14 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word (offset 7)*/
+/*define for z field: bits 10:0 (11 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift)
+
+/*define for linear_sw field: bits 17:16 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift)
+
+/*define for tile_sw field: bits 25:24 (2 bits wide)*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word: field below carries _offset 8*/
+/*define for linear_addr_31_0 field: bits [31:0], the whole word (32-bit mask, shift 0); the upper-case macro masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word: field below carries _offset 9*/
+/*define for linear_addr_63_32 field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word: field below carries _offset 10*/
+/*define for linear_pitch field: bits [18:0] (19-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word: field below carries _offset 11*/
+/*define for linear_slice_pitch field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word: field below carries _offset 12*/
+/*define for count field: bits [21:2] (20-bit mask, shift 2); note the non-zero shift, so bits [1:0] of the word are not covered by this field*/
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word: fields below carry _offset 0*/
+/*define for op field: bits [7:0] (8-bit mask, shift 0); OP(x) masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field: bits [15:8] (8-bit mask, shift 8)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for encrypt field: bit [16] (1-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift)
+
+/*define for tmz field: bit [18] (1-bit mask, shift 18)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift)
+
+/*define for cpv field: bit [19] (1-bit mask, shift 19)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift 19
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_cpv_shift)
+
+/*define for videocopy field: bit [26] (1-bit mask, shift 26)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field: bit [27] (1-bit mask, shift 27)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word: field below carries _offset 1*/
+/*define for tiled_addr0_31_0 field: bits [31:0], the whole word (32-bit mask, shift 0); the upper-case macro masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word: field below carries _offset 2*/
+/*define for tiled_addr0_63_32 field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word: field below carries _offset 3*/
+/*define for tiled_addr1_31_0 field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word: field below carries _offset 4*/
+/*define for tiled_addr1_63_32 field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word: field below carries _offset 5*/
+/*define for width field: bits [13:0] (14-bit mask, shift 0); WIDTH(x) masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift)
+
+/*define for DW_6 word: fields below carry _offset 6*/
+/*define for height field: bits [13:0] (14-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift)
+
+/*define for depth field: bits [28:16] (13-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift)
+
+/*define for DW_7 word: fields below carry _offset 7*/
+/*define for element_size field: bits [2:0] (3-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for swizzle_mode field: bits [7:3] (5-bit mask, shift 3)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift)
+
+/*define for dimension field: bits [10:9] (2-bit mask, shift 9)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift)
+
+/*define for mip_max field: bits [19:16] (4-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift)
+
+/*define for DW_8 word: fields below carry _offset 8*/
+/*define for x field: bits [13:0] (14-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field: bits [29:16] (14-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word: field below carries _offset 9*/
+/*define for z field: bits [12:0] (13-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word: fields below carry _offset 10*/
+/*define for dst2_sw field: bits [9:8] (2-bit mask, shift 8); DST2_SW(x) masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for dst2_cache_policy field: bits [12:10] (3-bit mask, shift 10)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_cache_policy_shift)
+
+/*define for linear_sw field: bits [17:16] (2-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for linear_cache_policy field: bits [20:18] (3-bit mask, shift 18)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_cache_policy_shift)
+
+/*define for tile_sw field: bits [25:24] (2-bit mask, shift 24)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for tile_cache_policy field: bits [28:26] (3-bit mask, shift 26)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word: field below carries _offset 11*/
+/*define for linear_addr_31_0 field: bits [31:0], the whole word (32-bit mask, shift 0); the upper-case macro masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word: field below carries _offset 12*/
+/*define for linear_addr_63_32 field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word: field below carries _offset 13*/
+/*define for linear_pitch field: bits [18:0] (19-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word: field below carries _offset 14*/
+/*define for linear_slice_pitch field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word: field below carries _offset 15*/
+/*define for count field: bits [29:0] (30-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word: fields below carry _offset 0*/
+/*define for op field: bits [7:0] (8-bit mask, shift 0); OP(x) masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field: bits [15:8] (8-bit mask, shift 8)*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for tmz field: bit [18] (1-bit mask, shift 18)*/
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift)
+
+/*define for dcc field: bit [19] (1-bit mask, shift 19)*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift)
+
+/*define for cpv field: bit [28] (1-bit mask, shift 28)*/
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_T2T_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_cpv_mask) << SDMA_PKT_COPY_T2T_HEADER_cpv_shift)
+
+/*define for dcc_dir field: bit [31] of the HEADER word (1-bit mask, shift 31). The mask is unsigned so DCC_DIR(x) shifts in unsigned arithmetic; with a plain int mask and an int x, 1 << 31 overflows int, which is undefined behaviour in C*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001U
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31
+#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift)
+
+/*define for SRC_ADDR_LO word: field below carries _offset 1*/
+/*define for src_addr_31_0 field: bits [31:0], the whole word (32-bit mask, shift 0); the upper-case macro masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word: field below carries _offset 2*/
+/*define for src_addr_63_32 field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word: fields below carry _offset 3*/
+/*define for src_x field: bits [13:0] (14-bit mask, shift 0); SRC_X(x) masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field: bits [29:16] (14-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word: fields below carry _offset 4*/
+/*define for src_z field: bits [12:0] (13-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_width field: bits [29:16] (14-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift)
+
+/*define for DW_5 word: fields below carry _offset 5*/
+/*define for src_height field: bits [13:0] (14-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift)
+
+/*define for src_depth field: bits [28:16] (13-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift)
+
+/*define for DW_6 word: fields below carry _offset 6*/
+/*define for src_element_size field: bits [2:0] (3-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_swizzle_mode field: bits [7:3] (5-bit mask, shift 3)*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift)
+
+/*define for src_dimension field: bits [10:9] (2-bit mask, shift 9)*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift)
+
+/*define for src_mip_max field: bits [19:16] (4-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift)
+
+/*define for src_mip_id field: bits [23:20] (4-bit mask, shift 20)*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift)
+
+/*define for DST_ADDR_LO word: field below carries _offset 7*/
+/*define for dst_addr_31_0 field: bits [31:0], the whole word (32-bit mask, shift 0); the upper-case macro masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word: field below carries _offset 8*/
+/*define for dst_addr_63_32 field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word: fields below carry _offset 9*/
+/*define for dst_x field: bits [13:0] (14-bit mask, shift 0); DST_X(x) masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field: bits [29:16] (14-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word: fields below carry _offset 10*/
+/*define for dst_z field: bits [12:0] (13-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_width field: bits [29:16] (14-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift)
+
+/*define for DW_11 word: fields below carry _offset 11*/
+/*define for dst_height field: bits [13:0] (14-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift)
+
+/*define for dst_depth field: bits [28:16] (13-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift)
+
+/*define for DW_12 word: fields below carry _offset 12*/
+/*define for dst_element_size field: bits [2:0] (3-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift)
+
+/*define for dst_swizzle_mode field: bits [7:3] (5-bit mask, shift 3)*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift)
+
+/*define for dst_dimension field: bits [10:9] (2-bit mask, shift 9)*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift)
+
+/*define for dst_mip_max field: bits [19:16] (4-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift)
+
+/*define for dst_mip_id field: bits [23:20] (4-bit mask, shift 20)*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift)
+
+/*define for DW_13 word: fields below carry _offset 13*/
+/*define for rect_x field: bits [13:0] (14-bit mask, shift 0); RECT_X(x) masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field: bits [29:16] (14-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word: fields below carry _offset 14*/
+/*define for rect_z field: bits [12:0] (13-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field: bits [17:16] (2-bit mask, shift 16)*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for dst_cache_policy field: bits [20:18] (3-bit mask, shift 18)*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift 18
+#define SDMA_PKT_COPY_T2T_DW_14_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_cache_policy_shift)
+
+/*define for src_sw field: bits [25:24] (2-bit mask, shift 24)*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+/*define for src_cache_policy field: bits [28:26] (3-bit mask, shift 26)*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift 26
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_mask) << SDMA_PKT_COPY_T2T_DW_14_src_cache_policy_shift)
+
+/*define for META_ADDR_LO word: field below carries _offset 15*/
+/*define for meta_addr_31_0 field: bits [31:0], the whole word (32-bit mask, shift 0); the upper-case macro masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word: field below carries _offset 16*/
+/*define for meta_addr_63_32 field: bits [31:0], the whole word (32-bit mask, shift 0)*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word: fields below carry _offset 17*/
+/*define for data_format field: bits [6:0] (7-bit mask, shift 0); DATA_FORMAT(x) masks x first, then shifts it into place*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field: bit [7] (1-bit mask, shift 7)*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field: bit [8] (1-bit mask, shift 8)*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field: bits [11:9] (3-bit mask, shift 9)*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift)
+
+/*define for surface_type field: bits [13:12] (2-bit mask, shift 12)*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift)
+
+/*define for meta_llc field: bit [14] (1-bit mask, shift 14)*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift 14
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_llc_shift)
+
+/*define for max_comp_block_size field: bits [25:24] (2-bit mask, shift 24)*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift)
+
+/*define for pipe_aligned field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift 31
+#define SDMA_PKT_COPY_T2T_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_pipe_aligned_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T_BC packet
+*/
+
+/*define for HEADER word (_offset 0, presumably the packet dword index)*/
+/*define for op field: low 8 bits of x -> bits [7:0]*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift)
+
+/*define for sub_op field: low 8 bits of x -> bits [15:8]*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR_LO word (_offset 1, presumably the packet dword index)*/
+/*define for src_addr_31_0 field: x & 0xFFFFFFFF, unshifted -> bits [31:0]*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (_offset 2, presumably the packet dword index)*/
+/*define for src_addr_63_32 field: x & 0xFFFFFFFF, unshifted -> bits [31:0]*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word (_offset 3, presumably the packet dword index)*/
+/*define for src_x field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift)
+
+/*define for src_y field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word (_offset 4, presumably the packet dword index)*/
+/*define for src_z field: low 11 bits of x -> bits [10:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift)
+
+/*define for src_width field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift)
+
+/*define for DW_5 word (_offset 5, presumably the packet dword index)*/
+/*define for src_height field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift)
+
+/*define for src_depth field: low 11 bits of x -> bits [26:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift)
+
+/*define for DW_6 word (_offset 6, presumably the packet dword index)*/
+/*define for src_element_size field: low 3 bits of x -> bits [2:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift)
+
+/*define for src_array_mode field: low 4 bits of x -> bits [6:3]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift)
+
+/*define for src_mit_mode field: low 3 bits of x -> bits [10:8]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift)
+
+/*define for src_tilesplit_size field: low 3 bits of x -> bits [13:11]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift)
+
+/*define for src_bank_w field: low 2 bits of x -> bits [16:15]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift)
+
+/*define for src_bank_h field: low 2 bits of x -> bits [19:18]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift)
+
+/*define for src_num_bank field: low 2 bits of x -> bits [22:21]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift)
+
+/*define for src_mat_aspt field: low 2 bits of x -> bits [25:24]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift)
+
+/*define for src_pipe_config field: low 5 bits of x -> bits [30:26]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift)
+
+/*define for DST_ADDR_LO word (_offset 7, presumably the packet dword index)*/
+/*define for dst_addr_31_0 field: x & 0xFFFFFFFF, unshifted -> bits [31:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (_offset 8, presumably the packet dword index)*/
+/*define for dst_addr_63_32 field: x & 0xFFFFFFFF, unshifted -> bits [31:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word (_offset 9, presumably the packet dword index)*/
+/*define for dst_x field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift)
+
+/*define for dst_y field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift)
+
+/*define for DW_10 word (_offset 10, presumably the packet dword index)*/
+/*define for dst_z field: low 11 bits of x -> bits [10:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift)
+
+/*define for dst_width field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift)
+
+/*define for DW_11 word (_offset 11, presumably the packet dword index)*/
+/*define for dst_height field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift)
+
+/*define for dst_depth field: low 12 bits of x -> bits [27:16]. NOTE(review): 12-bit mask here, while src_depth (DW_5) and dst_z (DW_10) use 11 bits (0x7FF); confirm against the packet spec*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift)
+
+/*define for DW_12 word (_offset 12, presumably the packet dword index)*/
+/*define for dst_element_size field: low 3 bits of x -> bits [2:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift)
+
+/*define for dst_array_mode field: low 4 bits of x -> bits [6:3]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift)
+
+/*define for dst_mit_mode field: low 3 bits of x -> bits [10:8]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift)
+
+/*define for dst_tilesplit_size field: low 3 bits of x -> bits [13:11]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift)
+
+/*define for dst_bank_w field: low 2 bits of x -> bits [16:15]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift)
+
+/*define for dst_bank_h field: low 2 bits of x -> bits [19:18]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift)
+
+/*define for dst_num_bank field: low 2 bits of x -> bits [22:21]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift)
+
+/*define for dst_mat_aspt field: low 2 bits of x -> bits [25:24]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift)
+
+/*define for dst_pipe_config field: low 5 bits of x -> bits [30:26]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift)
+
+/*define for DW_13 word (_offset 13, presumably the packet dword index)*/
+/*define for rect_x field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift)
+
+/*define for rect_y field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift)
+
+/*define for DW_14 word (_offset 14, presumably the packet dword index)*/
+/*define for rect_z field: low 11 bits of x -> bits [10:0]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift)
+
+/*define for dst_sw field: low 2 bits of x -> bits [17:16]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift)
+
+/*define for src_sw field: low 2 bits of x -> bits [25:24]*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word (_offset 0, presumably the packet dword index)*/
+/*define for op field: low 8 bits of x -> bits [7:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field: low 8 bits of x -> bits [15:8]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field: bit 0 of x -> bit 18*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift)
+
+/*define for dcc field: bit 0 of x -> bit 19*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift)
+
+/*define for cpv field: bit 0 of x -> bit 28*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_cpv_shift)
+
+/*define for detile field: bit 0 of x -> bit 31. NOTE(review): the mask is an int constant, so a signed int x is shifted into the sign bit; confirm callers pass an unsigned 32-bit value*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word (_offset 1, presumably the packet dword index)*/
+/*define for tiled_addr_31_0 field: x & 0xFFFFFFFF, unshifted -> bits [31:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word (_offset 2, presumably the packet dword index)*/
+/*define for tiled_addr_63_32 field: x & 0xFFFFFFFF, unshifted -> bits [31:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word (_offset 3, presumably the packet dword index)*/
+/*define for tiled_x field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word (_offset 4, presumably the packet dword index)*/
+/*define for tiled_z field: low 13 bits of x -> bits [12:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for width field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift)
+
+/*define for DW_5 word (_offset 5, presumably the packet dword index)*/
+/*define for height field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift)
+
+/*define for depth field: low 13 bits of x -> bits [28:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift)
+
+/*define for DW_6 word (_offset 6, presumably the packet dword index)*/
+/*define for element_size field: low 3 bits of x -> bits [2:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for swizzle_mode field: low 5 bits of x -> bits [7:3]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift)
+
+/*define for dimension field: low 2 bits of x -> bits [10:9]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift)
+
+/*define for mip_max field: low 4 bits of x -> bits [19:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift)
+
+/*define for mip_id field: low 4 bits of x -> bits [23:20]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift)
+
+/*define for LINEAR_ADDR_LO word (_offset 7, presumably the packet dword index)*/
+/*define for linear_addr_31_0 field: x & 0xFFFFFFFF, unshifted -> bits [31:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word (_offset 8, presumably the packet dword index)*/
+/*define for linear_addr_63_32 field: x & 0xFFFFFFFF, unshifted -> bits [31:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word (_offset 9, presumably the packet dword index)*/
+/*define for linear_x field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word (_offset 10, presumably the packet dword index)*/
+/*define for linear_z field: low 13 bits of x -> bits [12:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word (_offset 11, presumably the packet dword index)*/
+/*define for linear_slice_pitch field: low 28 bits of x -> bits [27:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word (_offset 12, presumably the packet dword index)*/
+/*define for rect_x field: low 14 bits of x -> bits [13:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field: low 14 bits of x -> bits [29:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word (_offset 13, presumably the packet dword index)*/
+/*define for rect_z field: low 13 bits of x -> bits [12:0]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field: low 2 bits of x -> bits [17:16]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for linear_cache_policy field: low 3 bits of x -> bits [20:18]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_cache_policy_shift)
+
+/*define for tile_sw field: low 2 bits of x -> bits [25:24]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+/*define for tile_cache_policy field: low 3 bits of x -> bits [28:26]*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_cache_policy_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift)
+
+/*define for meta_llc field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_LLC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_llc_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift)
+
+/*define for pipe_aligned field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_PIPE_ALIGNED(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_pipe_aligned_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet. Each field gets <WORD>_<field>_offset/_mask/_shift plus an upper-case packer that evaluates to ((x) & mask) << shift, so the mask is right-aligned and applied before the shift. _offset is shared by all fields of a word and grows by one per word (presumably the word's dword index in the packet - TODO confirm).
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: HEADER bits 7:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field: HEADER bits 15:8*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for detile field: HEADER bit 31. NOTE(review): the mask is a plain int constant, so with a signed 32-bit int argument the << 31 shifts into the sign bit; passing an unsigned value avoids that - confirm against callers*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word (offset 1)*/
+/*define for tiled_addr_31_0 field: all 32 bits of TILED_ADDR_LO*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word (offset 2)*/
+/*define for tiled_addr_63_32 field: all 32 bits of TILED_ADDR_HI*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for tiled_x field: DW_3 bits 13:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift)
+
+/*define for tiled_y field: DW_3 bits 29:16*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift)
+
+/*define for DW_4 word (offset 4)*/
+/*define for tiled_z field: DW_4 bits 10:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift)
+
+/*define for width field: DW_4 bits 29:16*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for height field: DW_5 bits 13:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift)
+
+/*define for depth field: DW_5 bits 26:16*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift)
+
+/*define for DW_6 word (offset 6)*/
+/*define for element_size field: DW_6 bits 2:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift)
+
+/*define for array_mode field: DW_6 bits 6:3*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift)
+
+/*define for mit_mode field: DW_6 bits 10:8*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift)
+
+/*define for tilesplit_size field: DW_6 bits 13:11*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift)
+
+/*define for bank_w field: DW_6 bits 16:15*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift)
+
+/*define for bank_h field: DW_6 bits 19:18*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift)
+
+/*define for num_bank field: DW_6 bits 22:21*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift)
+
+/*define for mat_aspt field: DW_6 bits 25:24*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift)
+
+/*define for pipe_config field: DW_6 bits 30:26*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift)
+
+/*define for LINEAR_ADDR_LO word (offset 7)*/
+/*define for linear_addr_31_0 field: all 32 bits of LINEAR_ADDR_LO*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word (offset 8)*/
+/*define for linear_addr_63_32 field: all 32 bits of LINEAR_ADDR_HI*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word (offset 9)*/
+/*define for linear_x field: DW_9 bits 13:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift)
+
+/*define for linear_y field: DW_9 bits 29:16*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift)
+
+/*define for DW_10 word (offset 10)*/
+/*define for linear_z field: DW_10 bits 10:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift)
+
+/*define for linear_pitch field: DW_10 bits 29:16*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word (offset 11)*/
+/*define for linear_slice_pitch field: DW_11 bits 27:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word (offset 12)*/
+/*define for rect_x field: DW_12 bits 13:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift)
+
+/*define for rect_y field: DW_12 bits 29:16*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift)
+
+/*define for DW_13 word (offset 13)*/
+/*define for rect_z field: DW_13 bits 10:0*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift)
+
+/*define for linear_sw field: DW_13 bits 17:16*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift)
+
+/*define for tile_sw field: DW_13 bits 25:24*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet. Each field gets <WORD>_<field>_offset/_mask/_shift plus an upper-case packer that evaluates to ((x) & mask) << shift, so the mask is right-aligned and applied before the shift. _offset is shared by all fields of a word and grows by one per word (presumably the word's dword index in the packet - TODO confirm).
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: HEADER bits 7:0*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/*define for sub_op field: HEADER bits 15:8*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/*define for tmz field: HEADER bit 18*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift)
+
+/*define for cpv field: HEADER bit 28*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift 28
+#define SDMA_PKT_COPY_STRUCT_HEADER_CPV(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_cpv_mask) << SDMA_PKT_COPY_STRUCT_HEADER_cpv_shift)
+
+/*define for detile field: HEADER bit 31. NOTE(review): the mask is a plain int constant, so with a signed 32-bit int argument the << 31 shifts into the sign bit; passing an unsigned value avoids that - confirm against callers*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/*define for SB_ADDR_LO word (offset 1)*/
+/*define for sb_addr_31_0 field: all 32 bits of SB_ADDR_LO*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/*define for SB_ADDR_HI word (offset 2)*/
+/*define for sb_addr_63_32 field: all 32 bits of SB_ADDR_HI*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/*define for START_INDEX word (offset 3)*/
+/*define for start_index field: all 32 bits of START_INDEX*/
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/*define for COUNT word (offset 4)*/
+/*define for count field: all 32 bits of COUNT*/
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for stride field: DW_5 bits 10:0*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/*define for linear_sw field: DW_5 bits 17:16*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/*define for linear_cache_policy field: DW_5 bits 20:18*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift 18
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_cache_policy_shift)
+
+/*define for struct_sw field: DW_5 bits 25:24*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/*define for struct_cache_policy field: DW_5 bits 28:26*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask 0x00000007
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift 26
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_CACHE_POLICY(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_cache_policy_shift)
+
+/*define for LINEAR_ADDR_LO word (offset 6)*/
+/*define for linear_addr_31_0 field: all 32 bits of LINEAR_ADDR_LO*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word (offset 7)*/
+/*define for linear_addr_63_32 field: all 32 bits of LINEAR_ADDR_HI*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet. Each field gets <WORD>_<field>_offset/_mask/_shift plus an upper-case packer that evaluates to ((x) & mask) << shift, so the mask is right-aligned and applied before the shift. _offset is shared by all fields of a word and grows by one per word (presumably the word's dword index in the packet - TODO confirm).
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: HEADER bits 7:0*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/*define for sub_op field: HEADER bits 15:8*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/*define for encrypt field: HEADER bit 16*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift)
+
+/*define for tmz field: HEADER bit 18*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift)
+
+/*define for cpv field: HEADER bit 28*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_UNTILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word (offset 1)*/
+/*define for dst_addr_31_0 field: all 32 bits of DST_ADDR_LO*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (offset 2)*/
+/*define for dst_addr_63_32 field: all 32 bits of DST_ADDR_HI*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for count field: DW_3 bits 19:0*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/*define for sw field: DW_3 bits 25:24*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/*define for cache_policy field: DW_3 bits 28:26*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift 26
+#define SDMA_PKT_WRITE_UNTILED_DW_3_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_cache_policy_shift)
+
+/*define for DATA0 word (offset 4)*/
+/*define for data0 field: all 32 bits of DATA0*/
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED packet
+**
+** Every field gets four macros: <field>_offset, <field>_mask (the field mask
+** before shifting), <field>_shift (bit position of the field's lowest bit)
+** and an upper-case FIELD(x) helper that evaluates to ((x) & mask) << shift.
+** NOTE(review): _offset grows by one per word in the order listed below, so
+** it is presumably the dword index inside the packet - confirm.
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits [7:0] of the HEADER word*/
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/*define for sub_op field: bits [15:8] of the HEADER word*/
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field: bit 16 of the HEADER word*/
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field: bit 18 of the HEADER word*/
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift)
+
+/*define for cpv field: bit 28 of the HEADER word*/
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_TILED_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_cpv_mask) << SDMA_PKT_WRITE_TILED_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word (offset 1)*/
+/*define for dst_addr_31_0 field: bits [31:0] of the DST_ADDR_LO word*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (offset 2)*/
+/*define for dst_addr_63_32 field: bits [31:0] of the DST_ADDR_HI word*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for width field: bits [13:0] of the DW_3 word*/
+#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift)
+
+/*define for DW_4 word (offset 4)*/
+/*define for height field: bits [13:0] of the DW_4 word*/
+#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift)
+
+/*define for depth field: bits [28:16] of the DW_4 word*/
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for element_size field: bits [2:0] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field: bits [7:3] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field: bits [10:9] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field: bits [19:16] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word (offset 6)*/
+/*define for x field: bits [13:0] of the DW_6 word*/
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/*define for y field: bits [29:16] of the DW_6 word*/
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/*define for DW_7 word (offset 7)*/
+/*define for z field: bits [12:0] of the DW_7 word*/
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/*define for sw field: bits [25:24] of the DW_7 word*/
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/*define for cache_policy field: bits [28:26] of the DW_7 word*/
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift 26
+#define SDMA_PKT_WRITE_TILED_DW_7_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_cache_policy_mask) << SDMA_PKT_WRITE_TILED_DW_7_cache_policy_shift)
+
+/*define for COUNT word (offset 8)*/
+/*define for count field: bits [19:0] of the COUNT word*/
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/*define for DATA0 word (offset 9)*/
+/*define for data0 field: bits [31:0] of the DATA0 word*/
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED_BC packet
+**
+** Every field gets four macros: <field>_offset, <field>_mask (the field mask
+** before shifting), <field>_shift (bit position of the field's lowest bit)
+** and an upper-case FIELD(x) helper that evaluates to ((x) & mask) << shift.
+** NOTE(review): _offset grows by one per word in the order listed below, so
+** it is presumably the dword index inside the packet - confirm.
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits [7:0] of the HEADER word*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field: bits [15:8] of the HEADER word*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word (offset 1)*/
+/*define for dst_addr_31_0 field: bits [31:0] of the DST_ADDR_LO word*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (offset 2)*/
+/*define for dst_addr_63_32 field: bits [31:0] of the DST_ADDR_HI word*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word (offset 3)*/
+/*define for width field: bits [13:0] of the DW_3 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word (offset 4)*/
+/*define for height field: bits [13:0] of the DW_4 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift)
+
+/*define for depth field: bits [26:16] of the DW_4 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word (offset 5)*/
+/*define for element_size field: bits [2:0] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field: bits [6:3] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field: bits [10:8] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field: bits [13:11] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field: bits [16:15] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field: bits [19:18] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field: bits [22:21] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field: bits [25:24] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field: bits [30:26] of the DW_5 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word (offset 6)*/
+/*define for x field: bits [13:0] of the DW_6 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift)
+
+/*define for y field: bits [29:16] of the DW_6 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word (offset 7)*/
+/*define for z field: bits [10:0] of the DW_7 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift)
+
+/*define for sw field: bits [25:24] of the DW_7 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift)
+
+/*define for COUNT word (offset 8)*/
+/*define for count field: bits [21:2] of the COUNT word.
+** NOTE(review): the shift is 2 here, whereas SDMA_PKT_WRITE_TILED_COUNT_count
+** uses shift 0 with the same mask - confirm against the packet spec.*/
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift)
+
+/*define for DATA0 word (offset 9)*/
+/*define for data0 field: bits [31:0] of the DATA0 word*/
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY packet
+**
+** Every field gets four macros: <field>_offset, <field>_mask (the field mask
+** before shifting), <field>_shift (bit position of the field's lowest bit)
+** and an upper-case FIELD(x) helper that evaluates to ((x) & mask) << shift.
+** NOTE(review): _offset grows by one per word in the order listed below, so
+** it is presumably the dword index inside the packet - confirm.
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits [7:0] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift)
+
+/*define for sub_op field: bits [15:8] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift)
+
+/*define for mtype field: bits [17:16] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_mtype_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_mtype_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_HEADER_mtype_shift 16
+#define SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_mtype_shift)
+
+/*define for snoop field: bit 22 of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_snoop_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_snoop_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_snoop_shift 22
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_snoop_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_snoop_shift)
+
+/*define for scope field: bits [25:24] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_scope_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_scope_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_HEADER_scope_shift 24
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_scope_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_scope_shift)
+
+/*define for SRC_ADDR_LO word (offset 1)*/
+/*define for src_addr_31_0 field: bits [31:0] of the SRC_ADDR_LO word*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (offset 2)*/
+/*define for src_addr_63_32 field: bits [31:0] of the SRC_ADDR_HI word*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word (offset 3)*/
+/*define for dst_addr_31_0 field: bits [31:0] of the DST_ADDR_LO word*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (offset 4)*/
+/*define for dst_addr_63_32 field: bits [31:0] of the DST_ADDR_HI word*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word (offset 5)*/
+/*define for mask_dw0 field: bits [31:0] of the MASK_DW0 word*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word (offset 6)*/
+/*define for mask_dw1 field: bits [31:0] of the MASK_DW1 word*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift)
+
+/*define for COUNT word (offset 7)*/
+/*define for count field: bits [18:0] of the COUNT word*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift)
+
+/*define for dst_cache_policy field: bits [24:22] of the COUNT word*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask 0x00000007
+#define SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift 22
+#define SDMA_PKT_PTEPDE_COPY_COUNT_DST_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_dst_cache_policy_shift)
+
+/*define for src_cache_policy field: bits [31:29] of the COUNT word.
+** NOTE(review): the mask literal has type int, so with an int argument of 4
+** or more the shift reaches the sign bit (signed overflow in ISO C); pass an
+** unsigned value, or confirm the build defines this behaviour.*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask 0x00000007
+#define SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift 29
+#define SDMA_PKT_PTEPDE_COPY_COUNT_SRC_CACHE_POLICY(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_src_cache_policy_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet
+**
+** Every field gets four macros: <field>_offset, <field>_mask (the field mask
+** before shifting), <field>_shift (bit position of the field's lowest bit)
+** and an upper-case FIELD(x) helper that evaluates to ((x) & mask) << shift.
+** NOTE(review): _offset grows by one per word in the order listed below, so
+** it is presumably the dword index inside the packet - confirm.
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits [7:0] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift)
+
+/*define for sub_op field: bits [15:8] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift)
+
+/*define for pte_size field: bits [29:28] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift)
+
+/*define for direction field: bit 30 of the HEADER word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift)
+
+/*define for ptepde_op field: bit 31 of the HEADER word.
+** NOTE(review): the mask literal has type int, so PTEPDE_OP(1) with an int
+** argument shifts into the sign bit (signed overflow in ISO C); pass an
+** unsigned value, or confirm the build defines this behaviour.*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word (offset 1)*/
+/*define for src_addr_31_0 field: bits [31:0] of the SRC_ADDR_LO word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word (offset 2)*/
+/*define for src_addr_63_32 field: bits [31:0] of the SRC_ADDR_HI word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word (offset 3)*/
+/*define for dst_addr_31_0 field: bits [31:0] of the DST_ADDR_LO word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word (offset 4)*/
+/*define for dst_addr_63_32 field: bits [31:0] of the DST_ADDR_HI word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_BIT_FOR_DW word (offset 5)*/
+/*define for mask_first_xfer field: bits [7:0] of the MASK_BIT_FOR_DW word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift)
+
+/*define for mask_last_xfer field: bits [15:8] of the MASK_BIT_FOR_DW word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift)
+
+/*define for COUNT_IN_32B_XFER word (offset 6)*/
+/*define for count field: bits [16:0] of the COUNT_IN_32B_XFER word*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_RMW packet
+**
+** Every field gets four macros: <field>_offset, <field>_mask (the field mask
+** before shifting), <field>_shift (bit position of the field's lowest bit)
+** and an upper-case FIELD(x) helper that evaluates to ((x) & mask) << shift.
+** NOTE(review): _offset grows by one per word in the order listed below, so
+** it is presumably the dword index inside the packet - confirm.
+*/
+
+/*define for HEADER word (offset 0)*/
+/*define for op field: bits [7:0] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift)
+
+/*define for sub_op field: bits [15:8] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift)
+
+/*define for mtype field: bits [18:16] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16
+#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift)
+
+/*define for gcc field: bit 19 of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift)
+
+/*define for sys field: bit 20 of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift)
+
+/*define for snp field: bit 22 of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift)
+
+/*define for gpa field: bit 23 of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift)
+
+/*define for l2_policy field: bits [25:24] of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24
+#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift)
+
+/*define for llc_policy field: bit 26 of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift 26
+#define SDMA_PKT_PTEPDE_RMW_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_llc_policy_shift)
+
+/*define for cpv field: bit 28 of the HEADER word*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift 28
+#define SDMA_PKT_PTEPDE_RMW_HEADER_CPV(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_cpv_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_cpv_shift)
+
+/*define for ADDR_LO word (offset 1)*/
+/*define for addr_31_0 field: bits [31:0] of the ADDR_LO word*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word (offset 2)*/
+/*define for addr_63_32 field: bits [31:0] of the ADDR_HI word*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift)
+
+/*define for MASK_LO word (offset 3)*/
+/*define for mask_31_0 field: bits [31:0] of the MASK_LO word*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift)
+
+/*define for MASK_HI word (offset 4)*/
+/*define for mask_63_32 field: bits [31:0] of the MASK_HI word*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift)
+
+/*define for VALUE_LO word (offset 5)*/
+/*define for value_31_0 field: bits [31:0] of the VALUE_LO word*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift)
+
+/*define for VALUE_HI word (offset 6)*/
+/*define for value_63_32 field: bits [31:0] of the VALUE_HI word*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift)
+
+/*define for COUNT word (offset 7)*/
+/*define for num_of_pte field: bits [31:0] of the COUNT word*/
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_offset 7
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift 0
+#define SDMA_PKT_PTEPDE_RMW_COUNT_NUM_OF_PTE(x) (((x) & SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_mask) << SDMA_PKT_PTEPDE_RMW_COUNT_num_of_pte_shift)
+
+
+/*
+** Definitions for SDMA_PKT_REGISTER_RMW packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER (offset 0): op [7:0], sub_op [15:8]
+**   ADDR   (offset 1): addr [19:0], aperture_id [31:20]
+**   MASK   (offset 2): mask [31:0]
+**   VALUE  (offset 3): value [31:0]
+**   MISC   (offset 4): stride [19:0], num_of_reg [31:20]
+**
+** NOTE(review): the aperture_id and num_of_reg masks are int-typed
+** literals, so for a signed int argument with the top field bit set the
+** shift reaches bit 31, which ISO C leaves undefined; confirm callers
+** pass unsigned values.
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_offset 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_REGISTER_RMW_HEADER_op_shift 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_REGISTER_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_REGISTER_RMW_HEADER_sub_op_mask) << SDMA_PKT_REGISTER_RMW_HEADER_sub_op_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_offset 1
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_mask 0x000FFFFF
+#define SDMA_PKT_REGISTER_RMW_ADDR_addr_shift 0
+#define SDMA_PKT_REGISTER_RMW_ADDR_ADDR(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_addr_mask) << SDMA_PKT_REGISTER_RMW_ADDR_addr_shift)
+
+/*define for aperture_id field*/
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_offset 1
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask 0x00000FFF
+#define SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift 20
+#define SDMA_PKT_REGISTER_RMW_ADDR_APERTURE_ID(x) (((x) & SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_mask) << SDMA_PKT_REGISTER_RMW_ADDR_aperture_id_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_offset 2
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_REGISTER_RMW_MASK_mask_shift 0
+#define SDMA_PKT_REGISTER_RMW_MASK_MASK(x) (((x) & SDMA_PKT_REGISTER_RMW_MASK_mask_mask) << SDMA_PKT_REGISTER_RMW_MASK_mask_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_offset 3
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_REGISTER_RMW_VALUE_value_shift 0
+#define SDMA_PKT_REGISTER_RMW_VALUE_VALUE(x) (((x) & SDMA_PKT_REGISTER_RMW_VALUE_value_mask) << SDMA_PKT_REGISTER_RMW_VALUE_value_shift)
+
+/*define for MISC word*/
+/*define for stride field*/
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_offset 4
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_mask 0x000FFFFF
+#define SDMA_PKT_REGISTER_RMW_MISC_stride_shift 0
+#define SDMA_PKT_REGISTER_RMW_MISC_STRIDE(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_stride_mask) << SDMA_PKT_REGISTER_RMW_MISC_stride_shift)
+
+/*define for num_of_reg field*/
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_offset 4
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask 0x00000FFF
+#define SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift 20
+#define SDMA_PKT_REGISTER_RMW_MISC_NUM_OF_REG(x) (((x) & SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_mask) << SDMA_PKT_REGISTER_RMW_MISC_num_of_reg_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_INCR packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER      (offset 0): op [7:0], sub_op [15:8],
+**                           cache_policy [26:24], cpv [28]
+**   DST_ADDR_LO (offset 1): dst_addr_31_0 [31:0]
+**   DST_ADDR_HI (offset 2): dst_addr_63_32 [31:0]
+**   MASK_DW0    (offset 3): mask_dw0 [31:0]
+**   MASK_DW1    (offset 4): mask_dw1 [31:0]
+**   INIT_DW0    (offset 5): init_dw0 [31:0]
+**   INIT_DW1    (offset 6): init_dw1 [31:0]
+**   INCR_DW0    (offset 7): incr_dw0 [31:0]
+**   INCR_DW1    (offset 8): incr_dw1 [31:0]
+**   COUNT       (offset 9): count [18:0]
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift 24
+#define SDMA_PKT_WRITE_INCR_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cache_policy_mask) << SDMA_PKT_WRITE_INCR_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_WRITE_INCR_HEADER_cpv_shift 28
+#define SDMA_PKT_WRITE_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_cpv_mask) << SDMA_PKT_WRITE_INCR_HEADER_cpv_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/*define for INIT_DW0 word*/
+/*define for init_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/*define for INIT_DW1 word*/
+/*define for init_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/*define for INCR_DW0 word*/
+/*define for incr_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/*define for INCR_DW1 word*/
+/*define for incr_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_INDIRECT packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER      (offset 0): op [7:0], sub_op [15:8], vmid [19:16],
+**                           priv [31]
+**   BASE_LO     (offset 1): ib_base_31_0 [31:0]
+**   BASE_HI     (offset 2): ib_base_63_32 [31:0]
+**   IB_SIZE     (offset 3): ib_size [19:0]
+**   CSA_ADDR_LO (offset 4): csa_addr_31_0 [31:0]
+**   CSA_ADDR_HI (offset 5): csa_addr_63_32 [31:0]
+**
+** NOTE(review): the priv mask is an int-typed literal, so PRIV(1) with a
+** signed int argument shifts a 1 into bit 31, which ISO C leaves
+** undefined; confirm callers pass unsigned values.
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/*define for priv field*/
+#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001
+#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31
+#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift)
+
+/*define for BASE_LO word*/
+/*define for ib_base_31_0 field*/
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/*define for BASE_HI word*/
+/*define for ib_base_63_32 field*/
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/*define for IB_SIZE word*/
+/*define for ib_size field*/
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/*define for CSA_ADDR_LO word*/
+/*define for csa_addr_31_0 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/*define for CSA_ADDR_HI word*/
+/*define for csa_addr_63_32 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SEMAPHORE packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER  (offset 0): op [7:0], sub_op [15:8], write_one [29],
+**                       signal [30], mailbox [31]
+**   ADDR_LO (offset 1): addr_31_0 [31:0]
+**   ADDR_HI (offset 2): addr_63_32 [31:0]
+**
+** NOTE(review): the mailbox mask is an int-typed literal, so MAILBOX(1)
+** with a signed int argument shifts a 1 into bit 31, which ISO C leaves
+** undefined; confirm callers pass unsigned values.
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/*define for write_one field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/*define for signal field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/*define for mailbox field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_MEM_INCR packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER  (offset 0): op [7:0], sub_op [15:8], l2_policy [25:24],
+**                       llc_policy [26], cpv [28]
+**   ADDR_LO (offset 1): addr_31_0 [31:0]
+**   ADDR_HI (offset 2): addr_63_32 [31:0]
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_MEM_INCR_HEADER_op_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_MEM_INCR_HEADER_op_shift 0
+#define SDMA_PKT_MEM_INCR_HEADER_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_op_mask) << SDMA_PKT_MEM_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_MEM_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_MEM_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_sub_op_mask) << SDMA_PKT_MEM_INCR_HEADER_sub_op_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift 24
+#define SDMA_PKT_MEM_INCR_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_l2_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift 26
+#define SDMA_PKT_MEM_INCR_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_llc_policy_mask) << SDMA_PKT_MEM_INCR_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_offset 0
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_MEM_INCR_HEADER_cpv_shift 28
+#define SDMA_PKT_MEM_INCR_HEADER_CPV(x) (((x) & SDMA_PKT_MEM_INCR_HEADER_cpv_mask) << SDMA_PKT_MEM_INCR_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_MEM_INCR_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_mask) << SDMA_PKT_MEM_INCR_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_MEM_INCR_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_mask) << SDMA_PKT_MEM_INCR_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_VM_INVALIDATION packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER         (offset 0): op [7:0], sub_op [15:8],
+**                              gfx_eng_id [20:16], mm_eng_id [28:24]
+**   INVALIDATEREQ  (offset 1): invalidatereq [31:0]
+**   ADDRESSRANGELO (offset 2): addressrangelo [31:0]
+**   ADDRESSRANGEHI (offset 3): invalidateack [15:0],
+**                              addressrangehi [20:16], reserved [31:23]
+**
+** Bits [22:21] of ADDRESSRANGEHI are not covered by any field defined
+** here.
+**
+** NOTE(review): the reserved mask is an int-typed literal, so for a
+** signed int argument with bit 8 set the shift reaches bit 31, which
+** ISO C leaves undefined; confirm callers pass unsigned values.
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
+#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
+
+/*define for gfx_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16
+#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift)
+
+/*define for mm_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24
+#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift)
+
+/*define for INVALIDATEREQ word*/
+/*define for invalidatereq field*/
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
+
+/*define for ADDRESSRANGELO word*/
+/*define for addressrangelo field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
+
+/*define for ADDRESSRANGEHI word*/
+/*define for invalidateack field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
+
+/*define for addressrangehi field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
+
+/*define for reserved field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_FENCE packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER  (offset 0): op [7:0], sub_op [15:8], mtype [18:16],
+**                       gcc [19], sys [20], snp [22], gpa [23],
+**                       l2_policy [25:24], llc_policy [26], cpv [28]
+**   ADDR_LO (offset 1): addr_31_0 [31:0]
+**   ADDR_HI (offset 2): addr_63_32 [31:0]
+**   DATA    (offset 3): data [31:0]
+**
+** HEADER bit 21 is not covered by any field defined here.
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_FENCE_HEADER_mtype_offset 0
+#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_FENCE_HEADER_mtype_shift 16
+#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_FENCE_HEADER_gcc_offset 0
+#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gcc_shift 19
+#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_FENCE_HEADER_sys_offset 0
+#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_sys_shift 20
+#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_FENCE_HEADER_snp_offset 0
+#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_snp_shift 22
+#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_FENCE_HEADER_gpa_offset 0
+#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gpa_shift 23
+#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24
+#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift)
+
+/*define for llc_policy field*/
+#define SDMA_PKT_FENCE_HEADER_llc_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_llc_policy_shift 26
+#define SDMA_PKT_FENCE_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_llc_policy_mask) << SDMA_PKT_FENCE_HEADER_llc_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_FENCE_HEADER_cpv_offset 0
+#define SDMA_PKT_FENCE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_cpv_shift 28
+#define SDMA_PKT_FENCE_HEADER_CPV(x) (((x) & SDMA_PKT_FENCE_HEADER_cpv_mask) << SDMA_PKT_FENCE_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SRBM_WRITE packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER (offset 0): op [7:0], sub_op [15:8], byte_en [31:28]
+**   ADDR   (offset 1): addr [17:0], apertureid [31:20]
+**   DATA   (offset 2): data [31:0]
+**
+** Bits [19:18] of ADDR are not covered by any field defined here.
+**
+** NOTE(review): the byte_en and apertureid masks are int-typed literals,
+** so for a signed int argument with the top field bit set the shift
+** reaches bit 31, which ISO C leaves undefined; confirm callers pass
+** unsigned values.
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/*define for byte_en field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/*define for apertureid field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20
+#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PRE_EXE packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER     (offset 0): op [7:0], sub_op [15:8], dev_sel [23:16]
+**   EXEC_COUNT (offset 1): exec_count [13:0]
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/*define for dev_sel field*/
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COND_EXE packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER     (offset 0): op [7:0], sub_op [15:8],
+**                          cache_policy [26:24], cpv [28]
+**   ADDR_LO    (offset 1): addr_31_0 [31:0]
+**   ADDR_HI    (offset 2): addr_63_32 [31:0]
+**   REFERENCE  (offset 3): reference [31:0]
+**   EXEC_COUNT (offset 4): exec_count [13:0]
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_COND_EXE_HEADER_cache_policy_shift 24
+#define SDMA_PKT_COND_EXE_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cache_policy_mask) << SDMA_PKT_COND_EXE_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_COND_EXE_HEADER_cpv_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_COND_EXE_HEADER_cpv_shift 28
+#define SDMA_PKT_COND_EXE_HEADER_CPV(x) (((x) & SDMA_PKT_COND_EXE_HEADER_cpv_mask) << SDMA_PKT_COND_EXE_HEADER_cpv_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/*define for REFERENCE word*/
+/*define for reference field*/
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_CONSTANT_FILL packet
+**
+** Each field has four defines: ..._offset, ..._mask (the field mask before
+** shifting), ..._shift (bit position of the field's lowest bit) and an
+** upper-case helper that masks x and shifts it into position.  Fields of
+** one word share the same _offset value, presumably the dword index of
+** that word within the packet.
+**
+** Layout derived from the mask/shift pairs below:
+**   HEADER      (offset 0): op [7:0], sub_op [15:8], sw [17:16],
+**                           cache_policy [26:24], cpv [28],
+**                           fillsize [31:30]
+**   DST_ADDR_LO (offset 1): dst_addr_31_0 [31:0]
+**   DST_ADDR_HI (offset 2): dst_addr_63_32 [31:0]
+**   DATA        (offset 3): src_data_31_0 [31:0]
+**   COUNT       (offset 4): count [29:0]
+**
+** NOTE(review): the fillsize mask is an int-typed literal, so for a
+** signed int argument with bit 1 set the shift reaches bit 31, which
+** ISO C leaves undefined; confirm callers pass unsigned values.
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/*define for cache_policy field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift 24
+#define SDMA_PKT_CONSTANT_FILL_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cache_policy_shift)
+
+/*define for cpv field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift 28
+#define SDMA_PKT_CONSTANT_FILL_HEADER_CPV(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_cpv_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_cpv_shift)
+
+/*define for fillsize field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x3FFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** SDMA_PKT_DATA_FILL_MULTI packet: per field, _offset (word index, judging by the values), _mask (unshifted), _shift, and a macro packing ((x) & mask) << shift
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift)
+
+/* cache_policy field: bits [26:24] */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift 24
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cache_policy_shift)
+
+/* cpv field: bit 28 */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift 28
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_CPV(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_cpv_shift)
+
+/* memlog_clr field: bit 31. NOTE(review): reaches bit 31 and the mask is a signed int constant; pass x as an unsigned 32-bit value so the shift cannot overflow a signed int */
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift)
+
+/* BYTE_STRIDE word (offset 1) */
+/* byte_stride field: all 32 bits of the word */
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift)
+
+/* DMA_COUNT word (offset 2) */
+/* dma_count field: all 32 bits of the word */
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift)
+
+/* DST_ADDR_LO word (offset 3) */
+/* dst_addr_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/* DST_ADDR_HI word (offset 4) */
+/* dst_addr_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/* BYTE_COUNT word (offset 5) */
+/* count field: bits [25:0]; the top six bits of the word are not covered by this mask */
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift)
+
+
+/*
+** SDMA_PKT_POLL_REGMEM packet: per field, _offset (word index, judging by the values), _mask (unshifted), _shift, and a macro packing ((x) & mask) << shift
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/* cache_policy field: bits [22:20] */
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift 20
+#define SDMA_PKT_POLL_REGMEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cache_policy_shift)
+
+/* cpv field: bit 24 */
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift 24
+#define SDMA_PKT_POLL_REGMEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REGMEM_HEADER_cpv_shift)
+
+/* hdp_flush field: bit 26 */
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/* func field: bits [30:28] */
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/* mem_poll field: bit 31. NOTE(review): reaches bit 31 and the mask is a signed int constant; pass x as an unsigned 32-bit value so the shift cannot overflow a signed int */
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/* ADDR_LO word (offset 1) */
+/* addr_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word (offset 2) */
+/* addr_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/* VALUE word (offset 3) */
+/* value field: all 32 bits of the word */
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/* MASK word (offset 4) */
+/* mask field: all 32 bits of the word (hence the doubled "mask_mask" name below) */
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/* DW5 word (offset 5), shared by the two fields below */
+/* interval field: bits [15:0] */
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/* retry_count field: bits [27:16] */
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** SDMA_PKT_POLL_REG_WRITE_MEM packet: per field, _offset (word index, judging by the values), _mask (unshifted), _shift, and a macro packing ((x) & mask) << shift
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift)
+
+/* cache_policy field: bits [26:24] */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cache_policy_shift)
+
+/* cpv field: bit 28 */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_cpv_shift)
+
+/* SRC_ADDR word (offset 1) */
+/* addr_31_2 field: bits [31:2]; x is masked to 30 bits and then shifted left by 2, so x is presumably the address >> 2 (confirm at call sites). NOTE(review): reaches bit 31 and the mask is a signed int constant; pass x as an unsigned 32-bit value so the shift cannot overflow a signed int */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift)
+
+/* DST_ADDR_LO word (offset 2) */
+/* addr_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/* DST_ADDR_HI word (offset 3) */
+/* addr_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** SDMA_PKT_POLL_DBIT_WRITE_MEM packet: per field, _offset (word index, judging by the values), _mask (unshifted), _shift, and a macro packing ((x) & mask) << shift
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift)
+
+/* ea field: bits [17:16] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift)
+
+/* cache_policy field: bits [26:24] */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cache_policy_shift)
+
+/* cpv field: bit 28 */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_cpv_shift)
+
+/* DST_ADDR_LO word (offset 1) */
+/* addr_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/* DST_ADDR_HI word (offset 2) */
+/* addr_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+/* START_PAGE word (offset 3) */
+/* addr_31_4 field: bits [31:4]; x is masked to 28 bits and then shifted left by 4, so x is presumably the address >> 4 (confirm at call sites). NOTE(review): reaches bit 31 and the mask is a signed int constant; pass x as an unsigned 32-bit value so the shift cannot overflow a signed int */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift)
+
+/* PAGE_NUM word (offset 4) */
+/* page_num_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift)
+
+
+/*
+** SDMA_PKT_POLL_MEM_VERIFY packet: per field, _offset (word index, judging by the values), _mask (unshifted), _shift, and a macro packing ((x) & mask) << shift
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift)
+
+/* cache_policy field: bits [26:24] */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift 24
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cache_policy_shift)
+
+/* cpv field: bit 28 */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift 28
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_CPV(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_cpv_shift)
+
+/* mode field: bit 31. NOTE(review): reaches bit 31 and the mask is a signed int constant; pass x as an unsigned 32-bit value so the shift cannot overflow a signed int */
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift)
+
+/* PATTERN word (offset 1) */
+/* pattern field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift)
+
+/* CMP0_ADDR_START_LO word (offset 2) */
+/* cmp0_start_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift)
+
+/* CMP0_ADDR_START_HI word (offset 3) */
+/* cmp0_start_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift)
+
+/* CMP0_ADDR_END_LO word (offset 4) */
+/* cmp0_end_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_offset 4
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP0_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp0_end_31_0_shift)
+
+/* CMP0_ADDR_END_HI word (offset 5) */
+/* cmp0_end_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_offset 5
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP0_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp0_end_63_32_shift)
+
+/* CMP1_ADDR_START_LO word (offset 6) */
+/* cmp1_start_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift)
+
+/* CMP1_ADDR_START_HI word (offset 7) */
+/* cmp1_start_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift)
+
+/* CMP1_ADDR_END_LO word (offset 8) */
+/* cmp1_end_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/* CMP1_ADDR_END_HI word (offset 9) */
+/* cmp1_end_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/* REC_ADDR_LO word (offset 10) */
+/* rec_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift)
+
+/* REC_ADDR_HI word (offset 11) */
+/* rec_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift)
+
+/* RESERVED word (offset 12) */
+/* reserved field: all 32 bits of the word */
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift)
+
+
+/*
+** SDMA_PKT_ATOMIC packet: per field, _offset (word index, judging by the values), _mask (unshifted), _shift, and a macro packing ((x) & mask) << shift
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_ATOMIC_HEADER_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_ATOMIC_HEADER_op_shift 0
+#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift)
+
+/* loop field: bit 16 */
+#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16
+#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift)
+
+/* tmz field: bit 18 */
+#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18
+#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift)
+
+/* cache_policy field: bits [22:20] */
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_mask 0x00000007
+#define SDMA_PKT_ATOMIC_HEADER_cache_policy_shift 20
+#define SDMA_PKT_ATOMIC_HEADER_CACHE_POLICY(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cache_policy_mask) << SDMA_PKT_ATOMIC_HEADER_cache_policy_shift)
+
+/* cpv field: bit 24 */
+#define SDMA_PKT_ATOMIC_HEADER_cpv_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_cpv_shift 24
+#define SDMA_PKT_ATOMIC_HEADER_CPV(x) (((x) & SDMA_PKT_ATOMIC_HEADER_cpv_mask) << SDMA_PKT_ATOMIC_HEADER_cpv_shift)
+
+/* atomic_op field: bits [31:25]. NOTE(review): reaches bit 31 and the mask is a signed int constant; pass x as an unsigned 32-bit value so the shift cannot overflow a signed int */
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25
+#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift)
+
+/* ADDR_LO word (offset 1) */
+/* addr_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift)
+
+/* ADDR_HI word (offset 2) */
+/* addr_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift)
+
+/* SRC_DATA_LO word (offset 3) */
+/* src_data_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift)
+
+/* SRC_DATA_HI word (offset 4) */
+/* src_data_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift)
+
+/* CMP_DATA_LO word (offset 5) */
+/* cmp_data_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift)
+
+/* CMP_DATA_HI word (offset 6) */
+/* cmp_data_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift)
+
+/* LOOP_INTERVAL word (offset 7) */
+/* loop_interval field: bits [12:0] */
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift)
+
+
+/*
+** SDMA_PKT_TIMESTAMP_SET packet: per field, _offset (word index, judging by the values), _mask (unshifted), _shift, and a macro packing ((x) & mask) << shift
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/* INIT_DATA_LO word (offset 1) */
+/* init_data_31_0 field: all 32 bits of the word */
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/* INIT_DATA_HI word (offset 2) */
+/* init_data_63_32 field: all 32 bits of the word */
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** SDMA_PKT_TIMESTAMP_GET packet: per field, _offset (word index, judging by the values), _mask (unshifted), _shift, and a macro packing ((x) & mask) << shift
+*/
+
+/* HEADER word (offset 0) */
+/* op field: bits [7:0] */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/* sub_op field: bits [15:8] */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/* l2_policy field: bits [25:24] */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift 24
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_l2_policy_shift)
+
+/* llc_policy field: bit 26 */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift 26
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_llc_policy_shift)
+
+/* cpv field: bit 28 */
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift 28
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_cpv_shift)
+
+/* WRITE_ADDR_LO word (offset 1) */
+/* write_addr_31_3 field: bits [31:3]; x is masked to 29 bits and then shifted left by 3, so x is presumably the address >> 3 (confirm at call sites). NOTE(review): reaches bit 31 and the mask is a signed int constant; pass x as an unsigned 32-bit value so the shift cannot overflow a signed int */
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet
+**
+** Per field: _mask is the unshifted width mask, _shift the bit position in
+** its word, and the upper-case macro masks x and shifts it into place.
+** _offset looks like the word's dword index in the packet (TODO confirm).
+*/
+
+/*define for HEADER word*/
+/*define for op field (bits [7:0])*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/*define for sub_op field (bits [15:8])*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/*define for l2_policy field (bits [25:24])*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift 24
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_l2_policy_shift)
+
+/*define for llc_policy field (bit 26)*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift 26
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_LLC_POLICY(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_llc_policy_shift)
+
+/*define for cpv field (bit 28)*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask 0x00000001
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift 28
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_CPV(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_cpv_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field (bits [31:3]); x is masked to 29 bits and then shifted left by 3, so pass the field value rather than an unshifted word*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field (bits [31:0])*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet
+**
+** Per field: _mask is the unshifted width mask, _shift the bit position in
+** its word, and the upper-case macro masks x and shifts it into place.
+** _offset looks like the word's dword index in the packet (TODO confirm).
+*/
+
+/*define for HEADER word*/
+/*define for op field (bits [7:0])*/
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/*define for sub_op field (bits [15:8])*/
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field (bits [27:0]); the top four bits of x are dropped by the mask*/
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DUMMY_TRAP packet
+**
+** Per field: _mask is the unshifted width mask, _shift the bit position in
+** its word, and the upper-case macro masks x and shifts it into place.
+** _offset looks like the word's dword index in the packet (TODO confirm).
+*/
+
+/*define for HEADER word*/
+/*define for op field (bits [7:0])*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift)
+
+/*define for sub_op field (bits [15:8])*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field (bits [27:0]); the top four bits of x are dropped by the mask*/
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GPUVM_INV packet
+**
+** Per field: _mask is the unshifted width mask, _shift the bit position in
+** its word, and the upper-case macro masks x and shifts it into place.
+** _offset looks like the word's dword index in the packet (TODO confirm).
+*/
+
+/*define for HEADER word*/
+/*define for op field (bits [7:0])*/
+#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0
+#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift)
+
+/*define for sub_op field (bits [15:8])*/
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8
+#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for per_vmid_inv_req field (bits [15:0])*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift)
+
+/*define for flush_type field (bits [18:16])*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift)
+
+/*define for l2_ptes field (bit 19)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift)
+
+/*define for l2_pde0 field (bit 20)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift)
+
+/*define for l2_pde1 field (bit 21)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift)
+
+/*define for l2_pde2 field (bit 22)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift)
+
+/*define for l1_ptes field (bit 23)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift)
+
+/*define for clr_protection_fault_status_addr field (bit 24)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift)
+
+/*define for log_request field (bit 25)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift)
+
+/*define for four_kilobytes field (bit 26)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for s field (bit 0)*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift)
+
+/*define for page_va_42_12 field (bits [31:1]); x is masked to 31 bits and then shifted left by 1*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for page_va_47_43 field (bits [5:0]); NOTE(review): mask is 6 bits wide but the name spans 5 bits (47:43) - confirm against the packet spec*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GCR_REQ packet
+**
+** Per field: _mask is the unshifted width mask, _shift the bit position in
+** its word, and the upper-case macro masks x and shifts it into place.
+** _offset looks like the word's dword index in the packet (TODO confirm).
+*/
+
+/*define for HEADER word*/
+/*define for op field (bits [7:0])*/
+#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0
+#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift)
+
+/*define for sub_op field (bits [15:8])*/
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8
+#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for base_va_31_7 field (bits [31:7]); x is masked to 25 bits and then shifted left by 7*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for base_va_56_32 field (bits [23:0]); NOTE(review): mask is 24 bits wide but the name spans 25 bits (56:32) - confirm against the packet spec*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_mask 0x00FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_56_32_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for gcr_control_18_0 field (bits [18:0])*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_mask 0x0007FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_0_shift)
+
+/*define for limit_va_15_7 field (bits [31:23])*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_mask 0x000001FF
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_shift 23
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_15_7_shift)
+
+/*define for PAYLOAD4 word*/
+/*define for limit_va_47_16 field (bits [31:0])*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_mask 0xFFFFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_16_shift)
+
+/*define for PAYLOAD5 word*/
+/*define for limit_va_56_48 field (bits [8:0])*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_offset 5
+#define SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_mask 0x000001FF
+#define SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_mask) << SDMA_PKT_GCR_REQ_PAYLOAD5_limit_va_56_48_shift)
+
+/*define for vmid field (bits [29:26])*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_offset 5
+#define SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_mask 0x0000000F
+#define SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_shift 26
+#define SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD5_vmid_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet
+**
+** Per field: _mask is the unshifted width mask, _shift the bit position in
+** its word, and the upper-case macro masks x and shifts it into place.
+** _offset looks like the word's dword index in the packet (TODO confirm).
+*/
+
+/*define for HEADER word*/
+/*define for op field (bits [7:0])*/
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/*define for sub_op field (bits [15:8])*/
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/*define for count field (bits [29:16]); values above 0x3FFF are truncated by the mask*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field (bits [31:0])*/
+#define SDMA_PKT_NOP_DATA0_data0_offset 1
+#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_NOP_DATA0_data0_shift 0
+#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_HEADER packet
+**
+** Per field: _mask is the unshifted width mask, _shift the bit position in
+** its word, and the upper-case macro masks x and shifts it into place.
+** _offset looks like the word's dword index in the packet (TODO confirm).
+*/
+
+/*define for HEADER word*/
+/*define for format field (bits [7:0])*/
+#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0
+#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift)
+
+/*define for barrier field (bit 8)*/
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field (bits [10:9])*/
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field (bits [12:11])*/
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift)
+
+/*define for reserved field (bits [15:13])*/
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift)
+
+/*define for op field (bits [19:16]); 4 bits wide here*/
+#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16
+#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift)
+
+/*define for subop field (bits [22:20])*/
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift)
+
+/*define for cpv field (bit 28)*/
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_HEADER_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_cpv_mask) << SDMA_AQL_PKT_HEADER_HEADER_cpv_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet
+**
+** Per field: _mask is the unshifted width mask, _shift the bit position in
+** its word, and the upper-case macro masks x and shifts it into place.
+** _offset looks like the word's dword index in the packet (TODO confirm);
+** the offsets defined here run from 0 to 15.
+*/
+
+/*define for HEADER word*/
+/*define for format field (bits [7:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift)
+
+/*define for barrier field (bit 8)*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field (bits [10:9])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field (bits [12:11])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field (bits [15:13])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift)
+
+/*define for op field (bits [19:16]); 4 bits wide here*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for subop field (bits [22:20])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift)
+
+/*define for cpv field (bit 28)*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_cpv_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for RETURN_ADDR_LO word*/
+/*define for return_addr_31_0 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift)
+
+/*define for RETURN_ADDR_HI word*/
+/*define for return_addr_63_32 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift)
+
+/*define for COUNT word*/
+/*define for count field (bits [21:0]); values above 0x3FFFFF are truncated by the mask*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field (bits [17:16])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_cache_policy field (bits [20:18])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift 18
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_cache_policy_shift)
+
+/*define for src_sw field (bits [25:24])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_cache_policy field (bits [28:26])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift 26
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_CACHE_POLICY(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_cache_policy_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for RESERVED_DW10 word*/
+/*define for reserved_dw10 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift)
+
+/*define for RESERVED_DW11 word*/
+/*define for reserved_dw11 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field (bits [31:0])*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_BARRIER_OR packet
+**
+** Each field is described by four macros:
+**   <WORD>_<field>_offset  dword index of the word holding the field
+**                          (HEADER is 0, RESERVED_DW13 is 13, ...)
+**   <WORD>_<field>_mask    right-aligned field mask, applied before the shift
+**   <WORD>_<field>_shift   bit position of the field's LSB inside the dword
+**   <WORD>_<FIELD>(x)      (x & mask) << shift, i.e. x placed into the field
+*/
+
+/*define for HEADER word (dword 0 of the packet)*/
+/*define for format field: bits 7:0*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift)
+
+/*define for barrier field: bit 8*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field: bits 10:9*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field: bits 12:11*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field: bits 15:13*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift)
+
+/*define for op field: bits 19:16*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift)
+
+/*define for subop field: bits 22:20*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift)
+
+/*define for cpv field: bit 28*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift 28
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_CPV(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_cpv_shift)
+
+/*define for RESERVED_DW1 word (dword 1 of the packet)*/
+/*define for reserved_dw1 field: bits 31:0, i.e. the whole dword*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift)
+
+/*
+** Dwords 2..11 carry five 64-bit dependent_addr values (0..4). Each value is
+** split into a LO dword (bits 31:0) followed by a HI dword (bits 63:32), and
+** every field spans its whole dword (mask 0xFFFFFFFF, shift 0).
+*/
+
+/*define for DEPENDENT_ADDR_0_LO word (dword 2)*/
+/*define for dependent_addr_0_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift)
+
+/*define for DEPENDENT_ADDR_0_HI word (dword 3)*/
+/*define for dependent_addr_0_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift)
+
+/*define for DEPENDENT_ADDR_1_LO word (dword 4)*/
+/*define for dependent_addr_1_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift)
+
+/*define for DEPENDENT_ADDR_1_HI word (dword 5)*/
+/*define for dependent_addr_1_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift)
+
+/*define for DEPENDENT_ADDR_2_LO word (dword 6)*/
+/*define for dependent_addr_2_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift)
+
+/*define for DEPENDENT_ADDR_2_HI word (dword 7)*/
+/*define for dependent_addr_2_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift)
+
+/*define for DEPENDENT_ADDR_3_LO word (dword 8)*/
+/*define for dependent_addr_3_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift)
+
+/*define for DEPENDENT_ADDR_3_HI word (dword 9)*/
+/*define for dependent_addr_3_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift)
+
+/*define for DEPENDENT_ADDR_4_LO word (dword 10)*/
+/*define for dependent_addr_4_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift)
+
+/*define for DEPENDENT_ADDR_4_HI word (dword 11)*/
+/*define for dependent_addr_4_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift)
+
+/*
+** define for CACHE_POLICY word (dword 12 of the packet)
+** Five 3-bit fields placed 5 bits apart; bits 4:3, 9:8, 14:13, 19:18 and
+** 31:23 have no field macro here.
+** NOTE(review): cache_policyN presumably applies to dependent_addr_N - confirm
+** against the packet specification.
+*/
+/*define for cache_policy0 field: bits 2:0*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy0_shift)
+
+/*define for cache_policy1 field: bits 7:5*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift 5
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy1_shift)
+
+/*define for cache_policy2 field: bits 12:10*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift 10
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY2(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy2_shift)
+
+/*define for cache_policy3 field: bits 17:15*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift 15
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY3(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy3_shift)
+
+/*define for cache_policy4 field: bits 22:20*/
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_CACHE_POLICY4(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_mask) << SDMA_AQL_PKT_BARRIER_OR_CACHE_POLICY_cache_policy4_shift)
+
+/*define for RESERVED_DW13 word (dword 13 of the packet)*/
+/*define for reserved_dw13 field: bits 31:0, i.e. the whole dword*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word (dword 14 of the packet)*/
+/*define for completion_signal_31_0 field: low 32 bits of the 64-bit completion_signal value*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word (dword 15 of the packet)*/
+/*define for completion_signal_63_32 field: high 32 bits of the 64-bit completion_signal value*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+#endif /* __SDMA_V7_1_0_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index f7288372ee61..509d43b238f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -2003,10 +2003,6 @@ static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
return 0;
}
-static void si_pre_asic_init(struct amdgpu_device *adev)
-{
-}
-
static const struct amdgpu_asic_funcs si_asic_funcs =
{
.read_disabled_bios = &si_read_disabled_bios,
@@ -2028,7 +2024,6 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.need_reset_on_init = &si_need_reset_on_init,
.get_pcie_replay_count = &si_get_pcie_replay_count,
.supports_baco = &si_asic_supports_baco,
- .pre_asic_init = &si_pre_asic_init,
.query_video_codecs = &si_query_video_codecs,
};
@@ -2260,16 +2255,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate == 2) {
- DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+ drm_info(adev_to_drm(adev), "PCIE gen 3 link speeds already enabled\n");
return;
}
- DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ drm_info(adev_to_drm(adev), "enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
} else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
if (current_data_rate == 1) {
- DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+ drm_info(adev_to_drm(adev), "PCIE gen 2 link speeds already enabled\n");
return;
}
- DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ drm_info(adev_to_drm(adev), "enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 7f18e4875287..74fcaa340d9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -584,7 +584,7 @@ static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block)
static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block)
{
- DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
+ drm_info(adev_to_drm(ip_block->adev), "si_dma_soft_reset --- not implemented !!!!!!!\n");
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index 68aef47254a9..c86acee05d0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -196,7 +196,9 @@ static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control)
if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) {
reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE);
- DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source);
+ drm_info(adev_to_drm(adev),
+ "TX was terminated, IC_TX_ABRT_SOURCE val is:%x",
+ reg_c_tx_abrt_source);
/* Check for stop due to NACK */
if (REG_GET_FIELD(reg_c_tx_abrt_source,
@@ -769,7 +771,7 @@ bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control)
uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef};
- DRM_INFO("Begin");
+ drm_info(adev_to_drm(adev), "Begin");
if (!smu_v11_0_i2c_bus_lock(control)) {
DRM_ERROR("Failed to lock the bus!.");
@@ -788,7 +790,7 @@ bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control)
smu_v11_0_i2c_bus_unlock(control);
- DRM_INFO("End");
+ drm_info(adev_to_drm(adev), "End");
return true;
}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.c
new file mode 100644
index 000000000000..eccc76650d82
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v15_0_0.h"
+#include "smuio/smuio_15_0_0_offset.h"
+#include "smuio/smuio_15_0_0_sh_mask.h"
+#include <linux/preempt.h>
+
+/**
+ * smuio_v15_0_0_get_gpu_clock_counter - read the 64-bit golden TSC count
+ *
+ * @adev: amdgpu device pointer
+ *
+ * With preemption disabled, samples GOLDEN_TSC_COUNT_UPPER, then
+ * GOLDEN_TSC_COUNT_LOWER, then UPPER a second time. When the two UPPER
+ * samples differ, LOWER is read again so that it is paired with the later
+ * UPPER sample.
+ *
+ * Returns the second UPPER sample shifted into bits 63:32, OR-ed with LOWER.
+ */
+static u64 smuio_v15_0_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+	u64 hi_first, hi_second, lo;
+
+	preempt_disable();
+	hi_first = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+	lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+	/* the counter may have been updated while the two halves were read */
+	hi_second = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+	if (hi_first != hi_second)
+		lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+	preempt_enable();
+
+	return (hi_second << 32ULL) | lo;
+}
+
+/* SMUIO 15.0.0 callback table: only the GPU clock counter hook is provided. */
+const struct amdgpu_smuio_funcs smuio_v15_0_0_funcs = {
+	.get_gpu_clock_counter = &smuio_v15_0_0_get_gpu_clock_counter,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.h
new file mode 100644
index 000000000000..85e0f08283d9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V15_0_0_H__
+#define __SMUIO_V15_0_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v15_0_0_funcs;
+
+#endif /* __SMUIO_V15_0_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.c b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.c
new file mode 100644
index 000000000000..ef29424c26cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v15_0_8.h"
+#include "smuio/smuio_15_0_8_offset.h"
+#include "smuio/smuio_15_0_8_sh_mask.h"
+
+/*
+ * Masks applied in this file to values already extracted from
+ * SMUIO_MCM_CONFIG with REG_GET_FIELD(), not to the raw register:
+ * HOST_GPU_XGMI and ETHERNET_SWITCH select bits of the TOPOLOGY_ID field,
+ * CUSTOM_HBM selects bit 0 of the PKG_TYPE field.
+ */
+#define SMUIO_MCM_CONFIG__HOST_GPU_XGMI_MASK 0x00000001L
+#define SMUIO_MCM_CONFIG__ETHERNET_SWITCH_MASK 0x00000008L
+#define SMUIO_MCM_CONFIG__CUSTOM_HBM_MASK 0x00000001L
+
+/**
+ * smuio_v15_0_8_get_rom_index_offset - offset of the ROM_INDEX register
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns SOC15_REG_OFFSET() of regROM_INDEX for SMUIO instance 0.
+ */
+static u32 smuio_v15_0_8_get_rom_index_offset(struct amdgpu_device *adev)
+{
+	u32 rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+
+	return rom_index_offset;
+}
+
+/**
+ * smuio_v15_0_8_get_rom_data_offset - offset of the ROM_DATA register
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns SOC15_REG_OFFSET() of regROM_DATA for SMUIO instance 0.
+ */
+static u32 smuio_v15_0_8_get_rom_data_offset(struct amdgpu_device *adev)
+{
+	u32 rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+
+	return rom_data_offset;
+}
+
+/**
+ * smuio_v15_0_8_update_rom_clock_gating - ROM clock gating hook
+ *
+ * @adev: amdgpu device pointer
+ * @enable: requested gating state (unused)
+ *
+ * Currently a no-op: no register is read or written here. The function exists
+ * so that the update_rom_clock_gating callback of smuio_v15_0_8_funcs is
+ * populated.
+ */
+static void smuio_v15_0_8_update_rom_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+}
+
+/**
+ * smuio_v15_0_8_get_gpu_clock_counter - read the 64-bit golden TSC count
+ *
+ * @adev: amdgpu device pointer
+ *
+ * With preemption disabled, samples GOLDEN_TSC_COUNT_UPPER, then
+ * GOLDEN_TSC_COUNT_LOWER, then UPPER a second time. When the two UPPER
+ * samples differ, LOWER is read again so that it is paired with the later
+ * UPPER sample.
+ *
+ * Returns the second UPPER sample shifted into bits 63:32, OR-ed with LOWER.
+ */
+static u64 smuio_v15_0_8_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+	u64 hi_first, hi_second, lo;
+
+	preempt_disable();
+	hi_first = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+	lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+	/* the counter may have been updated while the two halves were read */
+	hi_second = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+	if (hi_first != hi_second)
+		lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+	preempt_enable();
+
+	return (hi_second << 32ULL) | lo;
+}
+
+/**
+ * smuio_v15_0_8_get_clock_gating_state - report ROM clock gating state
+ *
+ * @adev: amdgpu device pointer
+ * @flags: clock gating flags, updated in place
+ *
+ * Sets AMD_CG_SUPPORT_ROM_MGCG in @flags when SOFT_OVERRIDE0 is clear in
+ * CGTT_ROM_CLK_CTRL0. On APUs the register is not read and @flags is left
+ * untouched.
+ */
+static void smuio_v15_0_8_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
+{
+	u32 rom_clk_ctrl;
+
+	/* CGTT_ROM_CLK_CTRL0 is not available for APU */
+	if (adev->flags & AMD_IS_APU)
+		return;
+
+	rom_clk_ctrl = RREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0);
+	if (rom_clk_ctrl & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)
+		return;
+
+	*flags |= AMD_CG_SUPPORT_ROM_MGCG;
+}
+
+/**
+ * smuio_v15_0_8_get_die_id - query the die id
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns the DIE_ID field of the SMUIO_MCM_CONFIG register.
+ */
+static u32 smuio_v15_0_8_get_die_id(struct amdgpu_device *adev)
+{
+	u32 mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+
+	return REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, DIE_ID);
+}
+
+/**
+ * smuio_v15_0_8_get_socket_id - query the socket id
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns the SOCKET_ID field of the SMUIO_MCM_CONFIG register.
+ */
+static u32 smuio_v15_0_8_get_socket_id(struct amdgpu_device *adev)
+{
+	u32 mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+
+	return REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, SOCKET_ID);
+}
+
+/**
+ * smuio_v15_0_8_is_host_gpu_xgmi_supported - detect xgmi interface between cpu and gpu/s.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true when bit 0 of the SMUIO_MCM_CONFIG TOPOLOGY_ID field is set,
+ * false otherwise.
+ */
+static bool smuio_v15_0_8_is_host_gpu_xgmi_supported(struct amdgpu_device *adev)
+{
+	u32 mcm_config, topology_id;
+
+	mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+	topology_id = REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, TOPOLOGY_ID);
+
+	/*
+	 * topology_id[4:0]
+	 * bit 0 == 0 host-gpu interface is PCIE
+	 * bit 0 == 1 host-gpu interface is Alternate Protocol
+	 * for AMD, this is XGMI
+	 */
+	return (topology_id & SMUIO_MCM_CONFIG__HOST_GPU_XGMI_MASK) != 0;
+}
+
+#if 0
+/*
+ * smuio_v15_0_8_is_connected_with_ethernet_switch - detect systems connected with ethernet switch
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns false on non-APU devices. On APUs, returns true when bit 3 of the
+ * SMUIO_MCM_CONFIG TOPOLOGY_ID field is clear and false when it is set.
+ */
+static bool smuio_v15_0_8_is_connected_with_ethernet_switch(struct amdgpu_device *adev)
+{
+	u32 mcm_config, topology_id;
+
+	if (!(adev->flags & AMD_IS_APU))
+		return false;
+
+	mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+	topology_id = REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, TOPOLOGY_ID);
+
+	/*
+	 * topology_id[4:0]
+	 * bit 3 == 0 systems connected with ethernet switch
+	 */
+	return !(topology_id & SMUIO_MCM_CONFIG__ETHERNET_SWITCH_MASK);
+}
+#endif
+
+/**
+ * smuio_v15_0_8_get_pkg_type - decode the package type
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Reads the PKG_TYPE field of SMUIO_MCM_CONFIG and looks at its bits 2 and 3.
+ *
+ * Returns AMDGPU_PKG_TYPE_BB when both bits are clear, AMDGPU_PKG_TYPE_CEM
+ * when only bit 3 is set, and AMDGPU_PKG_TYPE_UNKNOWN for any other value.
+ */
+static enum amdgpu_pkg_type smuio_v15_0_8_get_pkg_type(struct amdgpu_device *adev)
+{
+	u32 mcm_config, pkg_bits;
+
+	mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+
+	/*
+	 * PKG_TYPE is data[3:0];
+	 * bit 2 and bit 3 identify the pkg type
+	 */
+	pkg_bits = REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, PKG_TYPE) & 0xC;
+
+	if (pkg_bits == 0x0)
+		return AMDGPU_PKG_TYPE_BB;
+	if (pkg_bits == 0x8)
+		return AMDGPU_PKG_TYPE_CEM;
+
+	return AMDGPU_PKG_TYPE_UNKNOWN;
+}
+
+#if 0
+/*
+ * smuio_v15_0_8_is_custom_hbm_supported - detect a custom HBM module
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true when bit 0 of the SMUIO_MCM_CONFIG PKG_TYPE field is set,
+ * false otherwise.
+ */
+static bool smuio_v15_0_8_is_custom_hbm_supported(struct amdgpu_device *adev)
+{
+	u32 mcm_config, pkg_type;
+
+	mcm_config = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+	pkg_type = REG_GET_FIELD(mcm_config, SMUIO_MCM_CONFIG, PKG_TYPE);
+
+	/*
+	 * pkg_type[3:0]
+	 * bit 0 identifies custom HBM module
+	 */
+	return (pkg_type & SMUIO_MCM_CONFIG__CUSTOM_HBM_MASK) != 0;
+}
+#endif
+
+/* SMUIO 15.0.8 callback table. */
+const struct amdgpu_smuio_funcs smuio_v15_0_8_funcs = {
+	/* ROM access and ROM clock gating */
+	.get_rom_index_offset = &smuio_v15_0_8_get_rom_index_offset,
+	.get_rom_data_offset = &smuio_v15_0_8_get_rom_data_offset,
+	.update_rom_clock_gating = &smuio_v15_0_8_update_rom_clock_gating,
+	.get_clock_gating_state = &smuio_v15_0_8_get_clock_gating_state,
+	/* timestamp counter */
+	.get_gpu_clock_counter = &smuio_v15_0_8_get_gpu_clock_counter,
+	/* queries decoded from SMUIO_MCM_CONFIG */
+	.get_die_id = &smuio_v15_0_8_get_die_id,
+	.get_socket_id = &smuio_v15_0_8_get_socket_id,
+	.get_pkg_type = &smuio_v15_0_8_get_pkg_type,
+	.is_host_gpu_xgmi_supported = &smuio_v15_0_8_is_host_gpu_xgmi_supported,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.h b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.h
new file mode 100644
index 000000000000..508547fcce03
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v15_0_8.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V15_0_8_H__
+#define __SMUIO_V15_0_8_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v15_0_8_funcs;
+
+#endif /* __SMUIO_V15_0_8_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 42f5d9c0e3af..54b14751fd7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -951,7 +951,6 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &soc15_supports_baco,
- .pre_asic_init = &soc15_pre_asic_init,
.query_video_codecs = &soc15_query_video_codecs,
.encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
.get_reg_state = &aqua_vanjaram_get_reg_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index ad36c96478a8..2da733b45c21 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -141,6 +141,31 @@ static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = {
.codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
};
+/*
+ * Encode codec table for VCN 5.3.0: AVC, HEVC and AV1 entries.
+ * NOTE(review): the codec_info_build() arguments look like
+ * (codec index, max width, max height, max level) - confirm against the
+ * macro definition.
+ */
+static const struct amdgpu_video_codec_info vcn_5_3_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+/* Pairs the encode table above with its element count. */
+static const struct amdgpu_video_codecs vcn_5_3_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_3_0_video_codecs_encode_array_vcn0),
+ .codec_array = vcn_5_3_0_video_codecs_encode_array_vcn0,
+};
+
+/*
+ * Decode codec table for VCN 5.3.0: AVC, HEVC, JPEG, VP9 and AV1 entries.
+ * NOTE(review): the codec_info_build() arguments look like
+ * (codec index, max width, max height, max level) - confirm against the
+ * macro definition.
+ */
+static const struct amdgpu_video_codec_info vcn_5_3_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+/* Pairs the decode table above with its element count. */
+static const struct amdgpu_video_codecs vcn_5_3_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_3_0_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_3_0_video_codecs_decode_array_vcn0,
+};
+
+
static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
@@ -185,6 +210,12 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
else
*codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
return 0;
+ case IP_VERSION(5, 3, 0):
+ if (encode)
+ *codecs = &vcn_5_3_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_3_0_video_codecs_decode_vcn0;
+ return 0;
default:
return -EINVAL;
}
@@ -513,10 +544,6 @@ static void soc21_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.sdma_doorbell_range = 20;
}
-static void soc21_pre_asic_init(struct amdgpu_device *adev)
-{
-}
-
static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
bool enter)
{
@@ -546,7 +573,6 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = {
.need_reset_on_init = &soc21_need_reset_on_init,
.get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &amdgpu_dpm_is_baco_supported,
- .pre_asic_init = &soc21_pre_asic_init,
.query_video_codecs = &soc21_query_video_codecs,
.update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
};
@@ -804,6 +830,13 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block)
AMD_PG_SUPPORT_GFX_PG;
adev->external_rev_id = adev->rev_id + 0x50;
break;
+ case IP_VERSION(11, 5, 4):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -965,6 +998,7 @@ static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
case IP_VERSION(7, 11, 1):
case IP_VERSION(7, 11, 2):
case IP_VERSION(7, 11, 3):
+ case IP_VERSION(7, 11, 4):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
index 972b449ab89f..ecb6c3fcfbd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc24.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -327,10 +327,6 @@ static void soc24_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.sdma_doorbell_range = 20;
}
-static void soc24_pre_asic_init(struct amdgpu_device *adev)
-{
-}
-
static int soc24_update_umd_stable_pstate(struct amdgpu_device *adev,
bool enter)
{
@@ -357,7 +353,6 @@ static const struct amdgpu_asic_funcs soc24_asic_funcs = {
.need_reset_on_init = &soc24_need_reset_on_init,
.get_pcie_replay_count = &soc24_get_pcie_replay_count,
.supports_baco = &amdgpu_dpm_is_baco_supported,
- .pre_asic_init = &soc24_pre_asic_init,
.query_video_codecs = &soc24_query_video_codecs,
.update_umd_stable_pstate = &soc24_update_umd_stable_pstate,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c
new file mode 100644
index 000000000000..59ab952d5cce
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.c
@@ -0,0 +1,862 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+#include "soc15_common.h"
+#include "soc_v1_0.h"
+#include "amdgpu_ip.h"
+#include "amdgpu_imu.h"
+#include "gfxhub_v12_1.h"
+#include "sdma_v7_1.h"
+#include "gfx_v12_1.h"
+
+#include "gc/gc_12_1_0_offset.h"
+#include "gc/gc_12_1_0_sh_mask.h"
+#include "mp/mp_15_0_8_offset.h"
+
+#define XCC_REG_RANGE_0_LOW 0x1260 /* XCC gfxdec0 lower Bound */
+#define XCC_REG_RANGE_0_HIGH 0x3C00 /* XCC gfxdec0 upper Bound */
+#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */
+#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */
+#define NORMALIZE_XCC_REG_OFFSET(offset) \
+	((offset) & 0xFFFF) /* keep only the low 16 bits of the offset */
+
+/* Initialize the doorbell indices used by amdgpu, including multimedia.
+ * KFD can use all the rest of the 2M doorbell BAR. */
+static void soc_v1_0_doorbell_index_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->doorbell_index.kiq = AMDGPU_SOC_V1_0_DOORBELL_KIQ_START;
+
+ adev->doorbell_index.mec_ring0 = AMDGPU_SOC_V1_0_DOORBELL_MEC_RING_START;
+ adev->doorbell_index.mes_ring0 = AMDGPU_SOC_V1_0_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_SOC_V1_0_DOORBELL_MES_RING1;
+
+ adev->doorbell_index.userqueue_start = AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_SOC_V1_0_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.xcc_doorbell_range = AMDGPU_SOC_V1_0_DOORBELL_XCC_RANGE;
+
+ adev->doorbell_index.sdma_doorbell_range = 20;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->doorbell_index.sdma_engine[i] =
+ AMDGPU_SOC_V1_0_DOORBELL_sDMA_ENGINE_START +
+ i * (adev->doorbell_index.sdma_doorbell_range >> 1);
+
+ adev->doorbell_index.ih = AMDGPU_SOC_V1_0_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_SOC_V1_0_DOORBELL_VCN_START;
+
+ adev->doorbell_index.first_non_cp = AMDGPU_SOC_V1_0_DOORBELL_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_SOC_V1_0_DOORBELL_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_SOC_V1_0_DOORBELL_MAX_ASSIGNMENT << 1;
+}
+
+/* Fixed pattern for upper 32bits smn addressing.
+ * bit[47:40]: Socket ID, bit[39:34]: Die ID
+ * bit[33]: set by the code below whenever socket_id is non-zero
+ * bit[32]: local or remote die in same socket
+ * ext_id = (socket_id << 6) | (die_id); ext_id 0 returns offset 0.
+ * NOTE(review): die_id is masked with 0x3, not 0x3f - confirm intent.
+ */
+u64 soc_v1_0_encode_ext_smn_addressing(int ext_id)
+{
+ u64 ext_offset;
+ int socket_id, die_id;
+
+ /* local die routing for MID0 on local socket */
+ if (ext_id == 0)
+ return 0;
+
+ die_id = ext_id & 0x3;
+ socket_id = (ext_id >> 6) & 0xff;
+
+ /* Initiated from host, accessing to non-MID0 is cross-die traffic */
+ if (socket_id == 0)
+ ext_offset = ((u64)die_id << 34) | (1ULL << 32);
+ else if (socket_id != 0 && die_id != 0)
+ ext_offset = ((u64)socket_id << 40) | ((u64)die_id << 34) |
+ (3ULL << 32);
+ else
+ ext_offset = ((u64)socket_id << 40) | (1ULL << 33);
+
+ return ext_offset;
+}
+
+static u32 soc_v1_0_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+static u32 soc_v1_0_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+void soc_v1_0_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe,
+ u32 queue, u32 vmid,
+ int xcc_id)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32_SOC15_RLC_SHADOW(GC, xcc_id, regGRBM_GFX_CNTL, grbm_gfx_cntl);
+}
+
+static struct soc15_allowed_register_entry soc_v1_0_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS) },
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2) },
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS3) },
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0) },
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_STAT) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1) },
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS) },
+ { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG_1) },
+};
+
+static uint32_t soc_v1_0_read_indexed_register(struct amdgpu_device *adev,
+ u32 se_num,
+ u32 sh_num,
+ u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t soc_v1_0_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return soc_v1_0_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG_1) &&
+ adev->gfx.config.gb_addr_config)
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset);
+ }
+}
+
+static int soc_v1_0_read_register(struct amdgpu_device *adev,
+ u32 se_num, u32 sh_num,
+ u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(soc_v1_0_allowed_read_registers); i++) {
+ en = &soc_v1_0_allowed_read_registers[i];
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
+ continue;
+
+ *value = soc_v1_0_get_register_value(adev,
+ soc_v1_0_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static bool soc_v1_0_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 1, 0):
+ default:
+ return true;
+ }
+}
+
+static bool soc_v1_0_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check the sOS sign of life register: a non-zero value means the
+ * sys driver and sOS have already been loaded, so a reset is needed.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+
+ return false;
+}
+
+static int soc_v1_0_asic_reset(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static const struct amdgpu_asic_funcs soc_v1_0_asic_funcs = {
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc_v1_0_read_register,
+ .get_config_memsize = &soc_v1_0_get_config_memsize,
+ .get_xclk = &soc_v1_0_get_xclk,
+ .need_full_reset = &soc_v1_0_need_full_reset,
+ .init_doorbell_index = &soc_v1_0_doorbell_index_init,
+ .need_reset_on_init = &soc_v1_0_need_reset_on_init,
+ .encode_ext_smn_addressing = &soc_v1_0_encode_ext_smn_addressing,
+ .reset = soc_v1_0_asic_reset,
+};
+
+static int soc_v1_0_common_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
+ adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext;
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+ adev->didt_rreg = NULL;
+ adev->didt_wreg = NULL;
+
+ adev->asic_funcs = &soc_v1_0_asic_funcs;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ adev->external_rev_id = 0xff;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 1, 0):
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int soc_v1_0_common_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Enable the selfring doorbell aperture late because the doorbell BAR
+ * aperture changes if the BAR is resized successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc_v1_0_common_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+static int soc_v1_0_common_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc_v1_0_common_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+ return 0;
+}
+
+static int soc_v1_0_common_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return soc_v1_0_common_hw_fini(ip_block);
+}
+
+static int soc_v1_0_common_resume(struct amdgpu_ip_block *ip_block)
+{
+ return soc_v1_0_common_hw_init(ip_block);
+}
+
+static bool soc_v1_0_common_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int soc_v1_0_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int soc_v1_0_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void soc_v1_0_common_get_clockgating_state(struct amdgpu_ip_block *ip_block,
+ u64 *flags)
+{
+ return;
+}
+
+static const struct amd_ip_funcs soc_v1_0_common_ip_funcs = {
+ .name = "soc_v1_0_common",
+ .early_init = soc_v1_0_common_early_init,
+ .late_init = soc_v1_0_common_late_init,
+ .sw_init = soc_v1_0_common_sw_init,
+ .hw_init = soc_v1_0_common_hw_init,
+ .hw_fini = soc_v1_0_common_hw_fini,
+ .suspend = soc_v1_0_common_suspend,
+ .resume = soc_v1_0_common_resume,
+ .is_idle = soc_v1_0_common_is_idle,
+ .set_clockgating_state = soc_v1_0_common_set_clockgating_state,
+ .set_powergating_state = soc_v1_0_common_set_powergating_state,
+ .get_clockgating_state = soc_v1_0_common_get_clockgating_state,
+};
+
+const struct amdgpu_ip_block_version soc_v1_0_common_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc_v1_0_common_ip_funcs,
+};
+
+static enum amdgpu_gfx_partition __soc_v1_0_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xcc_per_xcp = 0, mode = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+ if (adev->gfx.funcs &&
+ adev->gfx.funcs->get_xccs_per_xcp)
+ num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
+ if ((num_xcc_per_xcp) && (num_xcc % num_xcc_per_xcp == 0))
+ mode = num_xcc / num_xcc_per_xcp;
+
+ if (num_xcc_per_xcp == 1)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ switch (mode) {
+ case 1:
+ return AMDGPU_SPX_PARTITION_MODE;
+ case 2:
+ return AMDGPU_DPX_PARTITION_MODE;
+ case 3:
+ return AMDGPU_TPX_PARTITION_MODE;
+ case 4:
+ return AMDGPU_QPX_PARTITION_MODE;
+ default:
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ }
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+static int soc_v1_0_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ enum amdgpu_gfx_partition derv_mode, mode;
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ derv_mode = __soc_v1_0_calc_xcp_mode(xcp_mgr);
+
+ if (amdgpu_sriov_vf(adev) || !adev->psp.funcs)
+ return derv_mode;
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_compute_partition_mode) {
+ mode = adev->nbio.funcs->get_compute_partition_mode(adev);
+ if (mode != derv_mode)
+ dev_warn(adev->dev,
+ "Mismatch in compute partition mode - reported : %d derived : %d",
+ mode, derv_mode);
+ }
+
+ return mode;
+}
+
+static int __soc_v1_0_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ int num_xcc, num_xcc_per_xcp = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc;
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 2;
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 3;
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 4;
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_xcc_per_xcp = 1;
+ break;
+ }
+
+ return num_xcc_per_xcp;
+}
+
+static int __soc_v1_0_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_sdma, num_vcn, num_shared_vcn, num_xcp;
+ int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
+
+ num_sdma = adev->sdma.num_instances;
+ num_vcn = adev->vcn.num_vcn_inst;
+ num_shared_vcn = 1;
+
+ num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+ num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp;
+
+ switch (xcp_mgr->mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ case AMDGPU_DPX_PARTITION_MODE:
+ case AMDGPU_TPX_PARTITION_MODE:
+ case AMDGPU_QPX_PARTITION_MODE:
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp);
+ num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (num_vcn && num_xcp > num_vcn)
+ num_shared_vcn = num_xcp / num_vcn;
+
+ switch (ip_id) {
+ case AMDGPU_XCP_GFXHUB:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfxhub_v12_1_xcp_funcs;
+ break;
+ case AMDGPU_XCP_GFX:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfx_v12_1_xcp_funcs;
+ break;
+ case AMDGPU_XCP_SDMA:
+ ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id);
+ ip->ip_funcs = &sdma_v7_1_xcp_funcs;
+ break;
+ case AMDGPU_XCP_VCN:
+ ip->inst_mask =
+ XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn);
+ /* TODO : Assign IP funcs */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ip->ip_id = ip_id;
+
+ return 0;
+}
+
+/* Fill @xcp_cfg with the per-partition resource split (XCC, DMA, DEC, JPEG)
+ * for @mode; returns -EINVAL if @mode is unsupported, 0 otherwise. */
+static int soc_v1_0_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+				     int mode, struct amdgpu_xcp_cfg *xcp_cfg)
+{
+	struct amdgpu_device *adev = xcp_mgr->adev;
+	int max_res[AMDGPU_XCP_RES_MAX] = {};
+	bool res_lt_xcp;
+	int num_xcp, i;
+	u16 nps_modes;
+
+	if (!(xcp_mgr->supp_xcp_modes & BIT(mode)))
+		return -EINVAL;
+
+	max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask);
+	max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances;
+	max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst;
+	max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst;
+
+	switch (mode) {
+	case AMDGPU_SPX_PARTITION_MODE:
+		num_xcp = 1;
+		nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE);
+		break;
+	case AMDGPU_DPX_PARTITION_MODE:
+		num_xcp = 2;
+		nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE);
+		break;
+	case AMDGPU_TPX_PARTITION_MODE:
+		num_xcp = 3;
+		nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+			    BIT(AMDGPU_NPS4_PARTITION_MODE);
+		break;
+	case AMDGPU_QPX_PARTITION_MODE:
+		num_xcp = 4;
+		nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+			    BIT(AMDGPU_NPS4_PARTITION_MODE);
+		break;
+	case AMDGPU_CPX_PARTITION_MODE:
+		num_xcp = NUM_XCC(adev->gfx.xcc_mask);
+		nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+			    BIT(AMDGPU_NPS4_PARTITION_MODE);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	xcp_cfg->compatible_nps_modes =
+		(adev->gmc.supported_nps_modes & nps_modes);
+	xcp_cfg->num_res = ARRAY_SIZE(max_res);
+
+	for (i = 0; i < xcp_cfg->num_res; i++) {
+		/* An absent IP (max_res[i] == 0) must never become a divisor */
+		res_lt_xcp = max_res[i] && max_res[i] < num_xcp;
+		xcp_cfg->xcp_res[i].id = i;
+		xcp_cfg->xcp_res[i].num_inst =
+			res_lt_xcp ? 1 : max_res[i] / num_xcp;
+		if (i == AMDGPU_XCP_RES_JPEG)
+			xcp_cfg->xcp_res[i].num_inst *= adev->jpeg.num_jpeg_rings;
+		xcp_cfg->xcp_res[i].num_shared =
+			res_lt_xcp ? num_xcp / max_res[i] : 1;
+	}
+
+	return 0;
+}
+
+static enum amdgpu_gfx_partition __soc_v1_0_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ if (adev->gmc.num_mem_partitions == 1)
+ return AMDGPU_SPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == 2)
+ return AMDGPU_DPX_PARTITION_MODE;
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+static bool __soc_v1_0_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum amdgpu_gfx_partition mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xccs_per_xcp;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return adev->gmc.num_mem_partitions == 1 && num_xcc > 0;
+ case AMDGPU_DPX_PARTITION_MODE:
+ return adev->gmc.num_mem_partitions <= 2 && (num_xcc % 4) == 0;
+ case AMDGPU_TPX_PARTITION_MODE:
+ return (adev->gmc.num_mem_partitions == 1 ||
+ adev->gmc.num_mem_partitions == 3) &&
+ ((num_xcc % 3) == 0);
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xccs_per_xcp = num_xcc / 4;
+ return (adev->gmc.num_mem_partitions == 1 ||
+ adev->gmc.num_mem_partitions == 4) &&
+ (num_xccs_per_xcp >= 2);
+ case AMDGPU_CPX_PARTITION_MODE:
+ /* (num_xcc > 1) because 1 XCC is considered SPX, not CPX.
+ * (num_xcc % adev->gmc.num_mem_partitions) == 0 because
+ * num_compute_partitions can't be less than num_mem_partitions
+ */
+ return ((num_xcc > 1) &&
+ (num_xcc % adev->gmc.num_mem_partitions) == 0);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static void __soc_v1_0_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ int mode;
+
+ xcp_mgr->avail_xcp_modes = 0;
+
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ if (__soc_v1_0_is_valid_mode(xcp_mgr, mode))
+ xcp_mgr->avail_xcp_modes |= BIT(mode);
+ }
+}
+
+/* Switch to compute partition @mode (AUTO derives one from the memory
+ * partition count) and store the new partition count in @num_xcps. Returns 0
+ * or a negative error code; a KFD lock taken here is dropped on every path. */
+static int soc_v1_0_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+					  int mode, int *num_xcps)
+{
+	struct amdgpu_device *adev = xcp_mgr->adev;
+	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	int num_xcc_per_xcp, ret;
+	u32 flags = 0;
+
+	if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
+		mode = __soc_v1_0_get_auto_mode(xcp_mgr);
+		if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) {
+			dev_err(adev->dev,
+				"Invalid config, no compatible compute partition mode found, available memory partitions: %d",
+				adev->gmc.num_mem_partitions);
+			return -EINVAL;
+		}
+	} else if (!__soc_v1_0_is_valid_mode(xcp_mgr, mode)) {
+		dev_err(adev->dev,
+			"Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
+			amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions);
+		return -EINVAL;
+	}
+
+	if (adev->kfd.init_complete && !amdgpu_in_reset(adev))
+		flags |= AMDGPU_XCP_OPS_KFD;
+
+	if (flags & AMDGPU_XCP_OPS_KFD) {
+		ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
+		if (ret)
+			return ret;
+	}
+
+	ret = amdgpu_xcp_pre_partition_switch(xcp_mgr, flags);
+	if (ret)
+		goto unlock;
+
+	num_xcc_per_xcp = __soc_v1_0_get_xcc_per_xcp(xcp_mgr, mode);
+	if (adev->gfx.imu.funcs &&
+	    adev->gfx.imu.funcs->switch_compute_partition) {
+		ret = adev->gfx.imu.funcs->switch_compute_partition(xcp_mgr->adev, num_xcc_per_xcp, mode);
+		if (ret)
+			goto unlock; /* KFD may be locked: do not skip the unlock */
+	}
+	if (adev->gfx.imu.funcs &&
+	    adev->gfx.imu.funcs->init_mcm_addr_lut &&
+	    amdgpu_emu_mode)
+		adev->gfx.imu.funcs->init_mcm_addr_lut(adev);
+
+	/* Init info about new xcps */
+	*num_xcps = num_xcc / num_xcc_per_xcp;
+	amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
+
+	ret = amdgpu_xcp_post_partition_switch(xcp_mgr, flags);
+	if (!ret)
+		__soc_v1_0_update_available_partition_mode(xcp_mgr);
+unlock:
+	if (flags & AMDGPU_XCP_OPS_KFD)
+		amdgpu_amdkfd_unlock_kfd(adev);
+	return ret;
+}
+
+#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV
+static int __soc_v1_0_get_xcp_mem_id(struct amdgpu_device *adev,
+ int xcc_id, uint8_t *mem_id)
+{
+ /* memory/spatial modes validation check is already done */
+ *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp;
+ *mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition;
+
+ return 0;
+}
+
+static int soc_v1_0_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id)
+{
+ struct amdgpu_numa_info numa_info;
+ struct amdgpu_device *adev;
+ uint32_t xcc_mask;
+ int r, i, xcc_id;
+
+ adev = xcp_mgr->adev;
+ /* TODO: BIOS is not returning the right info now
+ * Check on this later
+ */
+ /*
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ */
+ if (adev->gmc.num_mem_partitions == 1) {
+ /* Only one range */
+ *mem_id = 0;
+ return 0;
+ }
+
+ r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
+ if (r || !xcc_mask)
+ return -EINVAL;
+
+ xcc_id = ffs(xcc_mask) - 1;
+ if (!adev->gmc.is_app_apu)
+ return __soc_v1_0_get_xcp_mem_id(adev, xcc_id, mem_id);
+
+ r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+
+ if (r)
+ return r;
+
+ r = -EINVAL;
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
+ *mem_id = i;
+ r = 0;
+ break;
+ }
+ }
+
+ return r;
+}
+#endif
+
+static int soc_v1_0_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ if (!ip)
+ return -EINVAL;
+
+ return __soc_v1_0_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip);
+}
+
+struct amdgpu_xcp_mgr_funcs soc_v1_0_xcp_funcs = {
+ .switch_partition_mode = &soc_v1_0_switch_partition_mode,
+ .query_partition_mode = &soc_v1_0_query_partition_mode,
+ .get_ip_details = &soc_v1_0_get_xcp_ip_details,
+ .get_xcp_res_info = &soc_v1_0_get_xcp_res_info,
+#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV
+ .get_xcp_mem_id = &soc_v1_0_get_xcp_mem_id,
+#endif
+};
+
+static int soc_v1_0_xcp_mgr_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ soc_v1_0_xcp_funcs.switch_partition_mode = NULL;
+
+ ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE,
+ 1, &soc_v1_0_xcp_funcs);
+ if (ret)
+ return ret;
+
+ amdgpu_xcp_update_supported_modes(adev->xcp_mgr);
+ /* TODO: Default memory node affinity init */
+
+ return ret;
+}
+
+int soc_v1_0_init_soc_config(struct amdgpu_device *adev)
+{
+ int ret, i;
+ int xcc_inst_per_aid = 4;
+ uint16_t xcc_mask;
+
+ xcc_mask = adev->gfx.xcc_mask;
+ adev->aid_mask = 0;
+ for (i = 0; xcc_mask; xcc_mask >>= xcc_inst_per_aid, i++) {
+ if (xcc_mask & ((1U << xcc_inst_per_aid) - 1))
+ adev->aid_mask |= (1 << i);
+ }
+
+ adev->sdma.num_inst_per_xcc = 2;
+ adev->sdma.num_instances =
+ NUM_XCC(adev->gfx.xcc_mask) * adev->sdma.num_inst_per_xcc;
+ adev->sdma.sdma_mask =
+ GENMASK(adev->sdma.num_instances - 1, 0);
+
+ ret = soc_v1_0_xcp_mgr_init(adev);
+ if (ret)
+ return ret;
+
+ amdgpu_ip_map_init(adev);
+
+ return 0;
+}
+
+bool soc_v1_0_normalize_xcc_reg_range(uint32_t reg)
+{
+ if (((reg >= XCC_REG_RANGE_0_LOW) && (reg < XCC_REG_RANGE_0_HIGH)) ||
+ ((reg >= XCC_REG_RANGE_1_LOW) && (reg < XCC_REG_RANGE_1_HIGH)))
+ return true;
+ else
+ return false;
+}
+
+uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg)
+{
+ uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg);
+
+ /* If it is an XCC reg, normalize the reg to keep
+ * lower 16 bits in local xcc */
+
+ if (soc_v1_0_normalize_xcc_reg_range(normalized_reg))
+ return normalized_reg;
+ else
+ return reg;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h
new file mode 100644
index 000000000000..146996101aa0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc_v1_0.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC_V1_0_H__
+#define __SOC_V1_0_H__
+
+extern const struct amdgpu_ip_block_version soc_v1_0_common_ip_block;
+
+void soc_v1_0_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe,
+ u32 queue, u32 vmid,
+ int xcc_id);
+int soc_v1_0_init_soc_config(struct amdgpu_device *adev);
+bool soc_v1_0_normalize_xcc_reg_range(uint32_t reg);
+uint32_t soc_v1_0_normalize_xcc_reg_offset(uint32_t reg);
+u64 soc_v1_0_encode_ext_smn_addressing(int ext_id);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 8a3f326474e5..f6178da910d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -155,6 +155,7 @@ struct ta_ras_init_flags {
uint8_t channel_dis_num;
uint8_t nps_mode;
uint32_t active_umc_mask;
+ uint8_t vram_type;
};
struct ta_ras_mca_addr {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index f17d297b594b..f2dfacb952b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -267,7 +267,8 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
if ((adev->asic_type == CHIP_ARCTURUS) &&
amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
- DRM_WARN("Fail to disable DF-Cstate.\n");
+ drm_warn(adev_to_drm(adev),
+ "Fail to disable DF-Cstate.\n");
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
umc_reg_offset = get_umc_6_reg_offset(adev,
@@ -284,7 +285,7 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
if ((adev->asic_type == CHIP_ARCTURUS) &&
amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
- DRM_WARN("Fail to enable DF-Cstate\n");
+ drm_warn(adev_to_drm(adev), "Fail to enable DF-Cstate\n");
if (rsmu_umc_index_state)
umc_v6_1_enable_umc_index_mode(adev);
@@ -366,7 +367,7 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
if ((adev->asic_type == CHIP_ARCTURUS) &&
amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
- DRM_WARN("Fail to disable DF-Cstate.\n");
+ drm_warn(adev_to_drm(adev), "Fail to disable DF-Cstate.\n");
LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
umc_reg_offset = get_umc_6_reg_offset(adev,
@@ -382,7 +383,7 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
if ((adev->asic_type == CHIP_ARCTURUS) &&
amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
- DRM_WARN("Fail to enable DF-Cstate\n");
+ drm_warn(adev_to_drm(adev), "Fail to enable DF-Cstate\n");
if (rsmu_umc_index_state)
umc_v6_1_enable_umc_index_mode(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index 2e79a3afc774..fea576a7f397 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -660,7 +660,7 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block)
r = uvd_v3_1_fw_validate(adev);
if (r) {
- DRM_ERROR("amdgpu: UVD Firmware validate fail (%d).\n", r);
+ drm_err(adev_to_drm(adev), "UVD Firmware validate fail (%d).\n", r);
return r;
}
@@ -668,13 +668,13 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block)
r = amdgpu_ring_test_helper(ring);
if (r) {
- DRM_ERROR("amdgpu: UVD ring test fail (%d).\n", r);
+ drm_err(adev_to_drm(adev), "UVD ring test failed (%d).\n", r);
goto done;
}
r = amdgpu_ring_alloc(ring, 10);
if (r) {
- DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r);
goto done;
}
@@ -701,7 +701,7 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block)
done:
if (!r)
- DRM_INFO("UVD initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "UVD initialized successfully.\n");
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 4b96fd583772..73ce3d211ed6 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -167,7 +167,7 @@ static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block)
r = amdgpu_ring_alloc(ring, 10);
if (r) {
- DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r);
goto done;
}
@@ -194,7 +194,7 @@ static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block)
done:
if (!r)
- DRM_INFO("UVD initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "UVD initialized successfully.\n");
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 71409ad8b7ed..454f109cbb2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -164,7 +164,7 @@ static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
r = amdgpu_ring_alloc(ring, 10);
if (r) {
- DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r);
goto done;
}
@@ -191,7 +191,7 @@ static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
done:
if (!r)
- DRM_INFO("UVD initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "UVD initialized successfully.\n");
return r;
@@ -846,7 +846,7 @@ static void uvd_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64
if (RREG32_SMC(ixCURRENT_PG_STATUS) &
CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
- DRM_INFO("Cannot get clockgating state when UVD is powergated.\n");
+ drm_info(adev_to_drm(adev), "Cannot get clockgating state when UVD is powergated.\n");
goto out;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index ceb94bbb03a4..ecd7ead7a60b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -408,7 +408,7 @@ static int uvd_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->uvd.inst->irq.num_types = 1;
adev->uvd.num_enc_rings = 0;
- DRM_INFO("UVD ENC is disabled\n");
+ drm_info(adev_to_drm(adev), "UVD ENC is disabled\n");
}
ring = &adev->uvd.inst->ring;
@@ -478,7 +478,7 @@ static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
r = amdgpu_ring_alloc(ring, 10);
if (r) {
- DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+ drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r);
goto done;
}
@@ -515,9 +515,9 @@ static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
done:
if (!r) {
if (uvd_v6_0_enc_support(adev))
- DRM_INFO("UVD and UVD ENC initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "UVD and UVD ENC initialized successfully.\n");
else
- DRM_INFO("UVD initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "UVD initialized successfully.\n");
}
return r;
@@ -1513,7 +1513,7 @@ static void uvd_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64
data = RREG32_SMC(ixCURRENT_PG_STATUS);
if (data & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
- DRM_INFO("Cannot get clockgating state when UVD is powergated.\n");
+ drm_info(adev_to_drm(adev), "Cannot get clockgating state when UVD is powergated.\n");
goto out;
}
@@ -1633,10 +1633,10 @@ static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
{
if (adev->asic_type >= CHIP_POLARIS10) {
adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs;
- DRM_INFO("UVD is enabled in VM mode\n");
+ drm_info(adev_to_drm(adev), "UVD is enabled in VM mode\n");
} else {
adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs;
- DRM_INFO("UVD is enabled in physical mode\n");
+ drm_info(adev_to_drm(adev), "UVD is enabled in physical mode\n");
}
}
@@ -1647,7 +1647,7 @@ static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
- DRM_INFO("UVD ENC is enabled in VM mode\n");
+ drm_info(adev_to_drm(adev), "UVD ENC is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 1f8866f3f63c..df2c83348315 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -438,7 +438,7 @@ static int uvd_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
}
- DRM_INFO("PSP loading UVD firmware\n");
+ drm_info(adev_to_drm(adev), "PSP loading UVD firmware\n");
}
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
@@ -542,7 +542,7 @@ static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
r = amdgpu_ring_alloc(ring, 10);
if (r) {
- DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
+ drm_err(adev_to_drm(adev), "ring alloc failed (%d).\n", r);
goto done;
}
@@ -582,7 +582,7 @@ static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
}
done:
if (!r)
- DRM_INFO("UVD and UVD ENC initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "UVD and UVD ENC initialized successfully.\n");
return r;
}
@@ -1606,7 +1606,7 @@ static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
continue;
adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
adev->uvd.inst[i].ring.me = i;
- DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
+ drm_info(adev_to_drm(adev), "UVD(%d) is enabled in VM mode\n", i);
}
}
@@ -1622,7 +1622,7 @@ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
adev->uvd.inst[j].ring_enc[i].me = j;
}
- DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
+ drm_info(adev_to_drm(adev), "UVD(%d) ENC is enabled in VM mode\n", j);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 8ea8a6193492..db149eda6204 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -280,7 +280,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev)
if (vce_v2_0_lmi_clean(adev)) {
- DRM_INFO("VCE is not idle \n");
+ drm_info(adev_to_drm(adev), "VCE is not idle\n");
return 0;
}
@@ -289,7 +289,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev)
return -EINVAL;
if (vce_v2_0_wait_for_idle(ip_block)) {
- DRM_INFO("VCE is busy, Can't set clock gating");
+ drm_info(adev_to_drm(adev), "VCE is busy, Can't set clock gating");
return 0;
}
@@ -481,7 +481,7 @@ static int vce_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
return r;
}
- DRM_INFO("VCE initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "VCE initialized successfully.\n");
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 719e9643c43d..03d79e464f04 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -485,7 +485,7 @@ static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
return r;
}
- DRM_INFO("VCE initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "VCE initialized successfully.\n");
return 0;
}
@@ -846,7 +846,7 @@ static void vce_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64
data = RREG32_SMC(ixCURRENT_PG_STATUS);
if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
- DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
+ drm_info(adev_to_drm(adev), "Cannot get clockgating state when VCE is powergated.\n");
goto out;
}
@@ -978,13 +978,13 @@ static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
adev->vce.ring[i].me = i;
}
- DRM_INFO("VCE enabled in VM mode\n");
+ drm_info(adev_to_drm(adev), "VCE enabled in VM mode\n");
} else {
for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
adev->vce.ring[i].me = i;
}
- DRM_INFO("VCE enabled in physical mode\n");
+ drm_info(adev_to_drm(adev), "VCE enabled in physical mode\n");
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 2d64002bed61..ee445d8abe47 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -460,7 +460,7 @@ static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
- DRM_INFO("PSP loading VCE firmware\n");
+ drm_info(adev_to_drm(adev), "PSP loading VCE firmware\n");
} else {
r = amdgpu_vce_resume(adev);
if (r)
@@ -536,7 +536,7 @@ static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
return r;
}
- DRM_INFO("VCE initialized successfully.\n");
+ drm_info(adev_to_drm(adev), "VCE initialized successfully.\n");
return 0;
}
@@ -864,7 +864,7 @@ static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
adev->vce.ring[i].me = i;
}
- DRM_INFO("VCE enabled in VM mode\n");
+ drm_info(adev_to_drm(adev), "VCE enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index a316797875a8..e9d790914761 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -2053,6 +2053,7 @@ static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser,
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct amdgpu_device *adev = parser->adev;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 8897dcc9c1a0..e35fae9cdaf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -1964,7 +1964,8 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
struct mmsch_v2_0_cmd_end end = { {0} };
struct mmsch_v2_0_init_header *header;
uint32_t *init_table = adev->virt.mm_table.cpu_addr;
- uint8_t i = 0;
+
+ /* This path only programs VCN instance 0. */
header = (struct mmsch_v2_0_init_header *)init_table;
direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
@@ -1983,93 +1984,93 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
0xFFFFFFFF, 0x00000004);
/* mc resume*/
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi);
offset = 0;
} else {
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr));
offset = size;
}
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
0);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0),
size);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1),
0);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1),
AMDGPU_VCN_STACK_SIZE);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset +
AMDGPU_VCN_STACK_SIZE));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset +
AMDGPU_VCN_STACK_SIZE));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2),
0);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
for (r = 0; r < adev->vcn.inst[0].num_enc_rings; ++r) {
ring = &adev->vcn.inst->ring_enc[r];
ring->wptr = 0;
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO),
lower_32_bits(ring->gpu_addr));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI),
upper_32_bits(ring->gpu_addr));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE),
ring->ring_size / 4);
}
ring = &adev->vcn.inst->ring_dec;
ring->wptr = 0;
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
lower_32_bits(ring->gpu_addr));
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i,
+ SOC15_REG_OFFSET(UVD, 0,
mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
upper_32_bits(ring->gpu_addr));
/* force RBC into idle state */
@@ -2080,7 +2081,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
MMSCH_V2_0_INSERT_DIRECT_WT(
- SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
/* add end packet */
tmp = sizeof(struct mmsch_v2_0_cmd_end);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index d9cf8f0feeb3..02d5c5af65f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1907,6 +1907,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_device *adev = p->adev;
struct amdgpu_bo_va_mapping *map;
uint32_t *msg, num_buffers;
struct amdgpu_bo *bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 3ae666522d57..d17219be50f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1824,6 +1824,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_device *adev = p->adev;
struct amdgpu_bo_va_mapping *map;
uint32_t *msg, num_buffers;
struct amdgpu_bo *bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index a611a7345125..6a574b6c8e63 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1424,10 +1424,6 @@ static bool vi_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
-static void vi_pre_asic_init(struct amdgpu_device *adev)
-{
-}
-
static const struct amdgpu_asic_funcs vi_asic_funcs =
{
.read_disabled_bios = &vi_read_disabled_bios,
@@ -1447,7 +1443,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.need_reset_on_init = &vi_need_reset_on_init,
.get_pcie_replay_count = &vi_get_pcie_replay_count,
.supports_baco = &vi_asic_supports_baco,
- .pre_asic_init = &vi_pre_asic_init,
.query_video_codecs = &vi_query_video_codecs,
};