summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-21 14:56:17 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-21 14:56:17 -0800
commit28eb75e178d389d325f1666e422bc13bbbb9804c (patch)
tree20417b4e798f98fc5687e80c1e0126afcf437c70 /drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
parent071b34dcf71523a559b6c39f5d21a268a9531b50 (diff)
parenta163b895077861598be48c1cf7f4a88413c28b22 (diff)
Merge tag 'drm-next-2024-11-21' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie: "There's a lot of rework, the panic helper support is being added to more drivers, v3d gets support for HW superpages, scheduler documentation, drm client and video aperture reworks, some new MAINTAINERS added, amdgpu has the usual lots of IP refactors, Intel has some Pantherlake enablement and xe is getting some SRIOV bits, but just lots of stuff everywhere. core: - split DSC helpers from DP helpers - clang build fixes for drm/mm test - drop simple pipeline support for gem vram - document submission error signaling - move drm_rect to drm core module from kms helper - add default client setup to most drivers - move to video aperture helpers instead of drm ones tests: - new framebuffer tests ttm: - remove swapped and pinned BOs from TTM lru panic: - fix uninit spinlock - add ABGR2101010 support bridge: - add TI TDP158 support - use standard PM OPS dma-fence: - use read_trylock instead of read_lock to help lockdep scheduler: - add errno to sched start to report different errors - add locking to drm_sched_entity_modify_sched - improve documentation xe: - add drm_line_printer - lots of refactoring - Enable Xe2 + PES disaggregation - add new ARL PCI ID - SRIOV development work - fix exec unnecessary implicit fence - define and parse OA sync props - forcewake refactoring i915: - Enable BMG/LNL ultra joiner - Enable 10bpx + CCS scanout on ICL+, fp16/CCS on TGL+ - use DSB for plane/color mgmt - Arrow lake PCI IDs - lots of i915/xe display refactoring - enable PXP GuC autoteardown - Pantherlake (PTL) Xe3 LPD display enablement - Allow fastset HDR infoframe changes - write DP source OUI for non-eDP sinks - share PCI IDs between i915 and xe amdgpu: - SDMA queue reset support - SMU 13.0.6, JPEG 4.0.3 updates - Initial runtime repartitioning support - rework IP structs for multiple IP instances - Fetch EDID from _DDC if available - SMU13 zero rpm user control - lots of fixes/cleanups amdkfd: - Increase event FIFO size - add topology cap flag for per queue reset msm: - DPU: - SA8775P support - (disabled by default) MSM8917, MSM8937, MSM8953 and MSM8996 support - Enable large framebuffer support - Drop MSM8998 and SDM845 - DP: - SA8775P support - GPU: - a7xx preemption support - Adreno A663 support ast: - warn about unsupported TX chips ivpu: - add coredump - add pantherlake support rockchip: - 4K@60Hz display enablement - generate pll programming tables panthor: - add timestamp query API - add realtime group priority - add fdinfo support etnaviv: - improve handling of DMA address limits - improve GPU hangcheck exynos: - Decon Exynos7870 support mediatek: - add OF graph support omap: - locking fixes bochs: - convert to gem/shmem from simpledrm v3d: - support big/super pages - add gemfs vc4: - BCM2712 support refactoring - add YUV444 format support udmabuf: - folio related fixes nouveau: - add panic support on nv50+" * tag 'drm-next-2024-11-21' of https://gitlab.freedesktop.org/drm/kernel: (1583 commits) drm/xe/guc: Fix dereference before NULL check drm/amd: Fix initialization mistake for NBIO 7.7.0 Revert "drm/amd/display: parse umc_info or vram_info based on ASIC" drm/amd/display: Fix failure to read vram info due to static BP_RESULT drm/amdgpu: enable GTT fallback handling for dGPUs only drm/amd/amdgpu: limit single process inside MES drm/fourcc: add AMD_FMT_MOD_TILE_GFX9_4K_D_X drm/amdgpu/mes12: correct kiq unmap latency drm/amdgpu: Support vcn and jpeg error info parsing drm/amd : Update MES API header file for v11 & v12 drm/amd/amdkfd: add/remove kfd queues on start/stop KFD scheduling drm/amdkfd: change kfd process kref count at creation drm/amdgpu: Cleanup shift coding style drm/amd/amdgpu: Increase MES log buffer to dump mes scratch data drm/amdgpu: Implement virt req_ras_err_count drm/amdgpu: VF Query RAS Caps from Host if supported drm/amdgpu: Add msg handlers for SRIOV RAS Telemetry drm/amdgpu: Update SRIOV Exchange Headers for RAS Telemetry Support drm/amd/display: 3.2.309 drm/amd/display: Adjust VSDB parser for replay feature ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c440
1 files changed, 300 insertions, 140 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index bc9b240a3488..ba5160399ab2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -522,14 +522,17 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ * sdma_v5_2_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+
+static int sdma_v5_2_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
@@ -539,139 +542,161 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
+ ring = &adev->sdma.instance[i].ring;
- if (!amdgpu_sriov_vf(adev))
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+ }
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
-
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_2_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_2_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
+ /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- /* SRIOV VF has no control of any of registers below */
- if (!amdgpu_sriov_vf(adev)) {
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-
- /* enable MCBP */
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
- (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
- SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
-
- /* unhalt engine */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ /* SRIOV VF has no control of any of registers below */
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_2_ctx_switch_enable(adev, true);
- sdma_v5_2_enable(adev, true);
- }
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+ sdma_v5_2_enable(adev, true);
+ }
+
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_helper(ring);
+/**
+ * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_2_gfx_resume_instance(adev, i, false);
if (r)
return r;
}
@@ -736,9 +761,9 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
return 0;
}
-static int sdma_v5_2_soft_reset(void *handle)
+static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset;
u32 tmp;
int i;
@@ -778,6 +803,7 @@ static int sdma_v5_2_soft_reset(void *handle)
static int sdma_v5_2_start(struct amdgpu_device *adev)
{
int r = 0;
+ struct amdgpu_ip_block *ip_block;
if (amdgpu_sriov_vf(adev)) {
sdma_v5_2_ctx_switch_enable(adev, false);
@@ -798,7 +824,11 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
msleep(1000);
}
- sdma_v5_2_soft_reset(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA);
+ if (!ip_block)
+ return -EINVAL;
+
+ sdma_v5_2_soft_reset(ip_block);
/* unhalt the MEs */
sdma_v5_2_enable(adev, true);
/* enable sdma ring preemption */
@@ -1180,7 +1210,28 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+
+ /* Update the PD address for this VMID. */
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Trigger invalidation. */
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f));
+ amdgpu_ring_write(ring, req);
+ amdgpu_ring_write(ring, 0xFFFFFFFF);
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F));
}
static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1216,9 +1267,9 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_2_early_init(void *handle)
+static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_sdma_init_microcode(adev, 0, true);
@@ -1268,11 +1319,11 @@ static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
return -EINVAL;
}
-static int sdma_v5_2_sw_init(void *handle)
+static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
uint32_t *ptr;
@@ -1306,6 +1357,24 @@ static int sdma_v5_2_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 4):
+ if (adev->sdma.instance[0].fw_version >= 76)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(5, 2, 5):
+ if (adev->sdma.instance[0].fw_version >= 34)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
/* Allocate memory for SDMA IP Dump buffer */
ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
if (ptr)
@@ -1313,17 +1382,22 @@ static int sdma_v5_2_sw_init(void *handle)
else
DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_2_sw_fini(void *handle)
+static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
kfree(adev->sdma.ip_dump);
@@ -1331,16 +1405,16 @@ static int sdma_v5_2_sw_fini(void *handle)
return 0;
}
-static int sdma_v5_2_hw_init(void *handle)
+static int sdma_v5_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return sdma_v5_2_start(adev);
}
-static int sdma_v5_2_hw_fini(void *handle)
+static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1351,18 +1425,14 @@ static int sdma_v5_2_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_2_suspend(void *handle)
+static int sdma_v5_2_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_fini(adev);
+ return sdma_v5_2_hw_fini(ip_block);
}
-static int sdma_v5_2_resume(void *handle)
+static int sdma_v5_2_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_init(adev);
+ return sdma_v5_2_hw_init(ip_block);
}
static bool sdma_v5_2_is_idle(void *handle)
@@ -1380,11 +1450,11 @@ static bool sdma_v5_2_is_idle(void *handle)
return true;
}
-static int sdma_v5_2_wait_for_idle(void *handle)
+static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1, sdma2, sdma3;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1399,6 +1469,96 @@ static int sdma_v5_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
+static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r;
+ u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (ring == &adev->sdma.instance[i].ring)
+ break;
+ }
+
+ if (i == adev->sdma.num_instances) {
+ DRM_ERROR("sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* stop queue */
+ ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+
+ rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
+
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
+ cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
+
+ /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */
+ preempt = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT));
+ preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt);
+
+ soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i;
+
+
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset);
+
+ udelay(50);
+
+ soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i);
+
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset);
+
+ /* unfreeze and unhalt */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ r = sdma_v5_2_gfx_resume_instance(adev, i, true);
+
+err0:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1736,9 +1896,9 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
amdgpu_gfx_off_ctrl(adev, true);
}
-static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p)
+static void sdma_v5_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
uint32_t instance_offset;
@@ -1757,9 +1917,9 @@ static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void sdma_v5_2_dump_ip_state(void *handle)
+static void sdma_v5_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t instance_offset;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
@@ -1781,7 +1941,6 @@ static void sdma_v5_2_dump_ip_state(void *handle)
static const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_2_sw_init,
.sw_fini = sdma_v5_2_sw_fini,
.hw_init = sdma_v5_2_hw_init,
@@ -1834,6 +1993,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
.preempt_ib = sdma_v5_2_ring_preempt_ib,
+ .reset = sdma_v5_2_reset_queue,
};
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)