From 77cc0da39c7ce203cd3ce6bc5696421947a979d7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 May 2025 21:35:00 -0400 Subject: drm/amdgpu: track ring state associated with a fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to know the wptr and sequence number associated with a fence so that we can re-emit the unprocessed state after a ring reset. Pre-allocate storage space for the ring buffer contents and add helpers to save off and re-emit the unprocessed state so that it can be re-emitted after the queue is reset. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 784ba2ec354c..309e7bb6001b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -118,6 +118,7 @@ struct amdgpu_fence_driver { /* sync_seq is protected by ring emission lock */ uint32_t sync_seq; atomic_t last_seq; + u64 signalled_wptr; bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; @@ -141,6 +142,12 @@ struct amdgpu_fence { /* RB, DMA, etc. */ struct amdgpu_ring *ring; ktime_t start_timestamp; + + /* wptr for the fence for resets */ + u64 wptr; + /* fence context for resets */ + u64 context; + uint32_t seq; }; extern const struct drm_sched_backend_ops amdgpu_sched_ops; @@ -148,6 +155,8 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence); +void amdgpu_fence_save_wptr(struct dma_fence *fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, @@ -284,6 +293,9 @@ struct amdgpu_ring { struct amdgpu_bo *ring_obj; uint32_t *ring; + /* backups for resets */ + uint32_t *ring_backup; + unsigned int ring_backup_entries_to_copy; unsigned rptr_offs; u64 rptr_gpu_addr; volatile u32 *rptr_cpu_addr; @@ -550,4 +562,10 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); +void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); #endif -- cgit v1.2.3 From 6ac55eab4fc41e0ea80f9064945e4340f13d8b5c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jul 2025 11:55:05 -0400 Subject: drm/amdgpu: move reset support type checks into the caller Rather than checking in the callbacks, check if the reset type is supported in the caller. Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 309e7bb6001b..7670f5d82b9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -568,4 +568,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, struct amdgpu_fence *guilty_fence); int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, struct amdgpu_fence *guilty_fence); +bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring, + u32 reset_type); #endif -- cgit v1.2.3