summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorAlex Deucher <alexander.deucher@amd.com>2025-11-13 13:24:10 -0500
committerAlex Deucher <alexander.deucher@amd.com>2026-01-05 16:59:57 -0500
commitfb62a2067ca4555a6572d911e05919a311c010aa (patch)
treebf9506be10a2a36d49ec1f9d92578cad6efbd0f9 /drivers/gpu
parent216779827f8d52a39d94d43c47737a829991ad8c (diff)
drm/amdgpu: don't reemit ring contents more than once
If we cancel a bad job and reemit the ring contents, and we get another timeout, cancel everything rather than reemitting. The wptr markers are only relevant for the original emit. If we reemit, the wptr markers are no longer correct. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h2
2 files changed, 19 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index c7843e336310..4f74a02a9a05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -709,6 +709,7 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
struct amdgpu_ring *ring = af->ring;
unsigned long flags;
u32 seq, last_seq;
+ bool reemitted = false;
last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
@@ -726,7 +727,9 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
fence = container_of(unprocessed, struct amdgpu_fence, base);
- if (fence == af)
+ if (fence->reemitted > 1)
+ reemitted = true;
+ else if (fence == af)
dma_fence_set_error(&fence->base, -ETIME);
else if (fence->context == af->context)
dma_fence_set_error(&fence->base, -ECANCELED);
@@ -734,9 +737,16 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
rcu_read_unlock();
} while (last_seq != seq);
spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
- /* signal the guilty fence */
- amdgpu_fence_write(ring, (u32)af->base.seqno);
- amdgpu_fence_process(ring);
+
+ if (reemitted) {
+ /* if we've already reemitted once then just cancel everything */
+ amdgpu_fence_driver_force_completion(af->ring);
+ af->ring->ring_backup_entries_to_copy = 0;
+ } else {
+ /* signal the guilty fence */
+ amdgpu_fence_write(ring, (u32)af->base.seqno);
+ amdgpu_fence_process(ring);
+ }
}
void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
@@ -784,10 +794,12 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
/* save everything if the ring is not guilty, otherwise
* just save the content from other contexts.
*/
- if (!guilty_fence || (fence->context != guilty_fence->context))
+ if (!fence->reemitted &&
+ (!guilty_fence || (fence->context != guilty_fence->context)))
amdgpu_ring_backup_unprocessed_command(ring, wptr,
fence->wptr);
wptr = fence->wptr;
+ fence->reemitted++;
}
rcu_read_unlock();
} while (last_seq != seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index a1fb0fadb6ea..d88182952897 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -150,6 +150,8 @@ struct amdgpu_fence {
u64 wptr;
/* fence context for resets */
u64 context;
+ /* has this fence been reemitted */
+ unsigned int reemitted;
};
extern const struct drm_sched_backend_ops amdgpu_sched_ops;