From 1f22fcb88bfef26a966e9eb242c692c6bf253d47 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Sep 2025 12:37:32 -0400 Subject: drm/amdgpu: handle wrap around in reemit handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compare the sequence numbers directly. Fixes: 77cc0da39c7c ("drm/amdgpu: track ring state associated with a fence") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index fd8cca241da6..e270df30c279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -790,14 +790,19 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - u64 wptr, i, seqno; + u64 wptr; + u32 seq, last_seq; - seqno = amdgpu_fence_read(ring); + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; wptr = ring->fence_drv.signalled_wptr; ring->ring_backup_entries_to_copy = 0; - for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) { - ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask]; + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; rcu_read_lock(); unprocessed = rcu_dereference(*ptr); @@ -813,7 +818,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, wptr = fence->wptr; } rcu_read_unlock(); - } + } while (last_seq != seq); } /* -- cgit v1.2.3 From ff780f4f80323148d43198f2052c14160c8428d3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Sep 2025 13:48:23 -0400 Subject: drm/amdgpu: set an error on all fences from a bad context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we backup ring contents to reemit after a queue reset, we don't backup ring contents from the bad context. When we signal the fences, we should set an error on those fences as well. v2: misc cleanups v3: add locking for fence error, fix comment (Christian) v4: fix wrap around, locking (Christian) Fixes: 77cc0da39c7c ("drm/amdgpu: track ring state associated with a fence") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 39 +++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index e270df30c279..18a7829122d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -758,11 +758,42 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) * @fence: fence of the ring to signal * */ -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence) +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) { - dma_fence_set_error(&fence->base, -ETIME); - amdgpu_fence_write(fence->ring, fence->seq); - amdgpu_fence_process(fence->ring); + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + struct amdgpu_ring *ring = af->ring; + unsigned long flags; + u32 seq, last_seq; + + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; + + /* mark all fences from the guilty context with an error */ + spin_lock_irqsave(&ring->fence_drv.lock, flags); + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + if (fence == af) + dma_fence_set_error(&fence->base, -ETIME); + else if (fence->context == af->context) + dma_fence_set_error(&fence->base, -ECANCELED); + } + rcu_read_unlock(); + } while (last_seq != seq); + spin_unlock_irqrestore(&ring->fence_drv.lock, flags); + /* signal the guilty fence */ + amdgpu_fence_write(ring, af->seq); + amdgpu_fence_process(ring); } void amdgpu_fence_save_wptr(struct dma_fence *fence) -- cgit v1.2.3