From bf1cd14f9e2e1fdf981eed273ddd595863f5288c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 Jun 2025 11:31:52 -0400 Subject: drm/amdgpu: switch job hw_fence to amdgpu_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the amdgpu fence container so we can store additional data in the fence. This also fixes the start_time handling for MCBP since we were casting the fence to an amdgpu_fence and it wasn't. Fixes: 3f4c175d62d8 ("drm/amdgpu: MCBP based on DRM scheduler (v9)") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 5f5c00ace96b..f5855c412321 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -41,22 +41,6 @@ #include "amdgpu_trace.h" #include "amdgpu_reset.h" -/* - * Fences mark an event in the GPUs pipeline and are used - * for GPU/CPU synchronization. When the fence is written, - * it is expected that all buffers associated with that fence - * are no longer in use by the associated ring on the GPU and - * that the relevant GPU caches have been flushed. - */ - -struct amdgpu_fence { - struct dma_fence base; - - /* RB, DMA, etc. */ - struct amdgpu_ring *ring; - ktime_t start_timestamp; -}; - static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) @@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); if (am_fence == NULL) return -ENOMEM; - fence = &am_fence->base; - am_fence->ring = ring; } else { /* take use of job-embedded fence */ - fence = &job->hw_fence; + am_fence = &job->hw_fence; } + fence = &am_fence->base; + am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; if (job && job->job_run_counter) { @@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) * it right here or we won't be able to track them in fence_drv * and they will remain unsignaled during sa_bo free. */ - job = container_of(old, struct amdgpu_job, hw_fence); + job = container_of(old, struct amdgpu_job, hw_fence.base); if (!job->base.s_fence && !dma_fence_is_signaled(old)) dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); @@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); return (const char *)to_amdgpu_ring(job->base.sched)->name; } @@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) */ static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); @@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence)); + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); } /** -- cgit v1.2.3 From 684385273de5de3c979843a7804a57361cc60753 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 14:28:32 -0400 Subject: drm/amdgpu: remove fence slab MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just use kmalloc for the fences in the rare case we need an independent fence. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index f5855c412321..343c2bfdefb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -41,21 +41,6 @@ #include "amdgpu_trace.h" #include "amdgpu_reset.h" -static struct kmem_cache *amdgpu_fence_slab; - -int amdgpu_fence_slab_init(void) -{ - amdgpu_fence_slab = KMEM_CACHE(amdgpu_fence, SLAB_HWCACHE_ALIGN); - if (!amdgpu_fence_slab) - return -ENOMEM; - return 0; -} - -void amdgpu_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_fence_slab); -} /* * Cast helper */ @@ -131,9 +116,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd int r; if (job == NULL) { - /* create a sperate hw fence */ - am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); - if (am_fence == NULL) + /* create a separate hw fence */ + am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); + if (!am_fence) return -ENOMEM; } else { /* take use of job-embedded fence */ @@ -814,7 +799,7 @@ static void amdgpu_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free fence_slab if it's separated fence*/ - kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f)); + kfree(to_amdgpu_fence(f)); } /** -- cgit v1.2.3 From a3e510fd69c315a6b55f7a982caadee4e3ba6301 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 24 Jun 2025 11:20:56 +0530 Subject: drm/amdgpu: Convert from DRM_* to dev_* Convert from generic DRM_* to dev_* calls to have device context info. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 343c2bfdefb2..57cbeb25d0f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -295,7 +295,9 @@ static void amdgpu_fence_fallback(struct timer_list *t) fence_drv.fallback_timer); if (amdgpu_fence_process(ring)) - DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name); + dev_warn(ring->adev->dev, + "Fence fallback timer expired on ring %s\n", + ring->name); } /** -- cgit v1.2.3 From d0c35c84dcfa66947b66ee7a5d8d2d7f52e9c1f3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 11:56:23 -0400 Subject: drm/amdgpu: remove job parameter from amdgpu_fence_emit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit What we actually care about is the amdgpu_fence object so pass that in explicitly to avoid possible mistakes in the future. The job_run_counter handling can be safely removed at this point as we no longer support job resubmission. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 34 ++++++++++++------------------- 1 file changed, 13 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 57cbeb25d0f8..3d170060282e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -99,14 +99,14 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * * @ring: ring the fence is associated with * @f: resulting fence object - * @job: job the fence is embedded in + * @af: amdgpu fence input * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job, - unsigned int flags) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + struct amdgpu_fence *af, unsigned int flags) { struct amdgpu_device *adev = ring->adev; struct dma_fence *fence; @@ -115,36 +115,28 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd uint32_t seq; int r; - if (job == NULL) { + if (!af) { /* create a separate hw fence */ am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); if (!am_fence) return -ENOMEM; } else { - /* take use of job-embedded fence */ - am_fence = &job->hw_fence; + am_fence = af; } fence = &am_fence->base; am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; - if (job && job->job_run_counter) { - /* reinit seq for resubmitted jobs */ - fence->seqno = seq; - /* TO be inline with external fence creation and other drivers */ + if (af) { + dma_fence_init(fence, &amdgpu_job_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); + /* Against remove in amdgpu_job_{free, free_cb} */ dma_fence_get(fence); } else { - if (job) { - dma_fence_init(fence, &amdgpu_job_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, seq); - /* Against remove in amdgpu_job_{free, free_cb} */ - dma_fence_get(fence); - } else { - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, seq); - } + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); } amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, -- cgit v1.2.3 From 77cc0da39c7ce203cd3ce6bc5696421947a979d7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 May 2025 21:35:00 -0400 Subject: drm/amdgpu: track ring state associated with a fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to know the wptr and sequence number associated with a fence so that we can re-emit the unprocessed state after a ring reset. Pre-allocate storage space for the ring buffer contents and add helpers to save off and re-emit the unprocessed state so that it can be re-emitted after the queue is reset. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 90 +++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 2c3547f4cea4..9e7506965cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -120,6 +120,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); if (!am_fence) return -ENOMEM; + am_fence->context = 0; } else { am_fence = af; } @@ -127,6 +128,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; + am_fence->seq = seq; if (af) { dma_fence_init(fence, &amdgpu_job_fence_ops, &ring->fence_drv.lock, @@ -141,6 +143,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); + amdgpu_fence_save_wptr(fence); pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { @@ -253,6 +256,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) do { struct dma_fence *fence, **ptr; + struct amdgpu_fence *am_fence; ++last_seq; last_seq &= drv->num_fences_mask; @@ -265,6 +269,12 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) if (!fence) continue; + /* Save the wptr in the fence driver so we know what the last processed + * wptr was. This is required for re-emitting the ring state for + * queues that are reset but are not guilty and thus have no guilty fence. + */ + am_fence = container_of(fence, struct amdgpu_fence, base); + drv->signalled_wptr = am_fence->wptr; dma_fence_signal(fence); dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -727,6 +737,86 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) amdgpu_fence_process(ring); } + +/** + * Kernel queue reset handling + * + * The driver can reset individual queues for most engines, but those queues + * may contain work from multiple contexts. Resetting the queue will reset + * lose all of that state. In order to minimize the collateral damage, the + * driver will save the ring contents which are not associated with the guilty + * context prior to resetting the queue. After resetting the queue the queue + * contents from the other contexts is re-emitted to the rings so that it can + * be processed by the engine. To handle this, we save the queue's write + * pointer (wptr) in the fences associated with each context. If we get a + * queue timeout, we can then use the wptrs from the fences to determine + * which data needs to be saved out of the queue's ring buffer. + */ + +/** + * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence + * + * @fence: fence of the ring to signal + * + */ +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence) +{ + dma_fence_set_error(&fence->base, -ETIME); + amdgpu_fence_write(fence->ring, fence->seq); + amdgpu_fence_process(fence->ring); +} + +void amdgpu_fence_save_wptr(struct dma_fence *fence) +{ + struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base); + + am_fence->wptr = am_fence->ring->wptr; +} + +static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, + u64 start_wptr, u32 end_wptr) +{ + unsigned int first_idx = start_wptr & ring->buf_mask; + unsigned int last_idx = end_wptr & ring->buf_mask; + unsigned int i; + + /* Backup the contents of the ring buffer. */ + for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask) + ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i]; +} + +void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + u64 wptr, i, seqno; + + seqno = amdgpu_fence_read(ring); + wptr = ring->fence_drv.signalled_wptr; + ring->ring_backup_entries_to_copy = 0; + + for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) { + ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + /* save everything if the ring is not guilty, otherwise + * just save the content from other contexts. + */ + if (!guilty_fence || (fence->context != guilty_fence->context)) + amdgpu_ring_backup_unprocessed_command(ring, wptr, + fence->wptr); + wptr = fence->wptr; + } + rcu_read_unlock(); + } +} + /* * Common fence implementation */ -- cgit v1.2.3