diff options
Diffstat (limited to 'drivers/gpu')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 99 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 26 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 46 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 20 |
4 files changed, 74 insertions, 117 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 07568516c506..d209591e3710 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) return seq; } -static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af) -{ - af->fence_wptr_start = af->ring->wptr; -} - -static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af) -{ - af->fence_wptr_end = af->ring->wptr; -} - /** * amdgpu_fence_emit - emit a fence on the requested ring * @@ -124,11 +114,9 @@ void amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, &ring->fence_drv.lock, adev->fence_context + ring->idx, seq); - amdgpu_fence_save_fence_wptr_start(af); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); - amdgpu_fence_save_fence_wptr_end(af); - amdgpu_fence_save_wptr(af); + pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { @@ -240,7 +228,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) do { struct dma_fence *fence, **ptr; - struct amdgpu_fence *am_fence; ++last_seq; last_seq &= drv->num_fences_mask; @@ -253,12 +240,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) if (!fence) continue; - /* Save the wptr in the fence driver so we know what the last processed - * wptr was. This is required for re-emitting the ring state for - * queues that are reset but are not guilty and thus have no guilty fence. - */ - am_fence = container_of(fence, struct amdgpu_fence, base); - drv->signalled_wptr = am_fence->wptr; dma_fence_signal(fence); dma_fence_put(fence); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -707,25 +688,29 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) */ /** - * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors + * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit * - * @af: fence of the ring to update + * @ring: the ring to operate on + * @guilty_fence: fence of the ring to update * */ -void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af) +void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) { struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - struct amdgpu_ring *ring = af->ring; unsigned long flags; u32 seq, last_seq; - bool reemitted = false; + unsigned int i; + bool is_guilty_fence; + bool is_guilty_context; last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; - /* mark all fences from the guilty context with an error */ + ring->reemit = true; + amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy); spin_lock_irqsave(&ring->fence_drv.lock, flags); do { last_seq++; @@ -737,39 +722,45 @@ void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af) if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { fence = container_of(unprocessed, struct amdgpu_fence, base); + is_guilty_fence = fence == guilty_fence; + is_guilty_context = fence->context == guilty_fence->context; - if (fence->reemitted > 1) - reemitted = true; - else if (fence == af) + /* mark all fences from the guilty context with an error */ + if (is_guilty_fence) dma_fence_set_error(&fence->base, -ETIME); - else if (fence->context == af->context) + else if (is_guilty_context) dma_fence_set_error(&fence->base, -ECANCELED); + + /* reemit the packet stream and update wptrs */ + fence->ib_wptr = ring->wptr; + for (i = 0; i < fence->ib_dw_size; i++) { + /* Skip the IB(s) for the guilty context. */ + if (is_guilty_context && + i >= fence->skip_ib_dw_start_offset && + i < fence->skip_ib_dw_end_offset) + amdgpu_ring_write(ring, ring->funcs->nop); + else + amdgpu_ring_write(ring, + ring->ring_backup[fence->backup_idx + i]); + } } rcu_read_unlock(); } while (last_seq != seq); spin_unlock_irqrestore(&ring->fence_drv.lock, flags); - - if (reemitted) { - /* if we've already reemitted once then just cancel everything */ - amdgpu_fence_driver_force_completion(af->ring); - af->ring->ring_backup_entries_to_copy = 0; - } -} - -void amdgpu_fence_save_wptr(struct amdgpu_fence *af) -{ - af->wptr = af->ring->wptr; + amdgpu_ring_commit(ring); + ring->reemit = false; } static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, - u64 start_wptr, u64 end_wptr) + struct amdgpu_fence *af) { - unsigned int first_idx = start_wptr & ring->buf_mask; - unsigned int last_idx = end_wptr & ring->buf_mask; + unsigned int first_idx = af->ib_wptr & ring->buf_mask; + unsigned int dw_size = af->ib_dw_size; unsigned int i; + af->backup_idx = ring->ring_backup_entries_to_copy; /* Backup the contents of the ring buffer. */ - for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask) + for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size) ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i]; } @@ -779,12 +770,10 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - u64 wptr; u32 seq, last_seq; last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; - wptr = ring->fence_drv.signalled_wptr; ring->ring_backup_entries_to_copy = 0; do { @@ -798,21 +787,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, if (unprocessed && !dma_fence_is_signaled(unprocessed)) { fence = container_of(unprocessed, struct amdgpu_fence, base); - /* save everything if the ring is not guilty, otherwise - * just save the content from other contexts. - */ - if (!fence->reemitted && - (!guilty_fence || (fence->context != guilty_fence->context))) { - amdgpu_ring_backup_unprocessed_command(ring, wptr, - fence->wptr); - } else if (!fence->reemitted) { - /* always save the fence */ - amdgpu_ring_backup_unprocessed_command(ring, - fence->fence_wptr_start, - fence->fence_wptr_end); - } - wptr = fence->wptr; - fence->reemitted++; + amdgpu_ring_backup_unprocessed_command(ring, fence); } rcu_read_unlock(); } while (last_seq != seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 276e0236db45..63f62c670df5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -129,6 +129,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; struct amdgpu_fence *af; + struct amdgpu_fence *vm_af; bool need_ctx_switch; struct amdgpu_vm *vm; uint64_t fence_ctx; @@ -215,9 +216,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, dma_fence_put(tmp); } - if (job) + if (job) { + vm_af = job->hw_vm_fence; + /* VM sequence */ + vm_af->ib_wptr = ring->wptr; amdgpu_vm_flush(ring, job, need_pipe_sync); + vm_af->ib_dw_size = + amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, ring->wptr); + } + /* IB sequence */ + af->ib_wptr = ring->wptr; amdgpu_ring_ib_begin(ring); if (ring->funcs->insert_start) @@ -238,6 +247,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, cond_exec = amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); + /* Skip the IB for guilty contexts */ + af->skip_ib_dw_start_offset = + amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); amdgpu_device_flush_hdp(adev, ring); if (need_ctx_switch) @@ -276,6 +288,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_emit_frame_cntl(ring, false, secure); amdgpu_device_invalidate_hdp(adev, ring); + /* Skip the IB for guilty contexts */ + af->skip_ib_dw_end_offset = + amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; @@ -312,13 +327,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, ring->funcs->emit_wave_limit(ring, false); amdgpu_ring_ib_end(ring); - /* Save the wptr associated with this fence. - * This must be last for resets to work properly - * as we need to save the wptr associated with this - * fence so we know what rings contents to backup - * after we reset the queue. - */ - amdgpu_fence_save_wptr(af); + + af->ib_dw_size = amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr); amdgpu_ring_commit(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 4638a686a84e..a345c3fb8ff4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; /* Make sure we aren't trying to allocate more space - * than the maximum for one submission + * than the maximum for one submission. Skip for reemit + * since we may be reemitting several submissions. */ - if (WARN_ON_ONCE(ndw > ring->max_dw)) - return -ENOMEM; + if (!ring->reemit) { + if (WARN_ON_ONCE(ndw > ring->max_dw)) + return -ENOMEM; + } ring->count_dw = ndw; ring->wptr_old = ring->wptr; @@ -105,29 +108,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) } /** - * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit - * - * @ring: amdgpu_ring structure holding ring information - * @ndw: number of dwords to allocate in the ring buffer - * - * Allocate @ndw dwords in the ring buffer (all asics). - * doesn't check the max_dw limit as we may be reemitting - * several submissions. - */ -static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw) -{ - /* Align requested size with padding so unlock_commit can - * pad safely */ - ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; - - ring->count_dw = ndw; - ring->wptr_old = ring->wptr; - - if (ring->funcs->begin_use) - ring->funcs->begin_use(ring); -} - -/** * amdgpu_ring_insert_nop - insert NOP packets * * @ring: amdgpu_ring structure holding ring information @@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, struct amdgpu_fence *guilty_fence) { - unsigned int i; int r; /* verify that the ring is functional */ @@ -883,16 +862,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, if (r) return r; - /* set an error on all fences from the context */ - if (guilty_fence) - amdgpu_fence_driver_update_timedout_fence_state(guilty_fence); - /* Re-emit the non-guilty commands */ - if (ring->ring_backup_entries_to_copy) { - amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy); - for (i = 0; i < ring->ring_backup_entries_to_copy; i++) - amdgpu_ring_write(ring, ring->ring_backup[i]); - amdgpu_ring_commit(ring); - } + /* set an error on all fences from the context and reemit */ + amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 5a82db0888f0..ce5af137ee40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -121,7 +121,6 @@ struct amdgpu_fence_driver { /* sync_seq is protected by ring emission lock */ uint32_t sync_seq; atomic_t last_seq; - u64 signalled_wptr; bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; @@ -146,23 +145,23 @@ struct amdgpu_fence { struct amdgpu_ring *ring; ktime_t start_timestamp; - /* wptr for the total submission for resets */ - u64 wptr; + /* location and size of the IB */ + u64 ib_wptr; + unsigned int ib_dw_size; + unsigned int skip_ib_dw_start_offset; + unsigned int skip_ib_dw_end_offset; /* fence context for resets */ u64 context; - /* has this fence been reemitted */ - unsigned int reemitted; - /* wptr for the fence for the submission */ - u64 fence_wptr_start; - u64 fence_wptr_end; + /* idx for ring backups */ + unsigned int backup_idx; }; extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); -void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af); -void amdgpu_fence_save_wptr(struct amdgpu_fence *af); +void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, @@ -313,6 +312,7 @@ struct amdgpu_ring { /* backups for resets */ uint32_t *ring_backup; unsigned int ring_backup_entries_to_copy; + bool reemit; unsigned rptr_offs; u64 rptr_gpu_addr; u32 *rptr_cpu_addr; |
