 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |   2
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |   7
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 143
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c      |  17
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  43
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h     |   3
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h    |   8
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      |   7
 8 files changed, 63 insertions(+), 167 deletions(-)
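What the patch does, restated from the hunks below: struct amdgpu_job no longer embeds its hardware fence, so the parallel amdgpu_job_fence_ops (which existed only so the job could be recovered from the embedded fence via container_of()) goes away, together with amdgpu_fence_driver_clear_job_fences(). Instead, amdgpu_job_alloc() allocates two amdgpu_fence objects up front (hw_fence, plus a dedicated hw_vm_fence for the VM flush), and amdgpu_fence_emit() no longer allocates or returns anything itself. A sketch of the old versus new calling contract, condensed from the hunks rather than copied from the tree:

/* Old contract: passing af == NULL made amdgpu_fence_emit() kzalloc a
 * standalone fence and hand it back through the **f out-parameter:
 *
 *	r = amdgpu_fence_emit(ring, &fence, NULL, 0);
 *
 * New contract: the caller always supplies a pre-allocated fence, and
 * the emitted dma_fence is simply its base member:
 *
 *	r = amdgpu_fence_emit(ring, af, 0);
 *	if (!r)
 *		fence = &af->base;
 */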
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a70651050acf..0a62727e74f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1902,7 +1902,7 @@ no_preempt:
 			continue;
 		}
 		job = to_amdgpu_job(s_job);
-		if (preempted && (&job->hw_fence.base) == fence)
+		if (preempted && (&job->hw_fence->base) == fence)
 			/* mark the job as preempted */
 			job->preemption_status |= AMDGPU_IB_PREEMPTED;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c7c999ae2a28..b6d2b9c0bac0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5809,11 +5809,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
-		/* Clear job fence from fence drv to avoid force_completion
-		 * leave NULL and vm flush fence in fence drv
-		 */
-		amdgpu_fence_driver_clear_job_fences(ring);
-
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
@@ -6542,7 +6537,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	 *
 	 * job->base holds a reference to parent fence
 	 */
-	if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
+	if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
 		job_signaled = true;
 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
 		goto skip_hw_reset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 18a7829122d2..1fe31d2f2706 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -45,16 +45,11 @@
  * Cast helper
  */
 static const struct dma_fence_ops amdgpu_fence_ops;
-static const struct dma_fence_ops amdgpu_job_fence_ops;
 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
 {
 	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
 
-	if (__f->base.ops == &amdgpu_fence_ops ||
-	    __f->base.ops == &amdgpu_job_fence_ops)
-		return __f;
-
-	return NULL;
+	return __f;
 }
 
 /**
@@ -98,51 +93,32 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
  * @ring: ring the fence is associated with
- * @f: resulting fence object
  * @af: amdgpu fence input
  * @flags: flags to pass into the subordinate .emit_fence() call
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
-		      struct amdgpu_fence *af, unsigned int flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+		      unsigned int flags)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct dma_fence *fence;
-	struct amdgpu_fence *am_fence;
 	struct dma_fence __rcu **ptr;
 	uint32_t seq;
 	int r;
 
-	if (!af) {
-		/* create a separate hw fence */
-		am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
-		if (!am_fence)
-			return -ENOMEM;
-	} else {
-		am_fence = af;
-	}
-	fence = &am_fence->base;
-	am_fence->ring = ring;
+	fence = &af->base;
+	af->ring = ring;
 
 	seq = ++ring->fence_drv.sync_seq;
-	am_fence->seq = seq;
-	if (af) {
-		dma_fence_init(fence, &amdgpu_job_fence_ops,
-			       &ring->fence_drv.lock,
-			       adev->fence_context + ring->idx, seq);
-		/* Against remove in amdgpu_job_{free, free_cb} */
-		dma_fence_get(fence);
-	} else {
-		dma_fence_init(fence, &amdgpu_fence_ops,
-			       &ring->fence_drv.lock,
-			       adev->fence_context + ring->idx, seq);
-	}
+	dma_fence_init(fence, &amdgpu_fence_ops,
+		       &ring->fence_drv.lock,
+		       adev->fence_context + ring->idx, seq);
 
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, flags | AMDGPU_FENCE_FLAG_INT);
-	amdgpu_fence_save_wptr(fence);
+	amdgpu_fence_save_wptr(af);
 	pm_runtime_get_noresume(adev_to_drm(adev)->dev);
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -167,8 +143,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
 	 */
 	rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
-	*f = fence;
-
 	return 0;
 }
 
@@ -670,36 +644,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
- *
- * @ring: fence of the ring to be cleared
- *
- */
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
-{
-	int i;
-	struct dma_fence *old, **ptr;
-
-	for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
-		ptr = &ring->fence_drv.fences[i];
-		old = rcu_dereference_protected(*ptr, 1);
-		if (old && old->ops == &amdgpu_job_fence_ops) {
-			struct amdgpu_job *job;
-
-			/* For non-scheduler bad job, i.e. failed ib test, we need to signal
-			 * it right here or we won't be able to track them in fence_drv
-			 * and they will remain unsignaled during sa_bo free.
-			 */
-			job = container_of(old, struct amdgpu_job, hw_fence.base);
-			if (!job->base.s_fence && !dma_fence_is_signaled(old))
-				dma_fence_signal(old);
-			RCU_INIT_POINTER(*ptr, NULL);
-			dma_fence_put(old);
-		}
-	}
-}
-
-/**
  * amdgpu_fence_driver_set_error - set error code on fences
  * @ring: the ring which contains the fences
  * @error: the error code to set
@@ -755,7 +699,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
 /**
  * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
  *
- * @fence: fence of the ring to signal
+ * @af: fence of the ring to signal
  *
  */
 void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
@@ -792,15 +736,13 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
 	} while (last_seq != seq);
 	spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
 	/* signal the guilty fence */
-	amdgpu_fence_write(ring, af->seq);
+	amdgpu_fence_write(ring, (u32)af->base.seqno);
 	amdgpu_fence_process(ring);
 }
 
-void amdgpu_fence_save_wptr(struct dma_fence *fence)
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
 {
-	struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base);
-
-	am_fence->wptr = am_fence->ring->wptr;
+	af->wptr = af->ring->wptr;
 }
 
 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
@@ -866,13 +808,6 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
 	return (const char *)to_amdgpu_fence(f)->ring->name;
 }
 
-static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
-{
-	struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
-	return (const char *)to_amdgpu_ring(job->base.sched)->name;
-}
-
 /**
  * amdgpu_fence_enable_signaling - enable signalling on fence
  * @f: fence
@@ -890,23 +825,6 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
 }
 
 /**
- * amdgpu_job_fence_enable_signaling - enable signalling on job fence
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_enable_signaling above, it
- * only handles the job embedded fence.
- */
-static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
-{
-	struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
-	if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
-		amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
-
-	return true;
-}
-
-/**
  * amdgpu_fence_free - free up the fence memory
  *
  * @rcu: RCU callback head
@@ -922,21 +840,6 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
 }
 
 /**
- * amdgpu_job_fence_free - free up the job with embedded fence
- *
- * @rcu: RCU callback head
- *
- * Free up the job with embedded fence after the RCU grace period.
- */
-static void amdgpu_job_fence_free(struct rcu_head *rcu)
-{
-	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
-
-	/* free job if fence has a parent job */
-	kfree(container_of(f, struct amdgpu_job, hw_fence.base));
-}
-
-/**
  * amdgpu_fence_release - callback that fence can be freed
  *
  * @f: fence
@@ -949,19 +852,6 @@ static void amdgpu_fence_release(struct dma_fence *f)
 	call_rcu(&f->rcu, amdgpu_fence_free);
 }
 
-/**
- * amdgpu_job_fence_release - callback that job embedded fence can be freed
- *
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_release above, it
- * only handles the job embedded fence.
- */
-static void amdgpu_job_fence_release(struct dma_fence *f)
-{
-	call_rcu(&f->rcu, amdgpu_job_fence_free);
-}
-
 static const struct dma_fence_ops amdgpu_fence_ops = {
 	.get_driver_name = amdgpu_fence_get_driver_name,
 	.get_timeline_name = amdgpu_fence_get_timeline_name,
@@ -969,13 +859,6 @@
 	.release = amdgpu_fence_release,
 };
 
-static const struct dma_fence_ops amdgpu_job_fence_ops = {
-	.get_driver_name = amdgpu_fence_get_driver_name,
-	.get_timeline_name = amdgpu_job_fence_get_timeline_name,
-	.enable_signaling = amdgpu_job_fence_enable_signaling,
-	.release = amdgpu_job_fence_release,
-};
-
 /*
  * Fence debugfs
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7d9bcb72e8dd..39229ece83f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -149,17 +149,19 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	if (job) {
 		vm = job->vm;
 		fence_ctx = job->base.s_fence ?
-			job->base.s_fence->scheduled.context : 0;
+			job->base.s_fence->finished.context : 0;
 		shadow_va = job->shadow_va;
 		csa_va = job->csa_va;
 		gds_va = job->gds_va;
 		init_shadow = job->init_shadow;
-		af = &job->hw_fence;
+		af = job->hw_fence;
 		/* Save the context of the job for reset handling.
 		 * The driver needs this so it can skip the ring
 		 * contents for guilty contexts.
 		 */
-		af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+		af->context = fence_ctx;
+		/* the vm fence is also part of the job's context */
+		job->hw_vm_fence->context = fence_ctx;
 	} else {
 		vm = NULL;
 		fence_ctx = 0;
@@ -167,7 +169,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		csa_va = 0;
 		gds_va = 0;
 		init_shadow = false;
-		af = NULL;
+		af = kzalloc(sizeof(*af), GFP_ATOMIC);
+		if (!af)
+			return -ENOMEM;
 	}
 
 	if (!ring->sched.ready) {
@@ -289,7 +293,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
 	}
 
-	r = amdgpu_fence_emit(ring, f, af, fence_flags);
+	r = amdgpu_fence_emit(ring, af, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vmid)
@@ -297,6 +301,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		amdgpu_ring_undo(ring);
 		return r;
 	}
+	*f = &af->base;
 
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
@@ -317,7 +322,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	 * fence so we know what rings contents to backup
 	 * after we reset the queue.
 	 */
-	amdgpu_fence_save_wptr(*f);
+	amdgpu_fence_save_wptr(af);
 
 	amdgpu_ring_ib_end(ring);
 	amdgpu_ring_commit(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d020a890a0ea..e08d837668f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -137,7 +137,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 	    ring->funcs->reset) {
 		dev_err(adev->dev, "Starting %s ring reset\n",
 			s_job->sched->name);
-		r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
+		r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
 		if (!r) {
 			atomic_inc(&ring->adev->gpu_reset_counter);
 			dev_err(adev->dev, "Ring %s reset succeeded\n",
@@ -186,6 +186,9 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		     unsigned int num_ibs, struct amdgpu_job **job,
 		     u64 drm_client_id)
 {
+	struct amdgpu_fence *af;
+	int r;
+
 	if (num_ibs == 0)
 		return -EINVAL;
 
@@ -193,6 +196,20 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (!*job)
 		return -ENOMEM;
 
+	af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+	if (!af) {
+		r = -ENOMEM;
+		goto err_job;
+	}
+	(*job)->hw_fence = af;
+
+	af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+	if (!af) {
+		r = -ENOMEM;
+		goto err_fence;
+	}
+	(*job)->hw_vm_fence = af;
+
 	(*job)->vm = vm;
 
 	amdgpu_sync_create(&(*job)->explicit_sync);
@@ -204,6 +221,13 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	return drm_sched_job_init(&(*job)->base, entity, 1, owner,
 				  drm_client_id);
+
+err_fence:
+	kfree((*job)->hw_fence);
+err_job:
+	kfree(*job);
+
+	return r;
 }
 
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
@@ -251,11 +275,11 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
 	struct dma_fence *f;
 	unsigned i;
 
-	/* Check if any fences where initialized */
+	/* Check if any fences were initialized */
 	if (job->base.s_fence && job->base.s_fence->finished.ops)
 		f = &job->base.s_fence->finished;
-	else if (job->hw_fence.base.ops)
-		f = &job->hw_fence.base;
+	else if (job->hw_fence && job->hw_fence->base.ops)
+		f = &job->hw_fence->base;
 	else
 		f = NULL;
 
@@ -271,11 +295,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	amdgpu_sync_free(&job->explicit_sync);
 
-	/* only put the hw fence if has embedded fence */
-	if (!job->hw_fence.base.ops)
-		kfree(job);
-	else
-		dma_fence_put(&job->hw_fence.base);
+	kfree(job);
 }
 
 void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -304,10 +324,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
-	if (!job->hw_fence.base.ops)
-		kfree(job);
-	else
-		dma_fence_put(&job->hw_fence.base);
+	kfree(job);
 }
 
 struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 4a6487eb6cb5..7abf069d17d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -64,7 +64,8 @@ struct amdgpu_job {
 	struct drm_sched_job base;
 	struct amdgpu_vm *vm;
 	struct amdgpu_sync explicit_sync;
-	struct amdgpu_fence hw_fence;
+	struct amdgpu_fence *hw_fence;
+	struct amdgpu_fence *hw_vm_fence;
 	struct dma_fence *gang_submit;
 	uint32_t preamble_status;
 	uint32_t preemption_status;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4b46e3c26ff3..87b962df5460 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -147,16 +147,14 @@ struct amdgpu_fence {
 	u64 wptr;
 	/* fence context for resets */
 	u64 context;
-	uint32_t seq;
 };
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct dma_fence *fence);
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -166,8 +164,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
 int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
-		      struct amdgpu_fence *af, unsigned int flags);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+		      unsigned int flags);
 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
 			      uint32_t timeout);
 bool amdgpu_fence_process(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3a660ce75913..9309830821b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -779,7 +779,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	bool cleaner_shader_needed = false;
 	bool pasid_mapping_needed = false;
 	struct dma_fence *fence = NULL;
-	struct amdgpu_fence *af;
 	unsigned int patch;
 	int r;
 
@@ -842,12 +841,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	}
 
 	if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
-		r = amdgpu_fence_emit(ring, &fence, NULL, 0);
+		r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
 		if (r)
 			return r;
-		/* this is part of the job's context */
-		af = container_of(fence, struct amdgpu_fence, base);
-		af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+		fence = &job->hw_vm_fence->base;
 	}
 
 	if (vm_flush_needed) {
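For reference, a minimal caller sketch under the new contract; example_emit() is hypothetical and only mirrors the allocate/emit/publish pattern the patch adds to amdgpu_ib_schedule() above:

static int example_emit(struct amdgpu_ring *ring, struct dma_fence **f)
{
	struct amdgpu_fence *af;
	int r;

	/* the caller, not amdgpu_fence_emit(), owns the allocation now */
	af = kzalloc(sizeof(*af), GFP_KERNEL);
	if (!af)
		return -ENOMEM;

	r = amdgpu_fence_emit(ring, af, 0);
	if (r)
		return r;	/* mirrors amdgpu_ib_schedule(); no reclaim here */

	/* publish the dma_fence; the memory is later freed through
	 * amdgpu_fence_release() -> call_rcu() -> amdgpu_fence_free() */
	*f = &af->base;
	return 0;
}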
