| author | Dennis Li <Dennis.Li@amd.com> | 2020-09-01 09:03:53 +0800 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2020-09-03 14:46:55 -0400 |
| commit | 81202807ae6096bfbbfa28c2bc83cae1841bba38 (patch) | |
| tree | c165a6dea757faa271208675fae516c67b74b402 /drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | |
| parent | f6eb433954bf32ab582208d9b58ec397f7814e5a (diff) | |
drm/amdgpu: block ring buffer access during GPU recovery
When the GPU is in reset, its status isn't stable and the ring buffer also needs
to be reset when resuming. Therefore the driver should protect the GPU recovery
thread from having the ring buffer accessed by other threads. Otherwise the GPU
will randomly hang during recovery.
v2: correct indent
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 |
1 file changed, 7 insertions, 4 deletions
```diff
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 1ca79030e95e..93ee77b14cc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -503,13 +503,14 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * as GFXOFF under bare metal
 	 */
 	if (adev->gfx.kiq.ring.sched.ready &&
-	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-	    !amdgpu_in_reset(adev)) {
+	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+	    down_read_trylock(&adev->reset_sem)) {
 		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
 		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
 		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
 						   1 << vmid);
+		up_read(&adev->reset_sem);
 		return;
 	}
 
@@ -602,7 +603,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	if (amdgpu_in_reset(adev))
 		return -EIO;
 
-	if (ring->sched.ready) {
+	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
 		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
 		 * heavy-weight TLB flush (type 2), which flushes
 		 * both. Due to a race condition with concurrent
@@ -629,6 +630,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 		if (r) {
 			amdgpu_ring_undo(ring);
 			spin_unlock(&adev->gfx.kiq.ring_lock);
+			up_read(&adev->reset_sem);
 			return -ETIME;
 		}
 
@@ -637,9 +639,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
 		if (r < 1) {
 			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
+			up_read(&adev->reset_sem);
 			return -ETIME;
 		}
 
-
+		up_read(&adev->reset_sem);
 		return 0;
 	}
```
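The pattern the patch applies is a reader/writer lock around hardware access: threads that touch the ring buffer take `adev->reset_sem` for reading with `down_read_trylock()`, and if the recovery thread holds it for writing they fall back to another path instead of blocking. Below is a minimal userspace sketch of the same idea, using a pthread rwlock in place of the kernel's `rw_semaphore`; the `struct device`, `ring_write()` and `gpu_recover()` names are hypothetical stand-ins, not amdgpu code.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for struct amdgpu_device: the rwlock plays
 * the role of adev->reset_sem in the patch. */
struct device {
	pthread_rwlock_t reset_sem;
};

/* Reader side, mirroring the gmc_v9_0_flush_gpu_tlb() change: take
 * the lock shared with a trylock; if recovery holds it exclusively,
 * skip the hardware access instead of sleeping on the lock. */
static bool ring_write(struct device *dev, int value)
{
	if (pthread_rwlock_tryrdlock(&dev->reset_sem) != 0)
		return false;	/* GPU in reset: don't touch the ring */

	printf("ring access: %d\n", value);	/* stands in for register writes */

	pthread_rwlock_unlock(&dev->reset_sem);	/* up_read() */
	return true;
}

/* Writer side: the recovery thread holds the lock exclusively for the
 * whole reset, so no reader can touch the ring mid-recovery. */
static void gpu_recover(struct device *dev)
{
	pthread_rwlock_wrlock(&dev->reset_sem);	/* down_write() */
	printf("resetting GPU, reinitializing ring buffer\n");
	pthread_rwlock_unlock(&dev->reset_sem);	/* up_write() */
}

int main(void)
{
	struct device dev;

	pthread_rwlock_init(&dev.reset_sem, NULL);
	ring_write(&dev, 42);	/* succeeds: no recovery in flight */
	gpu_recover(&dev);
	ring_write(&dev, 43);	/* succeeds again after recovery */
	pthread_rwlock_destroy(&dev.reset_sem);
	return 0;
}
```

The trylock on the reader side is the key choice: when the lock is write-held, `gmc_v9_0_flush_gpu_tlb()` falls through to its non-KIQ register path and `gmc_v9_0_flush_gpu_tlb_pasid()` takes the per-VMID loop instead, rather than queueing work on a KIQ ring that is about to be reset.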
