diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 84 |
1 files changed, 44 insertions, 40 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 8841955927bb..aa6d4c71fba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -82,19 +82,11 @@ static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev, return false; } -static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev) -{ - if (amdgpu_device_should_recover_gpu(adev)) { - amdgpu_reset_domain_schedule(adev->reset_domain, - &adev->userq_reset_work); - /* Wait for the reset job to complete */ - flush_work(&adev->userq_reset_work); - } -} - -static int -amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) +static void amdgpu_userq_mgr_reset_work(struct work_struct *work) { + struct amdgpu_userq_mgr *uq_mgr = + container_of(work, struct amdgpu_userq_mgr, + reset_work); struct amdgpu_device *adev = uq_mgr->adev; const int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, @@ -103,12 +95,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) }; const int num_queue_types = ARRAY_SIZE(queue_types); bool gpu_reset = false; - int r = 0; - int i; + int i, r; if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "userq reset disabled by debug mask\n"); - return 0; + return; } /* @@ -116,7 +107,7 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) * skip all reset detection logic */ if (!amdgpu_gpu_recovery) - return 0; + return; /* * Iterate through all queue types to detect and reset problematic queues @@ -141,10 +132,19 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) } } - if (gpu_reset) - amdgpu_userq_gpu_reset(adev); + if (gpu_reset) { + struct amdgpu_reset_context reset_context; - return r; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + reset_context.src = AMDGPU_RESET_SRC_USERQ; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static void amdgpu_userq_hang_detect_work(struct work_struct *work) @@ -153,7 +153,11 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work) container_of(work, struct amdgpu_usermode_queue, hang_detect_work.work); - amdgpu_userq_detect_and_reset_queues(queue->userq_mgr); + /* + * Don't schedule the work here! Scheduling or queue work from one reset + * handler to another is illegal if you don't take extra precautions! + */ + amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work); } /* @@ -182,8 +186,8 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) break; } - schedule_delayed_work(&queue->hang_detect_work, - msecs_to_jiffies(timeout_ms)); + queue_delayed_work(adev->reset_domain->wq, &queue->hang_detect_work, + msecs_to_jiffies(timeout_ms)); } void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) @@ -1259,28 +1263,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) if (ret) { drm_file_err(uq_mgr->file, "Couldn't unmap all the queues, eviction failed ret=%d\n", ret); - amdgpu_userq_detect_and_reset_queues(uq_mgr); + amdgpu_reset_domain_schedule(uq_mgr->adev->reset_domain, + &uq_mgr->reset_work); + flush_work(&uq_mgr->reset_work); } return ret; } -void amdgpu_userq_reset_work(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - userq_reset_work); - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); - - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - reset_context.src = AMDGPU_RESET_SRC_USERQ; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ - - amdgpu_device_gpu_recover(adev, NULL, &reset_context); -} - static void amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) { @@ -1314,9 +1303,24 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f userq_mgr->file = file_priv; INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); + INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work); return 0; } +void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev) +{ + struct xarray *xa = &adev->userq_doorbell_xa; + struct amdgpu_usermode_queue *queue; + unsigned long flags, queue_id; + + xa_lock_irqsave(xa, flags); + xa_for_each(xa, queue_id, queue) { + cancel_delayed_work(&queue->hang_detect_work); + cancel_work(&queue->userq_mgr->reset_work); + } + xa_unlock_irqrestore(xa, flags); +} + void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr) { cancel_delayed_work_sync(&userq_mgr->resume_work); |
