summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c84
1 files changed, 44 insertions, 40 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 8841955927bb..aa6d4c71fba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -82,19 +82,11 @@ static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev,
return false;
}
-static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev)
-{
- if (amdgpu_device_should_recover_gpu(adev)) {
- amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->userq_reset_work);
- /* Wait for the reset job to complete */
- flush_work(&adev->userq_reset_work);
- }
-}
-
-static int
-amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
+static void amdgpu_userq_mgr_reset_work(struct work_struct *work)
{
+ struct amdgpu_userq_mgr *uq_mgr =
+ container_of(work, struct amdgpu_userq_mgr,
+ reset_work);
struct amdgpu_device *adev = uq_mgr->adev;
const int queue_types[] = {
AMDGPU_RING_TYPE_COMPUTE,
@@ -103,12 +95,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
};
const int num_queue_types = ARRAY_SIZE(queue_types);
bool gpu_reset = false;
- int r = 0;
- int i;
+ int i, r;
if (unlikely(adev->debug_disable_gpu_ring_reset)) {
dev_err(adev->dev, "userq reset disabled by debug mask\n");
- return 0;
+ return;
}
/*
@@ -116,7 +107,7 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
* skip all reset detection logic
*/
if (!amdgpu_gpu_recovery)
- return 0;
+ return;
/*
* Iterate through all queue types to detect and reset problematic queues
@@ -141,10 +132,19 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
}
}
- if (gpu_reset)
- amdgpu_userq_gpu_reset(adev);
+ if (gpu_reset) {
+ struct amdgpu_reset_context reset_context;
- return r;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USERQ;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
}
static void amdgpu_userq_hang_detect_work(struct work_struct *work)
@@ -153,7 +153,11 @@ static void amdgpu_userq_hang_detect_work(struct work_struct *work)
container_of(work, struct amdgpu_usermode_queue,
hang_detect_work.work);
- amdgpu_userq_detect_and_reset_queues(queue->userq_mgr);
+ /*
+ * Don't schedule the work here! Scheduling or queue work from one reset
+ * handler to another is illegal if you don't take extra precautions!
+ */
+ amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work);
}
/*
@@ -182,8 +186,8 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue)
break;
}
- schedule_delayed_work(&queue->hang_detect_work,
- msecs_to_jiffies(timeout_ms));
+ queue_delayed_work(adev->reset_domain->wq, &queue->hang_detect_work,
+ msecs_to_jiffies(timeout_ms));
}
void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell)
@@ -1259,28 +1263,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
if (ret) {
drm_file_err(uq_mgr->file,
"Couldn't unmap all the queues, eviction failed ret=%d\n", ret);
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ amdgpu_reset_domain_schedule(uq_mgr->adev->reset_domain,
+ &uq_mgr->reset_work);
+ flush_work(&uq_mgr->reset_work);
}
return ret;
}
-void amdgpu_userq_reset_work(struct work_struct *work)
-{
- struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
- userq_reset_work);
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- reset_context.src = AMDGPU_RESET_SRC_USERQ;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
-
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-}
-
static void
amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
{
@@ -1314,9 +1303,24 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
userq_mgr->file = file_priv;
INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
+ INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work);
return 0;
}
+void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev)
+{
+ struct xarray *xa = &adev->userq_doorbell_xa;
+ struct amdgpu_usermode_queue *queue;
+ unsigned long flags, queue_id;
+
+ xa_lock_irqsave(xa, flags);
+ xa_for_each(xa, queue_id, queue) {
+ cancel_delayed_work(&queue->hang_detect_work);
+ cancel_work(&queue->userq_mgr->reset_work);
+ }
+ xa_unlock_irqrestore(xa, flags);
+}
+
void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr)
{
cancel_delayed_work_sync(&userq_mgr->resume_work);