diff options
author | Chia-I Wu <olvaffe@gmail.com> | 2025-07-17 23:38:16 -0700 |
---|---|---|
committer | Steven Price <steven.price@arm.com> | 2025-07-24 13:43:04 +0100 |
commit | e48123c607a0db8b9ad02f83c8c3d39918dbda06 (patch) | |
tree | 37f050ca630d1c77bb2c22cf5ba2bf7353cf18ed | |
parent | 33b9cb6dcda2520600ac4fec725946af32c2e586 (diff) |
panthor: dump task pid and comm on gpu errors
It is useful to know which tasks cause gpu errors.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://lore.kernel.org/r/20250718063816.1452123-4-olvaffe@gmail.com
-rw-r--r-- | drivers/gpu/drm/panthor/panthor_sched.c | 24 |
1 files changed, 19 insertions, 5 deletions
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 9f4d04214053..ba5dc3e443d9 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -1364,8 +1364,12 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
 	fatal = cs_iface->output->fatal;
 	info = cs_iface->output->fatal_info;
 
-	if (group)
+	if (group) {
+		drm_warn(&ptdev->base, "CS_FATAL: pid=%d, comm=%s\n",
+			 group->task_info.pid, group->task_info.comm);
+
 		group->fatal_queues |= BIT(cs_id);
+	}
 
 	if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {
 		/* If this exception is unrecoverable, queue a reset, and make
@@ -1425,6 +1429,11 @@ cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
 		spin_unlock(&queue->fence_ctx.lock);
 	}
 
+	if (group) {
+		drm_warn(&ptdev->base, "CS_FAULT: pid=%d, comm=%s\n",
+			 group->task_info.pid, group->task_info.comm);
+	}
+
 	drm_warn(&ptdev->base,
 		 "CSG slot %d CS slot: %d\n"
 		 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
@@ -1641,11 +1650,15 @@ csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 c
 
 	lockdep_assert_held(&sched->lock);
 
-	drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id);
-
 	group = csg_slot->group;
-	if (!drm_WARN_ON(&ptdev->base, !group))
+	if (!drm_WARN_ON(&ptdev->base, !group)) {
+		drm_warn(&ptdev->base, "CSG_PROGRESS_TIMER_EVENT: pid=%d, comm=%s\n",
+			 group->task_info.pid, group->task_info.comm);
+
 		group->timedout = true;
+	}
+
+	drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id);
 
 	sched_queue_delayed_work(sched, tick, 0);
 }
@@ -3227,7 +3240,8 @@ queue_timedout_job(struct drm_sched_job *sched_job)
 	struct panthor_scheduler *sched = ptdev->scheduler;
 	struct panthor_queue *queue = group->queues[job->queue_idx];
 
-	drm_warn(&ptdev->base, "job timeout\n");
+	drm_warn(&ptdev->base, "job timeout: pid=%d, comm=%s, seqno=%llu\n",
+		 group->task_info.pid, group->task_info.comm, job->done_fence->seqno);
 
 	drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));