Diffstat (limited to 'drivers/gpu/drm/scheduler')
 drivers/gpu/drm/scheduler/gpu_scheduler_trace.h  | 103
 drivers/gpu/drm/scheduler/sched_entity.c         |  26
 drivers/gpu/drm/scheduler/sched_fence.c          |   6
 drivers/gpu/drm/scheduler/sched_internal.h       |   2
 drivers/gpu/drm/scheduler/sched_main.c           |  37
 drivers/gpu/drm/scheduler/tests/mock_scheduler.c |   8
 drivers/gpu/drm/scheduler/tests/sched_tests.h    |   1
 7 files changed, 121 insertions(+), 62 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
index f56e77e7f6d0..261713dd7d5a 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -32,78 +32,123 @@
 #define TRACE_SYSTEM gpu_scheduler
 #define TRACE_INCLUDE_FILE gpu_scheduler_trace
 
+/**
+ * DOC: uAPI trace events
+ *
+ * ``drm_sched_job_queue``, ``drm_sched_job_run``, ``drm_sched_job_add_dep``,
+ * ``drm_sched_job_done`` and ``drm_sched_job_unschedulable`` are considered
+ * stable uAPI.
+ *
+ * Common trace events attributes:
+ *
+ * * ``dev`` - the dev_name() of the device running the job.
+ *
+ * * ``ring`` - the hardware ring running the job. Together with ``dev`` it
+ *   uniquely identifies where the job is going to be executed.
+ *
+ * * ``fence`` - the &struct dma_fence.context and the &struct dma_fence.seqno of
+ *   &struct drm_sched_fence.finished
+ *
+ * All the events depend on drm_sched_job_arm() having been called already for
+ * the job because they use &struct drm_sched_job.sched or
+ * &struct drm_sched_job.s_fence.
+ */
+
 DECLARE_EVENT_CLASS(drm_sched_job,
 	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
 	    TP_ARGS(sched_job, entity),
 	    TP_STRUCT__entry(
-			     __field(struct drm_sched_entity *, entity)
-			     __field(struct dma_fence *, fence)
 			     __string(name, sched_job->sched->name)
-			     __field(uint64_t, id)
 			     __field(u32, job_count)
 			     __field(int, hw_job_count)
+			     __string(dev, dev_name(sched_job->sched->dev))
+			     __field(u64, fence_context)
+			     __field(u64, fence_seqno)
+			     __field(u64, client_id)
 			     ),
 
 	    TP_fast_assign(
-			   __entry->entity = entity;
-			   __entry->id = sched_job->id;
-			   __entry->fence = &sched_job->s_fence->finished;
			   __assign_str(name);
 			   __entry->job_count = spsc_queue_count(&entity->job_queue);
 			   __entry->hw_job_count = atomic_read(
 				   &sched_job->sched->credit_count);
+			   __assign_str(dev);
+			   __entry->fence_context = sched_job->s_fence->finished.context;
+			   __entry->fence_seqno = sched_job->s_fence->finished.seqno;
+			   __entry->client_id = sched_job->s_fence->drm_client_id;
 			   ),
-	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
-		      __entry->entity, __entry->id,
-		      __entry->fence, __get_str(name),
-		      __entry->job_count, __entry->hw_job_count)
+	    TP_printk("dev=%s, fence=%llu:%llu, ring=%s, job count:%u, hw job count:%d, client_id:%llu",
+		      __get_str(dev),
+		      __entry->fence_context, __entry->fence_seqno, __get_str(name),
+		      __entry->job_count, __entry->hw_job_count, __entry->client_id)
 );
 
-DEFINE_EVENT(drm_sched_job, drm_sched_job,
+DEFINE_EVENT(drm_sched_job, drm_sched_job_queue,
 	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
 	    TP_ARGS(sched_job, entity)
 );
 
-DEFINE_EVENT(drm_sched_job, drm_run_job,
+DEFINE_EVENT(drm_sched_job, drm_sched_job_run,
 	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
 	    TP_ARGS(sched_job, entity)
 );
 
-TRACE_EVENT(drm_sched_process_job,
+TRACE_EVENT(drm_sched_job_done,
 	    TP_PROTO(struct drm_sched_fence *fence),
 	    TP_ARGS(fence),
 	    TP_STRUCT__entry(
-		    __field(struct dma_fence *, fence)
+		    __field(u64, fence_context)
+		    __field(u64, fence_seqno)
 		    ),
 
 	    TP_fast_assign(
-		    __entry->fence = &fence->finished;
+		    __entry->fence_context = fence->finished.context;
+		    __entry->fence_seqno = fence->finished.seqno;
 		    ),
-	    TP_printk("fence=%p signaled", __entry->fence)
+	    TP_printk("fence=%llu:%llu signaled",
+		      __entry->fence_context, __entry->fence_seqno)
 );
 
-TRACE_EVENT(drm_sched_job_wait_dep,
+TRACE_EVENT(drm_sched_job_add_dep,
+	    TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
+	    TP_ARGS(sched_job, fence),
+	    TP_STRUCT__entry(
+		    __field(u64, fence_context)
+		    __field(u64, fence_seqno)
+		    __field(u64, ctx)
+		    __field(u64, seqno)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->fence_context = sched_job->s_fence->finished.context;
+		    __entry->fence_seqno = sched_job->s_fence->finished.seqno;
+		    __entry->ctx = fence->context;
+		    __entry->seqno = fence->seqno;
+		    ),
+	    TP_printk("fence=%llu:%llu depends on fence=%llu:%llu",
+		      __entry->fence_context, __entry->fence_seqno,
+		      __entry->ctx, __entry->seqno)
+);
+
+TRACE_EVENT(drm_sched_job_unschedulable,
 	    TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
 	    TP_ARGS(sched_job, fence),
 	    TP_STRUCT__entry(
-		    __string(name, sched_job->sched->name)
-		    __field(uint64_t, id)
-		    __field(struct dma_fence *, fence)
-		    __field(uint64_t, ctx)
-		    __field(unsigned, seqno)
+		    __field(u64, fence_context)
+		    __field(u64, fence_seqno)
+		    __field(u64, ctx)
+		    __field(u64, seqno)
 		    ),
 
 	    TP_fast_assign(
-		    __assign_str(name);
-		    __entry->id = sched_job->id;
-		    __entry->fence = fence;
+		    __entry->fence_context = sched_job->s_fence->finished.context;
+		    __entry->fence_seqno = sched_job->s_fence->finished.seqno;
 		    __entry->ctx = fence->context;
 		    __entry->seqno = fence->seqno;
 		    ),
-	    TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
-		      __get_str(name), __entry->id,
-		      __entry->fence, __entry->ctx,
-		      __entry->seqno)
+	    TP_printk("fence=%llu:%llu depends on unsignalled fence=%llu:%llu",
+		      __entry->fence_context, __entry->fence_seqno,
+		      __entry->ctx, __entry->seqno)
 );
 
 #endif /* _GPU_SCHED_TRACE_H_ */
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index e671aa241720..5635b3a826d8 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -21,7 +21,7 @@
  *
  */
 
-#include <linux/kthread.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/completion.h>
 
@@ -478,10 +478,10 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 
 	while ((entity->dependency =
 			drm_sched_job_dependency(sched_job, entity))) {
-		trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
-
-		if (drm_sched_entity_add_dependency_cb(entity))
+		if (drm_sched_entity_add_dependency_cb(entity)) {
+			trace_drm_sched_job_unschedulable(sched_job, entity->dependency);
 			return NULL;
+		}
 	}
 
 	/* skip jobs from entity that marked guilty */
@@ -546,10 +546,10 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 		return;
 
 	/*
-	 * Only when the queue is empty are we guaranteed that the scheduler
-	 * thread cannot change ->last_scheduled. To enforce ordering we need
-	 * a read barrier here. See drm_sched_entity_pop_job() for the other
-	 * side.
+	 * Only when the queue is empty are we guaranteed that
+	 * drm_sched_run_job_work() cannot change entity->last_scheduled. To
+	 * enforce ordering we need a read barrier here. See
+	 * drm_sched_entity_pop_job() for the other side.
 	 */
 	smp_rmb();
 
@@ -587,7 +587,15 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 	bool first;
 	ktime_t submit_ts;
 
-	trace_drm_sched_job(sched_job, entity);
+	trace_drm_sched_job_queue(sched_job, entity);
+
+	if (trace_drm_sched_job_add_dep_enabled()) {
+		struct dma_fence *entry;
+		unsigned long index;
+
+		xa_for_each(&sched_job->dependencies, index, entry)
+			trace_drm_sched_job_add_dep(sched_job, entry);
+	}
 
 	atomic_inc(entity->rq->sched->score);
 	WRITE_ONCE(entity->last_user, current->group_leader);
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index e971528504a5..9391d6f0dc01 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -21,7 +21,7 @@
  *
  */
 
-#include <linux/kthread.h>
+#include <linux/export.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -206,7 +206,8 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
 EXPORT_SYMBOL(to_drm_sched_fence);
 
 struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
-					      void *owner)
+					      void *owner,
+					      u64 drm_client_id)
 {
 	struct drm_sched_fence *fence = NULL;
 
@@ -215,6 +216,7 @@ struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
 		return NULL;
 
 	fence->owner = owner;
+	fence->drm_client_id = drm_client_id;
 	spin_lock_init(&fence->lock);
 
 	return fence;
diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h
index 599cf6e1bb74..7ea5a6736f98 100644
--- a/drivers/gpu/drm/scheduler/sched_internal.h
+++ b/drivers/gpu/drm/scheduler/sched_internal.h
@@ -24,7 +24,7 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity);
 struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity);
 
 struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *s_entity,
-					      void *owner);
+					      void *owner, u64 drm_client_id);
 void drm_sched_fence_init(struct drm_sched_fence *fence,
 			  struct drm_sched_entity *entity);
 void drm_sched_fence_free(struct drm_sched_fence *fence);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 829579c41c6b..c63543132f9d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -66,6 +66,7 @@
  * This implies waiting for previously executed jobs.
  */
 
+#include <linux/export.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
@@ -391,7 +392,7 @@ static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
  * drm_sched_job_done - complete a job
  * @s_job: pointer to the job which is done
  *
- * Finish the job's fence and wake up the worker thread.
+ * Finish the job's fence and resubmit the work items.
  */
 static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 {
@@ -401,7 +402,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 	atomic_sub(s_job->credits, &sched->credit_count);
 	atomic_dec(sched->score);
 
-	trace_drm_sched_process_job(s_fence);
+	trace_drm_sched_job_done(s_fence);
 
 	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence, result);
@@ -551,9 +552,10 @@ static void drm_sched_job_timedout(struct work_struct *work)
 
 	if (job) {
 		/*
-		 * Remove the bad job so it cannot be freed by concurrent
-		 * drm_sched_cleanup_jobs. It will be reinserted back after sched->thread
-		 * is parked at which point it's safe.
+		 * Remove the bad job so it cannot be freed by a concurrent
+		 * &struct drm_sched_backend_ops.free_job. It will be
+		 * reinserted after the scheduler's work items have been
+		 * cancelled, at which point it's safe.
 		 */
 		list_del_init(&job->list);
 		spin_unlock(&sched->job_list_lock);
 
@@ -599,10 +601,10 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 
 	/*
 	 * Reinsert back the bad job here - now it's safe as
-	 * drm_sched_get_finished_job cannot race against us and release the
+	 * drm_sched_get_finished_job() cannot race against us and release the
 	 * bad job at this point - we parked (waited for) any in progress
-	 * (earlier) cleanups and drm_sched_get_finished_job will not be called
-	 * now until the scheduler thread is unparked.
+	 * (earlier) cleanups and drm_sched_get_finished_job() will not be
+	 * called now until the scheduler's work items are submitted again.
 	 */
 	if (bad && bad->sched == sched)
 		/*
@@ -615,7 +617,8 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 	 * Iterate the job list from later to earlier one and either deactive
 	 * their HW callbacks or remove them from pending list if they already
 	 * signaled.
-	 * This iteration is thread safe as sched thread is stopped.
+	 * This iteration is thread safe as the scheduler's work items have been
+	 * cancelled.
 	 */
 	list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
 					 list) {
@@ -680,9 +683,9 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, int errno)
 	struct drm_sched_job *s_job, *tmp;
 
 	/*
-	 * Locking the list is not required here as the sched thread is parked
-	 * so no new jobs are being inserted or removed. Also concurrent
-	 * GPU recovers can't run in parallel.
+	 * Locking the list is not required here as the scheduler's work items
+	 * are currently not running, so no new jobs are being inserted or
+	 * removed. Also concurrent GPU recovers can't run in parallel.
 	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
 		struct dma_fence *fence = s_job->s_fence->parent;
@@ -764,6 +767,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
  * @credits: the number of credits this job contributes to the schedulers
  * credit limit
  * @owner: job owner for debugging
+ * @drm_client_id: &struct drm_file.client_id of the owner (used by trace
+ * events)
  *
  * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
@@ -784,7 +789,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
  */
 int drm_sched_job_init(struct drm_sched_job *job,
 		       struct drm_sched_entity *entity,
-		       u32 credits, void *owner)
+		       u32 credits, void *owner,
+		       uint64_t drm_client_id)
 {
 	if (!entity->rq) {
 		/* This will most likely be followed by missing frames
@@ -810,7 +816,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
 
 	job->entity = entity;
 	job->credits = credits;
-	job->s_fence = drm_sched_fence_alloc(entity, owner);
+	job->s_fence = drm_sched_fence_alloc(entity, owner, drm_client_id);
 	if (!job->s_fence)
 		return -ENOMEM;
 
@@ -850,7 +856,6 @@ void drm_sched_job_arm(struct drm_sched_job *job)
 
 	job->sched = sched;
 	job->s_priority = entity->priority;
-	job->id = atomic64_inc_return(&sched->job_id_count);
 
 	drm_sched_fence_init(job->s_fence, job->entity);
 }
@@ -1229,7 +1234,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
 	atomic_add(sched_job->credits, &sched->credit_count);
 	drm_sched_job_begin(sched_job);
 
-	trace_drm_run_job(sched_job, entity);
+	trace_drm_sched_job_run(sched_job, entity);
 	/*
 	 * The run_job() callback must by definition return a fence whose
 	 * refcount has been incremented for the scheduler already.
diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
index f999c8859cf7..7f947ab9d322 100644
--- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
+++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c
@@ -64,7 +64,7 @@ static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job)
 	job->flags |= DRM_MOCK_SCHED_JOB_DONE;
 	list_move_tail(&job->link, &sched->done_list);
 
-	dma_fence_signal(&job->hw_fence);
+	dma_fence_signal_locked(&job->hw_fence);
 
 	complete(&job->done);
 }
@@ -117,13 +117,13 @@ drm_mock_sched_job_new(struct kunit *test,
 	ret = drm_sched_job_init(&job->base,
 				 &entity->base,
 				 1,
-				 NULL);
+				 NULL,
+				 1);
 	KUNIT_ASSERT_EQ(test, ret, 0);
 
 	job->test = test;
 
 	init_completion(&job->done);
-	spin_lock_init(&job->lock);
 	INIT_LIST_HEAD(&job->link);
 	hrtimer_setup(&job->timer, drm_mock_sched_job_signal_timer,
 		      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
@@ -169,7 +169,7 @@ static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job)
 
 	dma_fence_init(&job->hw_fence,
 		       &drm_mock_sched_hw_fence_ops,
-		       &job->lock,
+		       &sched->lock,
 		       sched->hw_timeline.context,
 		       atomic_inc_return(&sched->hw_timeline.next_seqno));
 
diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h
index 27caf8285fb7..fbba38137f0c 100644
--- a/drivers/gpu/drm/scheduler/tests/sched_tests.h
+++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h
@@ -106,7 +106,6 @@ struct drm_mock_sched_job {
 	unsigned int		duration_us;
 	ktime_t			finish_at;
 
-	spinlock_t		lock;
 	struct dma_fence	hw_fence;
 
 	struct kunit		*test;
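
For drivers picking up this series, the visible API change is the extra drm_client_id argument to drm_sched_job_init(), which ends up in the scheduler fence and in the stable uAPI trace events. Below is a minimal sketch of an updated driver submit path; struct my_job and my_submit() are hypothetical names for illustration, while drm_sched_job_init() and drm_file.client_id are the interfaces this patch actually uses:

#include <drm/drm_file.h>
#include <drm/gpu_scheduler.h>

struct my_job {
	struct drm_sched_job base;
};

static int my_submit(struct my_job *job, struct drm_sched_entity *entity,
		     struct drm_file *file_priv)
{
	/*
	 * The new last argument threads the submitting client's id into
	 * the scheduler fence, where the drm_sched_job_queue/run/done
	 * trace events report it as client_id.
	 */
	return drm_sched_job_init(&job->base, entity, 1, NULL,
				  file_priv->client_id);
}

Kernel-internal users with no DRM client to attribute can pass a constant instead, as the mock scheduler hunk above does with 1.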

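The mock scheduler hunks also drop the per-job spinlock and initialize the hardware fence with the scheduler-wide sched->lock, which is why dma_fence_signal() becomes dma_fence_signal_locked() in drm_mock_sched_job_complete(). The general rule, sketched below under the assumption that the caller already holds the lock the fence was initialized with, is that the _locked variant must be used whenever fence->lock is held, because dma_fence_signal() takes that lock itself:

#include <linux/dma-fence.h>
#include <linux/spinlock.h>

/* Hypothetical completion path; shared_lock stands in for sched->lock. */
static void complete_job_locked(struct dma_fence *hw_fence,
				spinlock_t *shared_lock)
{
	spin_lock(shared_lock);
	/*
	 * hw_fence was dma_fence_init()ed with shared_lock as its
	 * fence->lock, so plain dma_fence_signal() here would try to
	 * re-acquire it and self-deadlock.
	 */
	dma_fence_signal_locked(hw_fence);
	spin_unlock(shared_lock);
}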