From 1584f16ca96ef124aad79efa3303cff5f3530e2c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 28 Nov 2018 15:09:25 -0800 Subject: drm/v3d: Add support for submitting jobs to the TFU. The TFU can copy from raster, UIF, and SAND input images to UIF output images, with optional mipmap generation. This will certainly be useful for media EGL image input, but is also useful immediately for mipmap generation without bogging the V3D core down. For now we only run the queue 1 job deep, and don't have any hang recovery (though I don't think we should need it, with TFU). Queuing multiple jobs in the HW will require synchronizing the YUV coefficient regs updates since they don't get FIFOed with the job. v2: Change the ioctl to IOW instead of IOWR, always set COEF0, explain why TFU is AUTH, clarify the syncing docs, drop the unused TFU interrupt regs (you're expected to use the hub's), don't take &bo->base for NULL bos. v3: Fix a little whitespace alignment (noticed by checkpatch), rebase on drm_sched_job_cleanup() changes. Signed-off-by: Eric Anholt Reviewed-by: Dave Emett (v2) Link: https://patchwork.freedesktop.org/patch/264607/ --- drivers/gpu/drm/v3d/v3d_gem.c | 178 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 158 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/v3d/v3d_gem.c') diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 1e8947c7d954..6abe2fa43306 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -207,26 +207,27 @@ v3d_flush_caches(struct v3d_dev *v3d) } static void -v3d_attach_object_fences(struct v3d_exec_info *exec) +v3d_attach_object_fences(struct v3d_bo **bos, int bo_count, + struct dma_fence *fence) { - struct dma_fence *out_fence = exec->render_done_fence; int i; - for (i = 0; i < exec->bo_count; i++) { + for (i = 0; i < bo_count; i++) { /* XXX: Use shared fences for read-only objects. */ - reservation_object_add_excl_fence(exec->bo[i]->resv, out_fence); + reservation_object_add_excl_fence(bos[i]->resv, fence); } } static void v3d_unlock_bo_reservations(struct drm_device *dev, - struct v3d_exec_info *exec, + struct v3d_bo **bos, + int bo_count, struct ww_acquire_ctx *acquire_ctx) { int i; - for (i = 0; i < exec->bo_count; i++) - ww_mutex_unlock(&exec->bo[i]->resv->lock); + for (i = 0; i < bo_count; i++) + ww_mutex_unlock(&bos[i]->resv->lock); ww_acquire_fini(acquire_ctx); } @@ -240,7 +241,8 @@ v3d_unlock_bo_reservations(struct drm_device *dev, */ static int v3d_lock_bo_reservations(struct drm_device *dev, - struct v3d_exec_info *exec, + struct v3d_bo **bos, + int bo_count, struct ww_acquire_ctx *acquire_ctx) { int contended_lock = -1; @@ -250,7 +252,7 @@ v3d_lock_bo_reservations(struct drm_device *dev, retry: if (contended_lock != -1) { - struct v3d_bo *bo = exec->bo[contended_lock]; + struct v3d_bo *bo = bos[contended_lock]; ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, acquire_ctx); @@ -260,20 +262,20 @@ retry: } } - for (i = 0; i < exec->bo_count; i++) { + for (i = 0; i < bo_count; i++) { if (i == contended_lock) continue; - ret = ww_mutex_lock_interruptible(&exec->bo[i]->resv->lock, + ret = ww_mutex_lock_interruptible(&bos[i]->resv->lock, acquire_ctx); if (ret) { int j; for (j = 0; j < i; j++) - ww_mutex_unlock(&exec->bo[j]->resv->lock); + ww_mutex_unlock(&bos[j]->resv->lock); if (contended_lock != -1 && contended_lock >= i) { - struct v3d_bo *bo = exec->bo[contended_lock]; + struct v3d_bo *bo = bos[contended_lock]; ww_mutex_unlock(&bo->resv->lock); } @@ -293,10 +295,11 @@ retry: /* Reserve space for our shared (read-only) fence references, * before we commit the CL to the hardware. */ - for (i = 0; i < exec->bo_count; i++) { - ret = reservation_object_reserve_shared(exec->bo[i]->resv, 1); + for (i = 0; i < bo_count; i++) { + ret = reservation_object_reserve_shared(bos[i]->resv, 1); if (ret) { - v3d_unlock_bo_reservations(dev, exec, acquire_ctx); + v3d_unlock_bo_reservations(dev, bos, bo_count, + acquire_ctx); return ret; } } @@ -419,6 +422,33 @@ void v3d_exec_put(struct v3d_exec_info *exec) kref_put(&exec->refcount, v3d_exec_cleanup); } +static void +v3d_tfu_job_cleanup(struct kref *ref) +{ + struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job, + refcount); + struct v3d_dev *v3d = job->v3d; + unsigned int i; + + dma_fence_put(job->in_fence); + dma_fence_put(job->done_fence); + + for (i = 0; i < ARRAY_SIZE(job->bo); i++) { + if (job->bo[i]) + drm_gem_object_put_unlocked(&job->bo[i]->base); + } + + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + + kfree(job); +} + +void v3d_tfu_job_put(struct v3d_tfu_job *job) +{ + kref_put(&job->refcount, v3d_tfu_job_cleanup); +} + int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -536,7 +566,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; - ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx); + ret = v3d_lock_bo_reservations(dev, exec->bo, exec->bo_count, + &acquire_ctx); if (ret) goto fail; @@ -570,9 +601,10 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, &v3d_priv->sched_entity[V3D_RENDER]); mutex_unlock(&v3d->sched_lock); - v3d_attach_object_fences(exec); + v3d_attach_object_fences(exec->bo, exec->bo_count, + exec->render_done_fence); - v3d_unlock_bo_reservations(dev, exec, &acquire_ctx); + v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx); /* Update the return sync object for the */ sync_out = drm_syncobj_find(file_priv, args->out_sync); @@ -588,13 +620,119 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, fail_unreserve: mutex_unlock(&v3d->sched_lock); - v3d_unlock_bo_reservations(dev, exec, &acquire_ctx); + v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx); fail: v3d_exec_put(exec); return ret; } +/** + * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * Userspace provides the register setup for the TFU, which we don't + * need to validate since the TFU is behind the MMU. + */ +int +v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_submit_tfu *args = data; + struct v3d_tfu_job *job; + struct ww_acquire_ctx acquire_ctx; + struct drm_syncobj *sync_out; + struct dma_fence *sched_done_fence; + int ret = 0; + int bo_count; + + job = kcalloc(1, sizeof(*job), GFP_KERNEL); + if (!job) + return -ENOMEM; + + ret = pm_runtime_get_sync(v3d->dev); + if (ret < 0) { + kfree(job); + return ret; + } + + kref_init(&job->refcount); + + ret = drm_syncobj_find_fence(file_priv, args->in_sync, + 0, 0, &job->in_fence); + if (ret == -EINVAL) + goto fail; + + job->args = *args; + job->v3d = v3d; + + spin_lock(&file_priv->table_lock); + for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) { + struct drm_gem_object *bo; + + if (!args->bo_handles[bo_count]) + break; + + bo = idr_find(&file_priv->object_idr, + args->bo_handles[bo_count]); + if (!bo) { + DRM_DEBUG("Failed to look up GEM BO %d: %d\n", + bo_count, args->bo_handles[bo_count]); + ret = -ENOENT; + spin_unlock(&file_priv->table_lock); + goto fail; + } + drm_gem_object_get(bo); + job->bo[bo_count] = to_v3d_bo(bo); + } + spin_unlock(&file_priv->table_lock); + + ret = v3d_lock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx); + if (ret) + goto fail; + + mutex_lock(&v3d->sched_lock); + ret = drm_sched_job_init(&job->base, + &v3d_priv->sched_entity[V3D_TFU], + v3d_priv); + if (ret) + goto fail_unreserve; + + sched_done_fence = dma_fence_get(&job->base.s_fence->finished); + + kref_get(&job->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]); + mutex_unlock(&v3d->sched_lock); + + v3d_attach_object_fences(job->bo, bo_count, sched_done_fence); + + v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx); + + /* Update the return sync object */ + sync_out = drm_syncobj_find(file_priv, args->out_sync); + if (sync_out) { + drm_syncobj_replace_fence(sync_out, 0, sched_done_fence); + drm_syncobj_put(sync_out); + } + dma_fence_put(sched_done_fence); + + v3d_tfu_job_put(job); + + return 0; + +fail_unreserve: + mutex_unlock(&v3d->sched_lock); + v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx); +fail: + v3d_tfu_job_put(job); + + return ret; +} + int v3d_gem_init(struct drm_device *dev) { -- cgit v1.2.3 From e14a07fc4b961a75f6c275d6bd670ba54fbdae14 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 28 Nov 2018 15:09:26 -0800 Subject: drm/v3d: Drop the "dev" argument to lock/unlock of BO reservations. They were unused, as Dave Emett noticed in TFU review. Signed-off-by: Eric Anholt Cc: Dave Emett Link: https://patchwork.freedesktop.org/patch/msgid/20181128230927.10951-2-eric@anholt.net Reviewed-by: Daniel Vetter --- drivers/gpu/drm/v3d/v3d_gem.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/v3d/v3d_gem.c') diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 6abe2fa43306..8b4af512450f 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -219,8 +219,7 @@ v3d_attach_object_fences(struct v3d_bo **bos, int bo_count, } static void -v3d_unlock_bo_reservations(struct drm_device *dev, - struct v3d_bo **bos, +v3d_unlock_bo_reservations(struct v3d_bo **bos, int bo_count, struct ww_acquire_ctx *acquire_ctx) { @@ -240,8 +239,7 @@ v3d_unlock_bo_reservations(struct drm_device *dev, * to v3d, so we don't attach dma-buf fences to them. */ static int -v3d_lock_bo_reservations(struct drm_device *dev, - struct v3d_bo **bos, +v3d_lock_bo_reservations(struct v3d_bo **bos, int bo_count, struct ww_acquire_ctx *acquire_ctx) { @@ -298,7 +296,7 @@ retry: for (i = 0; i < bo_count; i++) { ret = reservation_object_reserve_shared(bos[i]->resv, 1); if (ret) { - v3d_unlock_bo_reservations(dev, bos, bo_count, + v3d_unlock_bo_reservations(bos, bo_count, acquire_ctx); return ret; } @@ -566,7 +564,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; - ret = v3d_lock_bo_reservations(dev, exec->bo, exec->bo_count, + ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx); if (ret) goto fail; @@ -604,7 +602,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, v3d_attach_object_fences(exec->bo, exec->bo_count, exec->render_done_fence); - v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx); + v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx); /* Update the return sync object for the */ sync_out = drm_syncobj_find(file_priv, args->out_sync); @@ -620,7 +618,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, fail_unreserve: mutex_unlock(&v3d->sched_lock); - v3d_unlock_bo_reservations(dev, exec->bo, exec->bo_count, &acquire_ctx); + v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx); fail: v3d_exec_put(exec); @@ -691,7 +689,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, } spin_unlock(&file_priv->table_lock); - ret = v3d_lock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx); + ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx); if (ret) goto fail; @@ -710,7 +708,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, v3d_attach_object_fences(job->bo, bo_count, sched_done_fence); - v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx); + v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx); /* Update the return sync object */ sync_out = drm_syncobj_find(file_priv, args->out_sync); @@ -726,7 +724,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, fail_unreserve: mutex_unlock(&v3d->sched_lock); - v3d_unlock_bo_reservations(dev, job->bo, bo_count, &acquire_ctx); + v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx); fail: v3d_tfu_job_put(job); -- cgit v1.2.3 From 55a9b74846ed5e6219c7d81a8e1bf96f25d8ad5e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 30 Nov 2018 16:57:58 -0800 Subject: drm/v3d: Add more tracepoints for V3D GPU rendering. The core scheduler tells us when the job is pushed to the scheduler's queue, and I had the job_run functions saying when they actually queue the job to the hardware. By adding tracepoints for the very top of the ioctls and the IRQs signaling job completion, "perf record -a -e v3d:.\* -e gpu_scheduler:.\* ; perf script" gets you a pretty decent timeline. Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20181201005759.28093-5-eric@anholt.net Reviewed-by: Dave Emett --- drivers/gpu/drm/v3d/v3d_gem.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/v3d/v3d_gem.c') diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 8b4af512450f..f565b197cba9 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -521,6 +521,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *sync_out; int ret = 0; + trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); + if (args->pad != 0) { DRM_INFO("pad must be zero: %d\n", args->pad); return -EINVAL; @@ -648,6 +650,8 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, int ret = 0; int bo_count; + trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); + job = kcalloc(1, sizeof(*job), GFP_KERNEL); if (!job) return -ENOMEM; -- cgit v1.2.3 From 0b258ed1a219a9776e8f6967eb34837ae0332e64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 14 Nov 2018 14:24:27 +0100 Subject: drm: revert "expand replace_fence to support timeline point v2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9a09a42369a4a37a959c051d8e1a1f948c1529a4. The whole interface isn't thought through. Since this function can't fail we actually can't allocate an object to store the sync point. Sorry, I should have taken the lead on this from the very beginning and reviewed it more thoughtfully. Going to propose a new interface as a follow up change. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Link: https://patchwork.freedesktop.org/patch/265580/ --- drivers/gpu/drm/v3d/v3d_gem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/v3d/v3d_gem.c') diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index f565b197cba9..cb99e53f7607 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -609,8 +609,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, /* Update the return sync object for the */ sync_out = drm_syncobj_find(file_priv, args->out_sync); if (sync_out) { - drm_syncobj_replace_fence(sync_out, 0, - exec->render_done_fence); + drm_syncobj_replace_fence(sync_out, exec->render_done_fence); drm_syncobj_put(sync_out); } -- cgit v1.2.3