From f4e9af4f5af5dab9aee632f3aff0bd8040f1b2c5 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:30 +0530 Subject: drm/i915: Add low level set of routines for programming PM IER/IIR/IMR register set So far PM IER/IIR/IMR registers were being used only for Turbo related interrupts. But interrupts coming from GuC also use the same set. As a precursor to supporting GuC interrupts, added new low level routines so as to allow sharing the programming of PM IER/IIR/IMR registers between Turbo & GuC. Also similar to PM IMR, maintaining a bitmask for PM IER register, to allow easy sharing of it between Turbo & GuC without involving a rmw operation. v2: - For appropriateness & avoid any ambiguity, rename old functions enable/disable pm_irq to mask/unmask pm_irq and rename new functions enable/disable pm_interrupts to enable/disable pm_irq. (Tvrtko) - Use u32 in place of uint32_t. (Tvrtko) v3: - Rename the fields pm_irq_mask & pm_ier_mask and do some cleanup. (Chris) - Rebase. v4: Fix the inadvertent disabling of User interrupt for VECS ring causing failure for certain IGTs. v5: Use dev_priv with HAS_VEBOX macro. (Tvrtko) Suggested-by: Chris Wilson Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 32786ba199b9..67a70c5e6453 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1608,7 +1608,7 @@ hsw_vebox_irq_enable(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE_IMR(engine, ~engine->irq_enable_mask); - gen6_enable_pm_irq(dev_priv, engine->irq_enable_mask); + gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask); } static void @@ -1617,7 +1617,7 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE_IMR(engine, ~0); - gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask); + gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask); } static void -- cgit v1.2.3 From e95433c73a11759203af1cae5958f998c9673370 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:27 +0100 Subject: drm/i915: Rearrange i915_wait_request() accounting with callers Our low-level wait routine has evolved from our generic wait interface that handled unlocked, RPS boosting, waits with time tracking. If we push our GEM fence tracking to use reservation_objects (required for handling multiple timelines), we lose the ability to pass the required information down to i915_wait_request(). However, if we push the extra functionality from i915_wait_request() to the individual callsites (i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use of those extras, we can both simplify our low level wait and prepare for extending the GEM interface for use of reservation_objects. v2: Rewrite i915_wait_request() kerneldocs Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 67a70c5e6453..8ef735faa603 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2155,7 +2155,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) { struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; - int ret; + long timeout; + + lockdep_assert_held(&req->i915->drm.struct_mutex); intel_ring_update_space(ring); if (ring->space >= bytes) @@ -2185,11 +2187,11 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - ret = i915_wait_request(target, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - NULL, NO_WAITBOOST); - if (ret) - return ret; + timeout = i915_wait_request(target, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + return timeout; i915_gem_request_retire_upto(target); -- cgit v1.2.3 From f8a7fde4561067a8ebc956b27afeb530ac97cb9d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:29 +0100 Subject: drm/i915: Defer active reference until required We only need the active reference to keep the object alive after the handle has been deleted (so as to prevent a synchronous gem_close). Why then pay the price of a kref on every execbuf when we can insert that final active ref just in time for the handle deletion? Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8ef735faa603..8eee9675d3bf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1762,14 +1762,19 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine) { struct i915_vma *vma; + struct drm_i915_gem_object *obj; vma = fetch_and_zero(&engine->status_page.vma); if (!vma) return; + obj = vma->obj; + i915_vma_unpin(vma); - i915_gem_object_unpin_map(vma->obj); - i915_vma_put(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_map(obj); + __i915_gem_object_release_unless_active(obj); } static int init_status_page(struct intel_engine_cs *engine) @@ -1967,7 +1972,11 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) void intel_ring_free(struct intel_ring *ring) { - i915_vma_put(ring->vma); + struct drm_i915_gem_object *obj = ring->vma->obj; + + i915_vma_close(ring->vma); + __i915_gem_object_release_unless_active(obj); + kfree(ring); } -- cgit v1.2.3 From 920cf4194954ec6f971506013c7fe3b7def178b6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:30 +0100 Subject: drm/i915: Introduce an internal allocator for disposable private objects Quite a few of our objects used for internal hardware programming do not benefit from being swappable or from being zero initialised. As such they do not benefit from using a shmemfs backing storage and since they are internal and never directly exposed to the user, we do not need to worry about providing a filp. For these we can use an drm_i915_gem_object wrapper around a sg_table of plain struct page. They are not swap backed and not automatically pinned. If they are reaped by the shrinker, the pages are released and the contents discarded. For the internal use case, this is fine as for example, ringbuffers are pinned from being written by a request to be read by the hardware. Once they are idle, they can be discarded entirely. As such they are a good match for execlist ringbuffers and a small variety of other internal objects. In the first iteration, this is limited to the scratch batch buffers we use (for command parsing and state initialisation). v2: Allocate physically contiguous pages, where possible. v3: Reduce maximum order on subsequent requests following an allocation failure. v4: Fix up mismatch between swiotlb segment size and page count (it counts in 2k units, not 4k pages) Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8eee9675d3bf..a15b9b5f2924 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1782,9 +1782,10 @@ static int init_status_page(struct intel_engine_cs *engine) struct drm_i915_gem_object *obj; struct i915_vma *vma; unsigned int flags; + void *vaddr; int ret; - obj = i915_gem_object_create(&engine->i915->drm, 4096); + obj = i915_gem_object_create_internal(engine->i915, 4096); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); return PTR_ERR(obj); @@ -1817,15 +1818,22 @@ static int init_status_page(struct intel_engine_cs *engine) if (ret) goto err; + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_unpin; + } + engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = - i915_gem_object_pin_map(obj, I915_MAP_WB); + engine->status_page.page_addr = memset(vaddr, 0, 4096); DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", engine->name, i915_ggtt_offset(vma)); return 0; +err_unpin: + i915_vma_unpin(vma); err: i915_gem_object_put(obj); return ret; -- cgit v1.2.3 From 4e50f082ac51c95046a8315612ce1d9acb2b3d63 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:31 +0100 Subject: drm/i915: Reuse the active golden render state batch The golden render state is constant, but we recreate the batch setting it up for every new context. If we keep that batch in a volatile cache we can safely reuse it whenever we need to initialise a new context. We mark the pages as purgeable and use the shrinker to recover pages from the batch whenever we face memory pressues, recreating that batch afresh on the next new context. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a15b9b5f2924..aaa46d9ffbc1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -648,7 +648,7 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) if (ret != 0) return ret; - ret = i915_gem_render_state_init(req); + ret = i915_gem_render_state_emit(req); if (ret) return ret; -- cgit v1.2.3 From 65e4760e3920c21073a9d737929dc36df561380f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:49 +0100 Subject: drm/i915: Introduce a global_seqno for each request Though we will have multiple timelines, we still have a single timeline of execution. This we can use to provide an execution and retirement order of requests. This keeps tracking execution of requests simple, and vital for preserving a single waiter (i.e. so that we can order the waiters so that only the earliest to wakeup need be woken). To accomplish this we distinguish the seqno used to order requests per-context (external) and that used internally for execution. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-26-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index aaa46d9ffbc1..76c6b70303fb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1238,7 +1238,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req) PIPE_CONTROL_CS_STALL); intel_ring_emit(ring, lower_32_bits(gtt_offset)); intel_ring_emit(ring, upper_32_bits(gtt_offset)); - intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, req->global_seqno); intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_SEMAPHORE_SIGNAL | @@ -1274,7 +1274,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req) lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT); intel_ring_emit(ring, upper_32_bits(gtt_offset)); - intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, req->global_seqno); intel_ring_emit(ring, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); @@ -1308,7 +1308,7 @@ static int gen6_signal(struct drm_i915_gem_request *req) if (i915_mmio_reg_valid(mbox_reg)) { intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit_reg(ring, mbox_reg); - intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, req->global_seqno); } } @@ -1339,7 +1339,7 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req) intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, req->global_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_advance(ring); @@ -1389,7 +1389,7 @@ static int gen8_render_emit_request(struct drm_i915_gem_request *req) PIPE_CONTROL_QW_WRITE)); intel_ring_emit(ring, intel_hws_seqno_address(engine)); intel_ring_emit(ring, 0); - intel_ring_emit(ring, i915_gem_request_get_seqno(req)); + intel_ring_emit(ring, req->global_seqno); /* We're thrashing one dword of HWS. */ intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_USER_INTERRUPT); @@ -1427,7 +1427,7 @@ gen8_ring_sync_to(struct drm_i915_gem_request *req, MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(ring, signal->fence.seqno); + intel_ring_emit(ring, signal->global_seqno); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_advance(ring); @@ -1465,7 +1465,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req, * seqno is >= the last seqno executed. However for hardware the * comparison is strictly greater than. */ - intel_ring_emit(ring, signal->fence.seqno - 1); + intel_ring_emit(ring, signal->global_seqno - 1); intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); -- cgit v1.2.3 From 9b81d556b11fe58827dcd87bc5deaf8da2f716ae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:50 +0100 Subject: drm/i915: Rename ->emit_request to ->emit_breadcrumb Now that the emission of the request tail and its submission to hardware are two separate steps, engine->emit_request() is confusing. engine->emit_request() is called to emit the breadcrumb commands for the request into the ring, name it such (engine->emit_breadcrumb). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-27-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 76c6b70303fb..54c3981cf716 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1328,7 +1328,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) intel_ring_offset(request->ring, request->tail)); } -static int i9xx_emit_request(struct drm_i915_gem_request *req) +static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; int ret; @@ -1349,14 +1349,14 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req) } /** - * gen6_sema_emit_request - Update the semaphore mailbox registers + * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers * * @request - request to write to the ring * * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ -static int gen6_sema_emit_request(struct drm_i915_gem_request *req) +static int gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req) { int ret; @@ -1364,10 +1364,10 @@ static int gen6_sema_emit_request(struct drm_i915_gem_request *req) if (ret) return ret; - return i9xx_emit_request(req); + return i9xx_emit_breadcrumb(req); } -static int gen8_render_emit_request(struct drm_i915_gem_request *req) +static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; struct intel_ring *ring = req->ring; @@ -2637,9 +2637,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->init_hw = init_ring_common; engine->reset_hw = reset_ring_common; - engine->emit_request = i9xx_emit_request; + engine->emit_breadcrumb = i9xx_emit_breadcrumb; if (i915.semaphores) - engine->emit_request = gen6_sema_emit_request; + engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; engine->submit_request = i9xx_submit_request; if (INTEL_GEN(dev_priv) >= 8) @@ -2666,7 +2666,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; - engine->emit_request = gen8_render_emit_request; + engine->emit_breadcrumb = gen8_render_emit_breadcrumb; engine->emit_flush = gen8_render_ring_flush; if (i915.semaphores) engine->semaphore.signal = gen8_rcs_signal; -- cgit v1.2.3 From 98f29e8d908f2b9e3d966f6f7d63cd69b4aaf0a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:51 +0100 Subject: drm/i915: Record space required for breadcrumb emission In the next patch, we will use deferred breadcrumb emission. That requires reserving sufficient space in the ringbuffer to emit the breadcrumb, which first requires us to know how large the breadcrumb is. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-28-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 54c3981cf716..ae9cf6bb4def 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1348,6 +1348,8 @@ static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req) return 0; } +static const int i9xx_emit_breadcrumb_sz = 4; + /** * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers * @@ -1401,6 +1403,8 @@ static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req) return 0; } +static const int gen8_render_emit_breadcrumb_sz = 8; + /** * intel_ring_sync - sync the waiter to the signaller on seqno * @@ -2638,8 +2642,21 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->reset_hw = reset_ring_common; engine->emit_breadcrumb = i9xx_emit_breadcrumb; - if (i915.semaphores) + engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; + if (i915.semaphores) { + int num_rings; + engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; + + num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; + if (INTEL_GEN(dev_priv) >= 8) { + engine->emit_breadcrumb_sz += num_rings * 6; + } else { + engine->emit_breadcrumb_sz += num_rings * 3; + if (num_rings & 1) + engine->emit_breadcrumb_sz++; + } + } engine->submit_request = i9xx_submit_request; if (INTEL_GEN(dev_priv) >= 8) @@ -2667,9 +2684,17 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; engine->emit_breadcrumb = gen8_render_emit_breadcrumb; + engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz; engine->emit_flush = gen8_render_ring_flush; - if (i915.semaphores) + if (i915.semaphores) { + int num_rings; + engine->semaphore.signal = gen8_rcs_signal; + + num_rings = + hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; + engine->emit_breadcrumb_sz += num_rings * 6; + } } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; -- cgit v1.2.3 From caddfe7192f5e74d65ebcfdae614f99e8fd87222 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:52 +0100 Subject: drm/i915: Defer breadcrumb emission Move the actual emission of the breadcrumb for closing the request from i915_add_request() to the submit callback. (It can be moved later when required.) This allows us to defer the allocation of the global_seqno from request construction to actual submission, allowing us to emit the requests out of order (wrt to the order of their construction, they still will only be executed one all of their dependencies are resolved including that all earlier requests on their timeline have been submitted.) We have to specialise how we then emit the request in order to write into the preallocated space, rather than at the tail of the ringbuffer (which will have been advanced by the addition of new requests). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-29-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 169 +++++++++++--------------------- 1 file changed, 57 insertions(+), 112 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ae9cf6bb4def..16244775b9d1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1213,90 +1213,62 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&dev_priv->semaphore); } -static int gen8_rcs_signal(struct drm_i915_gem_request *req) +static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; - int ret, num_rings; - - num_rings = INTEL_INFO(dev_priv)->num_rings; - ret = intel_ring_begin(req, (num_rings-1) * 8); - if (ret) - return ret; for_each_engine(waiter, dev_priv, id) { u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - intel_ring_emit(ring, lower_32_bits(gtt_offset)); - intel_ring_emit(ring, upper_32_bits(gtt_offset)); - intel_ring_emit(ring, req->global_seqno); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, - MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(ring, 0); + *out++ = GFX_OP_PIPE_CONTROL(6); + *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); + *out++ = lower_32_bits(gtt_offset); + *out++ = upper_32_bits(gtt_offset); + *out++ = req->global_seqno; + *out++ = 0; + *out++ = (MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); + *out++ = 0; } - intel_ring_advance(ring); - return 0; + return out; } -static int gen8_xcs_signal(struct drm_i915_gem_request *req) +static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; - int ret, num_rings; - - num_rings = INTEL_INFO(dev_priv)->num_rings; - ret = intel_ring_begin(req, (num_rings-1) * 6); - if (ret) - return ret; for_each_engine(waiter, dev_priv, id) { u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(ring, - (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); - intel_ring_emit(ring, - lower_32_bits(gtt_offset) | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, upper_32_bits(gtt_offset)); - intel_ring_emit(ring, req->global_seqno); - intel_ring_emit(ring, - MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(ring, 0); + *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; + *out++ = upper_32_bits(gtt_offset); + *out++ = req->global_seqno; + *out++ = (MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); + *out++ = 0; } - intel_ring_advance(ring); - return 0; + return out; } -static int gen6_signal(struct drm_i915_gem_request *req) +static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine; enum intel_engine_id id; - int ret, num_rings; - - num_rings = INTEL_INFO(dev_priv)->num_rings; - ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2)); - if (ret) - return ret; + int num_rings = 0; for_each_engine(engine, dev_priv, id) { i915_reg_t mbox_reg; @@ -1306,46 +1278,34 @@ static int gen6_signal(struct drm_i915_gem_request *req) mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, mbox_reg); - intel_ring_emit(ring, req->global_seqno); + *out++ = MI_LOAD_REGISTER_IMM(1); + *out++ = i915_mmio_reg_offset(mbox_reg); + *out++ = req->global_seqno; + num_rings++; } } + if (num_rings & 1) + *out++ = MI_NOOP; - /* If num_dwords was rounded, make sure the tail pointer is correct */ - if (num_rings % 2 == 0) - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - return 0; + return out; } static void i9xx_submit_request(struct drm_i915_gem_request *request) { struct drm_i915_private *dev_priv = request->i915; - I915_WRITE_TAIL(request->engine, - intel_ring_offset(request->ring, request->tail)); + I915_WRITE_TAIL(request->engine, request->tail); } -static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req) +static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, + u32 *out) { - struct intel_ring *ring = req->ring; - int ret; - - ret = intel_ring_begin(req, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_STORE_DWORD_INDEX); - intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, req->global_seqno); - intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_advance(ring); - - req->tail = ring->tail; + *out++ = MI_STORE_DWORD_INDEX; + *out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; + *out++ = req->global_seqno; + *out++ = MI_USER_INTERRUPT; - return 0; + req->tail = intel_ring_offset(req->ring, out); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -1358,49 +1318,34 @@ static const int i9xx_emit_breadcrumb_sz = 4; * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ -static int gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req) +static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, + u32 *out) { - int ret; - - ret = req->engine->semaphore.signal(req); - if (ret) - return ret; - - return i9xx_emit_breadcrumb(req); + return i9xx_emit_breadcrumb(req, + req->engine->semaphore.signal(req, out)); } -static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req) +static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, + u32 *out) { struct intel_engine_cs *engine = req->engine; - struct intel_ring *ring = req->ring; - int ret; - if (engine->semaphore.signal) { - ret = engine->semaphore.signal(req); - if (ret) - return ret; - } - - ret = intel_ring_begin(req, 8); - if (ret) - return ret; + if (engine->semaphore.signal) + out = engine->semaphore.signal(req, out); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB | + *out++ = GFX_OP_PIPE_CONTROL(6); + *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_ring_emit(ring, intel_hws_seqno_address(engine)); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, req->global_seqno); + PIPE_CONTROL_QW_WRITE); + *out++ = intel_hws_seqno_address(engine); + *out++ = 0; + *out++ = req->global_seqno; /* We're thrashing one dword of HWS. */ - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - req->tail = ring->tail; + *out++ = 0; + *out++ = MI_USER_INTERRUPT; + *out++ = MI_NOOP; - return 0; + req->tail = intel_ring_offset(req->ring, out); } static const int gen8_render_emit_breadcrumb_sz = 8; -- cgit v1.2.3 From 85e17f5974b357bc4a127be09de71b430be265e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:53 +0100 Subject: drm/i915: Move the global sync optimisation to the timeline Currently we try to reduce the number of synchronisations (now the number of requests we need to wait upon) by noting that if we have earlier waited upon a request, all subsequent requests in the timeline will be after the wait. This only applies to requests in this timeline, as other timelines will not be ordered by that waiter. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-30-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 16244775b9d1..188fdec5fa6b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2003,9 +2003,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) intel_engine_setup_common(engine); - memset(engine->semaphore.sync_seqno, 0, - sizeof(engine->semaphore.sync_seqno)); - ret = intel_engine_init_common(engine); if (ret) goto error; -- cgit v1.2.3 From 07c9a21a0d594c3acc0983e5e0a25d2364188d51 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 30 Oct 2016 13:28:20 +0000 Subject: drm/i915: Export a function to flush the context upon pinning For legacy contexts we employ an optimisation to only flush the context when binding into the global GTT. This avoids stalling on the GPU when reloading an active context. Wrap this detail up into a helper and export it for a potential third user. (Longer term, context pinning needs to be reworked as the current handling of switch context pins too late and so risks eviction and corrupting the request. Plans, plans, plans.) v2: Expand the comment explaining the optimisation for avoiding the stall on active contexts. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20161030132820.32163-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 188fdec5fa6b..700e93d80616 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1949,14 +1949,13 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false); - if (ret) - goto error; + struct i915_vma *vma; - ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment, - PIN_GLOBAL | PIN_HIGH); - if (ret) + vma = i915_gem_context_pin_legacy(ctx, PIN_HIGH); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto error; + } } /* The kernel context is only used as a placeholder for flushing the -- cgit v1.2.3