From c9203e82773a3cc965292bdf13eee422682fc018 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 18 Mar 2017 10:28:59 +0000 Subject: drm/i915: Reset tasklet back to execlists after disabling guc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When switching back to execlists, we also now need to restore the tasklet handler. Reported-by: Oscar Mateo Fixes: 31de73501ac9 ("drm/i915/scheduler: emulate a scheduler for guc") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Michał Winiarski Cc: Oscar Mateo Link: http://patchwork.freedesktop.org/patch/msgid/20170318102859.24101-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 77168e673e0a..dab73e7d9a6b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1575,6 +1575,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; engine->schedule = execlists_schedule; + engine->irq_tasklet.func = intel_lrc_irq_handler; } static void -- cgit v1.2.3 From 54ec12af2fb8ee761b092fabbf6e135648d23041 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 18 Mar 2017 10:42:57 +0000 Subject: drm/i915: Skip force-wake for uncached mmio flush of GGTT writes The trick of using an uncached mmio read to ensure that the GGTT writes are flushed does not require us to do the forcewake dance, so avoid it in the hope of reducing the frequency that we do keep the device forced awake. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170318104257.694-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 58e1db77d70e..bb65072f688d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3307,8 +3307,11 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) * system agents we cannot reproduce this behaviour). */ wmb(); - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) - POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + } intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); -- cgit v1.2.3 From 66e303e988c8df9f43cb7e61a7ec6d1b44ad0bb2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 20 Mar 2017 13:25:56 +0000 Subject: drm/i915/guc: Correct the request_in tracepoint position MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It has to be called after the global seqno has been assigned. Signed-off-by: Tvrtko Ursulin Fixes: 31de73501ac9 ("drm/i915/scheduler: emulate a scheduler for guc") Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Michał Winiarski Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170320132556.29286-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 832ac9e45801..7a78e1286759 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -609,8 +609,8 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) RB_CLEAR_NODE(&rq->priotree.node); rq->priotree.priority = INT_MAX; - trace_i915_gem_request_in(rq, port - engine->execlist_port); i915_guc_submit(rq); + trace_i915_gem_request_in(rq, port - engine->execlist_port); last = rq; submit = true; } -- cgit v1.2.3 From 467221bc60c1b500b2f7dc1df77f1a88c725cd4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Mar 2017 14:31:33 +0000 Subject: drm/i915: Protect intel_engine_wakeup() for call from irq context intel_engine_wakeup() is called by nop_request_submit() which is installed to handle third party fences completed from within irq context. As such, it needs the full irqsave/irqrestore and not the partial spin_irq_lock handling. [18942.714467] ================================= [18942.719076] [ INFO: inconsistent lock state ] [18942.723522] 4.11.0-rc2-CI-CI_DRM_2368+ #1 Tainted: G U W [18942.729970] --------------------------------- [18942.734466] inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. [18942.740594] gem_eio/1275 [HC0[0]:SC0[0]:HE1:SE1] takes: [18942.745932] (&(&fence->lock)->rlock){+.?...}, at: [] dma_fence_signal+0x100/0x 230 [18942.755331] {IN-SOFTIRQ-W} state was registered at: [18942.760356] __lock_acquire+0x5d0/0x1bb0 [18942.764444] lock_acquire+0xc9/0x220 [18942.768196] _raw_spin_lock_irqsave+0x41/0x60 [18942.772747] dma_fence_signal+0x100/0x230 [18942.776927] vgem_fence_timeout+0x9/0x10 [vgem] [18942.781701] call_timer_fn+0x92/0x380 [18942.785557] expire_timers+0x150/0x1f0 [18942.789491] run_timer_softirq+0x7c/0x160 [18942.793705] __do_softirq+0x116/0x4c0 [18942.797560] irq_exit+0xa9/0xc0 [18942.800873] smp_apic_timer_interrupt+0x38/0x50 [18942.805611] apic_timer_interrupt+0x90/0xa0 [18942.810008] cpuidle_enter_state+0x135/0x380 [18942.814503] cpuidle_enter+0x12/0x20 [18942.818250] call_cpuidle+0x1e/0x40 [18942.821906] do_idle+0x17e/0x1f0 [18942.825333] cpu_startup_entry+0x18/0x20 [18942.829463] rest_init+0x127/0x130 [18942.833025] start_kernel+0x3f1/0x3fe [18942.836908] x86_64_start_reservations+0x2a/0x2c [18942.841733] x86_64_start_kernel+0x173/0x186 [18942.846234] verify_cpu+0x0/0xfc [18942.849604] irq event stamp: 30568 [18942.853140] hardirqs last enabled at (30567): [] ktime_get+0xef/0x120 [18942.861468] hardirqs last disabled at (30568): [] _raw_spin_lock_irqsave+0x17/0 x60 [18942.870812] softirqs last enabled at (30462): [] __do_softirq+0x1d9/0x4c0 [18942.879443] softirqs last disabled at (30439): [] irq_exit+0xa9/0xc0 [18942.887616] [18942.887616] other info that might help us debug this: [18942.894279] Possible unsafe locking scenario: [18942.894279] [18942.900336] CPU0 [18942.902851] ---- [18942.905362] lock(&(&fence->lock)->rlock); [18942.909647] [18942.912330] lock(&(&fence->lock)->rlock); [18942.916821] [18942.916821] *** DEADLOCK *** [18942.916821] [18942.922862] 1 lock held by gem_eio/1275: [18942.926859] #0: (&(&fence->lock)->rlock){+.?...}, at: [] dma_fence_signal+0x1 00/0x230 [18942.936651] [18942.936651] stack backtrace: [18942.941142] CPU: 3 PID: 1275 Comm: gem_eio Tainted: G U W 4.11.0-rc2-CI-CI_DRM_2368+ # 1 [18942.950367] Hardware name: Gigabyte Technology Co., Ltd. Z170X-UD5/Z170X-UD5-CF, BIOS F21 01/06/2 017 [18942.959756] Call Trace: [18942.962244] dump_stack+0x67/0x92 [18942.965626] print_usage_bug.part.23+0x259/0x268 [18942.970362] mark_lock+0x12c/0x6f0 [18942.973851] ? check_usage_forwards+0x130/0x130 [18942.978487] mark_held_locks+0x6f/0xa0 [18942.982329] ? _raw_spin_unlock_irq+0x27/0x50 [18942.986797] trace_hardirqs_on_caller+0x150/0x200 [18942.991599] trace_hardirqs_on+0xd/0x10 [18942.995515] _raw_spin_unlock_irq+0x27/0x50 [18942.999796] intel_engine_wakeup+0x26/0x30 [i915] [18943.004670] intel_engine_init_global_seqno+0x131/0x1a0 [i915] [18943.010745] nop_submit_request+0x2e/0x40 [i915] [18943.015476] submit_notify+0x3f/0x5c [i915] [18943.019763] __i915_sw_fence_complete+0x176/0x220 [i915] [18943.025234] ? try_to_del_timer_sync+0x4d/0x60 [18943.029825] i915_sw_fence_complete+0x25/0x40 [i915] [18943.034887] dma_i915_sw_fence_wake+0x26/0x60 [i915] [18943.039959] dma_fence_signal+0x146/0x230 [18943.044109] vgem_fence_signal_ioctl+0x6c/0xc0 [vgem] [18943.049275] drm_ioctl+0x200/0x450 [18943.052758] ? vgem_fence_attach_ioctl+0x270/0x270 [vgem] [18943.058334] do_vfs_ioctl+0x90/0x6e0 [18943.061991] ? entry_SYSCALL_64_fastpath+0x5/0xb1 [18943.066843] ? __this_cpu_preempt_check+0x13/0x20 [18943.071643] ? trace_hardirqs_on_caller+0xe7/0x200 [18943.076532] SyS_ioctl+0x3c/0x70 [18943.079842] entry_SYSCALL_64_fastpath+0x1c/0xb1 [18943.084558] RIP: 0033:0x7f0dfcc14357 [18943.088240] RSP: 002b:00007ffeb4628da8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [18943.095996] RAX: ffffffffffffffda RBX: ffffffff8147eb93 RCX: 00007f0dfcc14357 [18943.103311] RDX: 00007ffeb4628de0 RSI: 0000000040086442 RDI: 0000000000000005 [18943.110574] RBP: ffffc9000176ff88 R08: 0000000000000004 R09: 0000000000000000 [18943.117845] R10: 0000000000000029 R11: 0000000000000246 R12: 0000000000000001 [18943.125168] R13: 0000000000000005 R14: 0000000040086442 R15: 0000000000000000 [18943.132520] ? __this_cpu_preempt_check+0x13/0x20 Fixes: cdc3a4539034 ("drm/i915: No need to save/restore irq status in intel_engine_wakeup") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170320143133.1507-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index ba986edee312..b6ea192ad550 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -47,11 +47,12 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; unsigned int result; - spin_lock_irq(&b->irq_lock); + spin_lock_irqsave(&b->irq_lock, flags); result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irq(&b->irq_lock); + spin_unlock_irqrestore(&b->irq_lock, flags); return result; } -- cgit v1.2.3 From 24caf6559393f68eeefa39faf8c47c0d51a63cee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Mar 2017 14:56:09 +0000 Subject: drm/i915: intel_engine_init_global_seqno() requires atomic kmap As intel_engine_init_global_seqno() may be called by nop_submit_request() from inside irq context, we have to use atomic versions of kmap/kunmap. This is rare as this requires using gen8 legacy ringbuffer submission. Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170320145609.4898-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4200faa520c7..ef3c62000697 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -242,12 +242,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) void *semaphores; /* Semaphores are in noncoherent memory, flush to be safe */ - semaphores = kmap(page); + semaphores = kmap_atomic(page); memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), I915_NUM_ENGINES * gen8_semaphore_seqno_size); - kunmap(page); + kunmap_atomic(semaphores); } intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); -- cgit v1.2.3 From 272bce17cc3ef4629e28c38324d81ca72a862115 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Mar 2017 10:40:28 +0100 Subject: drm/i915: split out check for noncontiguous pfn range We get a warning with gcc-7 about a pointless comparison when using a linear memmap: drivers/gpu/drm/i915/selftests/scatterlist.c: In function 'alloc_table': drivers/gpu/drm/i915/selftests/scatterlist.c:219:66: error: self-comparison always evaluates to false [-Werror=tautological-compare] Splitting out the comparison into a separate function avoids the warning and makes it slightly more obvious what happens. Fixes: 935a2f776aa5 ("drm/i915: Add some selftests for sg_table manipulation") Signed-off-by: Arnd Bergmann Link: http://patchwork.freedesktop.org/patch/msgid/20170320094335.1266306-2-arnd@arndb.de Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/selftests/scatterlist.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c index eb2cda8e2b9f..1cc5d2931753 100644 --- a/drivers/gpu/drm/i915/selftests/scatterlist.c +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -189,6 +189,13 @@ static unsigned int random(unsigned long n, return 1 + (prandom_u32_state(rnd) % 1024); } +static inline bool page_contiguous(struct page *first, + struct page *last, + unsigned long npages) +{ + return first + npages == last; +} + static int alloc_table(struct pfn_table *pt, unsigned long count, unsigned long max, npages_fn_t npages_fn, @@ -216,7 +223,9 @@ static int alloc_table(struct pfn_table *pt, unsigned long npages = npages_fn(n, count, rnd); /* Nobody expects the Sparse Memmap! */ - if (pfn_to_page(pfn + npages) != pfn_to_page(pfn) + npages) { + if (!page_contiguous(pfn_to_page(pfn), + pfn_to_page(pfn + npages), + npages)) { sg_free_table(&pt->st); return -ENOSPC; } -- cgit v1.2.3 From 899f6204c0f8117d33226e586d3a630b3cf9bce0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Mar 2017 11:33:20 +0000 Subject: drm/i915/execlists: Split the atomic test_and_clear_bit for irq handler Rather than impose the cost of a locked test before queuing a new request, reduce it to a simple test_bit() with a following clear_bit() prior to doing the CSB check. This ensure that if an interrupt does occur whilst reading from the CSB, we still detect it (the interrupt would trigger a rescheduling of the tasklet anyway). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170321113320.2603-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dab73e7d9a6b..a88380876ce9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -530,13 +530,18 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_get(dev_priv, engine->fw_domains); - while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { + /* Prefer doing test_and_clear_bit() as a two stage operation to avoid + * imposing the cost of a locked atomic transaction when submitting a + * new request (outside of the context-switch interrupt). + */ + while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { u32 __iomem *csb_mmio = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); u32 __iomem *buf = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); unsigned int csb, head, tail; + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); csb = readl(csb_mmio); head = GEN8_CSB_READ_PTR(csb); tail = GEN8_CSB_WRITE_PTR(csb); -- cgit v1.2.3 From fe085f13c7901203445fd2ab26c0f499313b8258 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Mar 2017 10:25:52 +0000 Subject: drm/i915: Remove intel_ring.last_retired_head Storing the position of the breadcrumb of the last retired request as a separate last_retired_head is superfluous as we always copy that into head prior to recalculation of the intel_ring.space. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170321102552.24357-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++--- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 2 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 ++++----------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ---------- drivers/gpu/drm/i915/selftests/mock_engine.c | 1 - 6 files changed, 7 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 47e707d83c4d..29bf11d8b620 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1938,9 +1938,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) { - seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", - ring->space, ring->head, ring->tail, - ring->last_retired_head); + seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)", + ring->space, ring->head, ring->tail); } static int i915_context_status(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 0e8d1010cecb..dbfa9db2419d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -295,7 +295,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) * completion order. */ list_del(&request->ring_link); - request->ring->last_retired_head = request->postfix; + request->ring->head = request->postfix; if (!--request->i915->gt.active_requests) { GEM_BUG_ON(!request->i915->gt.awake); mod_delayed_work(request->i915->wq, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a88380876ce9..090238eefd49 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1260,7 +1260,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; request->ring->head = request->postfix; - request->ring->last_retired_head = -1; intel_ring_update_space(request->ring); /* Catch up with any missed context-switch interrupts */ @@ -2047,7 +2046,6 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) i915_gem_object_unpin_map(ce->state->obj); ce->ring->head = ce->ring->tail = 0; - ce->ring->last_retired_head = -1; intel_ring_update_space(ce->ring); } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d9b8d17c3fc6..0ca5ea7a9407 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,13 +49,7 @@ static int __intel_ring_space(int head, int tail, int size) void intel_ring_update_space(struct intel_ring *ring) { - if (ring->last_retired_head != -1) { - ring->head = ring->last_retired_head; - ring->last_retired_head = -1; - } - - ring->space = __intel_ring_space(ring->head & HEAD_ADDR, - ring->tail, ring->size); + ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); } static int @@ -618,12 +612,8 @@ static void reset_ring_common(struct intel_engine_cs *engine, } /* If the rq hung, jump to its breadcrumb and skip the batch */ - if (request->fence.error == -EIO) { - struct intel_ring *ring = request->ring; - - ring->head = request->postfix; - ring->last_retired_head = -1; - } + if (request->fence.error == -EIO) + request->ring->head = request->postfix; } else { engine->legacy_active_context = NULL; } @@ -1392,7 +1382,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) if (IS_I830(engine->i915) || IS_I845G(engine->i915)) ring->effective_size -= 2 * CACHELINE_BYTES; - ring->last_retired_head = -1; intel_ring_update_space(ring); vma = intel_ring_create_vma(engine->i915, size); @@ -1571,10 +1560,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) { + for_each_engine(engine, dev_priv, id) engine->buffer->head = engine->buffer->tail; - engine->buffer->last_retired_head = -1; - } } static int ring_request_alloc(struct drm_i915_gem_request *request) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 847aea554464..2ecb41788fb6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -149,16 +149,6 @@ struct intel_ring { int space; int size; int effective_size; - - /** We track the position of the requests in the ring buffer, and - * when each is retired we increment last_retired_head as the GPU - * must have finished processing the request and so we know we - * can advance the ringbuffer up to that position. - * - * last_retired_head is set to -1 after the value is consumed so - * we can detect new retirements. - */ - u32 last_retired_head; }; struct i915_gem_context; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 8d5ba037064c..0ad624a1db90 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -118,7 +118,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->vaddr = (void *)(ring + 1); INIT_LIST_HEAD(&ring->request_list); - ring->last_retired_head = -1; intel_ring_update_space(ring); return ring; -- cgit v1.2.3 From 9f7886d07f36f9be23c9bcc526b7f9dae65db8c5 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Mar 2017 10:55:11 +0000 Subject: drm/i915: Spinlocks in tasklets can use spin_(un)lock_irq The tasklets callbacks are only called from tasklet context so it is safe do to this. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170321105511.18269-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 5 ++--- drivers/gpu/drm/i915/intel_lrc.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 7a78e1286759..055467a53dc3 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -573,7 +573,6 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlist_port; struct drm_i915_gem_request *last = port[0].request; - unsigned long flags; struct rb_node *rb; bool submit = false; @@ -589,7 +588,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) if (!READ_ONCE(engine->execlist_first)) return false; - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irq(&engine->timeline->lock); rb = engine->execlist_first; while (rb) { struct drm_i915_gem_request *rq = @@ -619,7 +618,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) nested_enable_signaling(last); engine->execlist_first = rb; } - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irq(&engine->timeline->lock); return submit; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 090238eefd49..eec1e714f531 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -399,7 +399,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *last; struct execlist_port *port = engine->execlist_port; - unsigned long flags; struct rb_node *rb; bool submit = false; @@ -448,7 +447,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irq(&engine->timeline->lock); rb = engine->execlist_first; while (rb) { struct drm_i915_gem_request *cursor = @@ -500,7 +499,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) i915_gem_request_assign(&port->request, last); engine->execlist_first = rb; } - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irq(&engine->timeline->lock); if (submit) execlists_submit_ports(engine); -- cgit v1.2.3 From 78cfa580f81e69857815c59c8908aee454726da1 Mon Sep 17 00:00:00 2001 From: "Pandiyan, Dhinakaran" Date: Tue, 7 Mar 2017 16:12:51 -0800 Subject: drm/i915/glk: Apply cdclk workaround for DP audio Implement the DP-Audio cdclk restriction for GLK, similar to what is implemented for BDW and other GEN9 platforms. The max. pixel clock adjustment for GLK, however factors in the 2 pixels per clock output that GLK generates. Separating min. cdclk and max. pixel_rate would be nicer, but let's defer that to future and fix the GLK bug for now. Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Paulo Zanoni Signed-off-by: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1488931972-2865-1-git-send-email-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index c2cc33f3d888..dd350642eba1 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1442,16 +1442,21 @@ static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); - /* BSpec says "Do not use DisplayPort with CDCLK less than - * 432 MHz, audio enabled, port width x4, and link rate - * HBR2 (5.4 GHz), or else there may be audio corruption or - * screen corruption." + /* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz, + * audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else + * there may be audio corruption or screen corruption." This cdclk + * restriction for GLK is 316.8 MHz and since GLK can output two + * pixels per clock, the pixel rate becomes 2 * 316.8 MHz. */ if (intel_crtc_has_dp_encoder(crtc_state) && crtc_state->has_audio && crtc_state->port_clock >= 540000 && - crtc_state->lane_count == 4) - pixel_rate = max(432000, pixel_rate); + crtc_state->lane_count == 4) { + if (IS_GEMINILAKE(dev_priv)) + pixel_rate = max(2 * 316800, pixel_rate); + else + pixel_rate = max(432000, pixel_rate); + } return pixel_rate; } -- cgit v1.2.3 From 8cbeb06dc6b584009d7492c667a3c9a4b96cdefa Mon Sep 17 00:00:00 2001 From: "Pandiyan, Dhinakaran" Date: Tue, 14 Mar 2017 15:45:56 -0700 Subject: drm/i915: Implement cdclk restrictions based on Azalia BCLK According to BSpec, "The CD clock frequency must be at least twice the frequency of the Azalia BCLK." and BCLK is configured to 96 MHz by default. This check is needed because BXT and GLK support cdclk frequencies less than 192 MHz. v2: Include other Gen9 platforms too for completeness.(Paulo) Cc: Paulo Zanoni Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Paulo Zanoni Signed-off-by: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1489531556-2926-1-git-send-email-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index dd350642eba1..dd3ad52b7dfe 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1458,6 +1458,18 @@ static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, pixel_rate = max(432000, pixel_rate); } + /* According to BSpec, "The CD clock frequency must be at least twice + * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. + * The check for GLK has to be adjusted as the platform can output + * two pixels per clock. + */ + if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) { + if (IS_GEMINILAKE(dev_priv)) + pixel_rate = max(2 * 2 * 96000, pixel_rate); + else + pixel_rate = max(2 * 96000, pixel_rate); + } + return pixel_rate; } -- cgit v1.2.3 From 396a1200d8202f7cd154e26dcc47438e28ef6c70 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 22 Mar 2017 15:58:44 -0300 Subject: drm/i915: simplify intel_ddi_pll_select() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because {hsw,skl,bxt}_ddi_pll_select all pretty much do the same thing in slightly different ways. Replace everything with a simple copy of the function and inline it inside intle_ddi_pll_select(). v2: s/return pll/return pll != NULL/ (Ville) Reviewed-by: Ville Syrjälä Signed-off-by: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1490209125-20046-1-git-send-email-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 58 +++++----------------------------------- 1 file changed, 7 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index d8214ba8da14..b30898c2a076 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1127,47 +1127,6 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, bxt_ddi_clock_get(encoder, pipe_config); } -static bool -hsw_ddi_pll_select(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) -{ - struct intel_shared_dpll *pll; - - pll = intel_get_shared_dpll(intel_crtc, crtc_state, - encoder); - if (!pll) - DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", - pipe_name(intel_crtc->pipe)); - - return pll; -} - -static bool -skl_ddi_pll_select(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) -{ - struct intel_shared_dpll *pll; - - pll = intel_get_shared_dpll(intel_crtc, crtc_state, encoder); - if (pll == NULL) { - DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", - pipe_name(intel_crtc->pipe)); - return false; - } - - return true; -} - -static bool -bxt_ddi_pll_select(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state, - struct intel_encoder *encoder) -{ - return !!intel_get_shared_dpll(intel_crtc, crtc_state, encoder); -} - /* * Tries to find a *shared* PLL for the CRTC and store it in * intel_crtc->ddi_pll_sel. @@ -1178,19 +1137,16 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc, bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); struct intel_encoder *encoder = intel_ddi_get_crtc_new_encoder(crtc_state); + struct intel_shared_dpll *pll; - if (IS_GEN9_BC(dev_priv)) - return skl_ddi_pll_select(intel_crtc, crtc_state, - encoder); - else if (IS_GEN9_LP(dev_priv)) - return bxt_ddi_pll_select(intel_crtc, crtc_state, - encoder); - else - return hsw_ddi_pll_select(intel_crtc, crtc_state, - encoder); + pll = intel_get_shared_dpll(intel_crtc, crtc_state, encoder); + if (!pll) + DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", + pipe_name(intel_crtc->pipe)); + + return pll != NULL; } void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) -- cgit v1.2.3 From 24f8e00a8a2eae92491f8668939dbfdc123d6e75 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Mar 2017 11:05:21 +0000 Subject: drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations Since gfx allocations tend to be large, unmovable and disposable, report the allocation failure back to userspace as an ENOMEM rather than incur the oomkiller. We have already tried to make room by purging our own cached gfx objects, and the oomkiller doesn't attribute ownership of gfx objects so will likely pick the wrong candidate. Instead, let userspace see the ENOMEM. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170322110521.29930-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Acked-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bb65072f688d..9d710bd8c6c9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2321,7 +2321,7 @@ rebuild_st: st->nents = 0; for (i = 0; i < page_count; i++) { page = shmem_read_mapping_page_gfp(mapping, i, gfp); - if (IS_ERR(page)) { + if (unlikely(IS_ERR(page))) { i915_gem_shrink(dev_priv, page_count, I915_SHRINK_BOUND | @@ -2329,12 +2329,21 @@ rebuild_st: I915_SHRINK_PURGEABLE); page = shmem_read_mapping_page_gfp(mapping, i, gfp); } - if (IS_ERR(page)) { + if (unlikely(IS_ERR(page))) { + gfp_t reclaim; + /* We've tried hard to allocate the memory by reaping * our own buffer, now let the real VM do its job and * go down in flames if truly OOM. + * + * However, since graphics tend to be disposable, + * defer the oom here by reporting the ENOMEM back + * to userspace. */ - page = shmem_read_mapping_page(mapping, i); + reclaim = mapping_gfp_constraint(mapping, 0); + reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ + + page = shmem_read_mapping_page_gfp(mapping, i, gfp); if (IS_ERR(page)) { ret = PTR_ERR(page); goto err_sg; -- cgit v1.2.3 From 24304d81930fdee0784ac27c0bf8d6a27f529382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 14 Mar 2017 17:10:49 +0200 Subject: drm/i915: Extract intel_wm_plane_visible() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All platforms that lack double buffered watermarks will need to handle the legacy cursor updates in the same way. So let's extract the logic to determine the plane visibility into a small helper. For simplicity we'll make the function DTRT for any plane, but only apply the special sauce for cursor planes. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170314151050.12194-1-ville.syrjala@linux.intel.com Tested-by: Dorota Czaplejewicz Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index aece0ff88a5d..735ebd53c9d0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -655,6 +655,29 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, return wm_size; } +static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + + /* FIXME check the 'enable' instead */ + if (!crtc_state->base.active) + return false; + + /* + * Treat cursor with fb as always visible since cursor updates + * can happen faster than the vrefresh rate, and the current + * watermark code doesn't handle that correctly. Cursor updates + * which set/clear the fb or change the cursor size are going + * to get throttled by intel_legacy_cursor_update() to work + * around this problem with the watermark code. + */ + if (plane->id == PLANE_CURSOR) + return plane_state->base.fb != NULL; + else + return plane_state->base.visible; +} + static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv) { struct intel_crtc *crtc, *enabled = NULL; @@ -1961,7 +1984,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, uint32_t method1, method2; int cpp; - if (!cstate->base.active || !pstate->base.visible) + if (!intel_wm_plane_visible(cstate, pstate)) return 0; cpp = pstate->base.fb->format->cpp[0]; @@ -1990,7 +2013,7 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, uint32_t method1, method2; int cpp; - if (!cstate->base.active || !pstate->base.visible) + if (!intel_wm_plane_visible(cstate, pstate)) return 0; cpp = pstate->base.fb->format->cpp[0]; @@ -2013,15 +2036,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, { int cpp; - /* - * Treat cursor with fb as always visible since cursor updates - * can happen faster than the vrefresh rate, and the current - * watermark code doesn't handle that correctly. Cursor updates - * which set/clear the fb or change the cursor size are going - * to get throttled by intel_legacy_cursor_update() to work - * around this problem with the watermark code. - */ - if (!cstate->base.active || !pstate->base.fb) + if (!intel_wm_plane_visible(cstate, pstate)) return 0; cpp = pstate->base.fb->format->cpp[0]; @@ -2038,7 +2053,7 @@ static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, { int cpp; - if (!cstate->base.active || !pstate->base.visible) + if (!intel_wm_plane_visible(cstate, pstate)) return 0; cpp = pstate->base.fb->format->cpp[0]; -- cgit v1.2.3 From 93aa2a1c25e562cc0ca69c3175333fe33fdf055b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 14 Mar 2017 17:10:50 +0200 Subject: drm/i915: Fix SKL cursor watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use intel_wm_plane_visible() to determine cursor visibility for SKL+ also. Previously SKL+ would check the actual visibility which now conflicts with the assumptions in intel_legacy_cursor_update(). We also change SKL+ to compute the cursor watermarks based on the unclipped cursor size, just as we do on all the other platforms. Using the clipped size could now result in garbage results. Testcase: igt/kms_chv_cursor_fail Fixes: a5509abda48e ("drm/i915: Fix legacy cursor vs. watermarks for ILK-BDW") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100195 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170314151050.12194-2-ville.syrjala@linux.intel.com Tested-by: Dorota Czaplejewicz Tested-by: Jari Tahvanainen Reviewed-by: Maarten Lankhorst Acked-by: Ander Conselvan de Oliveira --- drivers/gpu/drm/i915/intel_pm.c | 44 +++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 735ebd53c9d0..13ada8560e71 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3361,19 +3361,29 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, * Caller should take care of dividing & rounding off the value. */ static uint32_t -skl_plane_downscale_amount(const struct intel_plane_state *pstate) +skl_plane_downscale_amount(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate) { + struct intel_plane *plane = to_intel_plane(pstate->base.plane); uint32_t downscale_h, downscale_w; uint32_t src_w, src_h, dst_w, dst_h; - if (WARN_ON(!pstate->base.visible)) + if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) return DRM_PLANE_HELPER_NO_SCALING; /* n.b., src is 16.16 fixed point, dst is whole integer */ - src_w = drm_rect_width(&pstate->base.src); - src_h = drm_rect_height(&pstate->base.src); - dst_w = drm_rect_width(&pstate->base.dst); - dst_h = drm_rect_height(&pstate->base.dst); + if (plane->id == PLANE_CURSOR) { + src_w = pstate->base.src_w; + src_h = pstate->base.src_h; + dst_w = pstate->base.crtc_w; + dst_h = pstate->base.crtc_h; + } else { + src_w = drm_rect_width(&pstate->base.src); + src_h = drm_rect_height(&pstate->base.src); + dst_w = drm_rect_width(&pstate->base.dst); + dst_h = drm_rect_height(&pstate->base.dst); + } + if (drm_rotation_90_or_270(pstate->base.rotation)) swap(dst_w, dst_h); @@ -3389,6 +3399,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate, int y) { + struct intel_plane *plane = to_intel_plane(pstate->plane); struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); uint32_t down_scale_amount, data_rate; uint32_t width = 0, height = 0; @@ -3401,7 +3412,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, fb = pstate->fb; format = fb->format->format; - if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR) + if (plane->id == PLANE_CURSOR) return 0; if (y && format != DRM_FORMAT_NV12) return 0; @@ -3425,7 +3436,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, data_rate = width * height * fb->format->cpp[0]; } - down_scale_amount = skl_plane_downscale_amount(intel_pstate); + down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); return (uint64_t)data_rate * down_scale_amount >> 16; } @@ -3717,7 +3728,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst uint64_t pixel_rate; /* Shouldn't reach here on disabled planes... */ - if (WARN_ON(!pstate->base.visible)) + if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) return 0; /* @@ -3725,7 +3736,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst * with additional adjustments for plane-specific scaling. */ adjusted_pixel_rate = cstate->pixel_rate; - downscale_amount = skl_plane_downscale_amount(pstate); + downscale_amount = skl_plane_downscale_amount(cstate, pstate); pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); @@ -3742,6 +3753,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint8_t *out_lines, /* out */ bool *enabled /* out */) { + struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); struct drm_plane_state *pstate = &intel_pstate->base; struct drm_framebuffer *fb = pstate->fb; uint32_t latency = dev_priv->wm.skl_latency[level]; @@ -3761,7 +3773,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); bool y_tiled, x_tiled; - if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { + if (latency == 0 || + !intel_wm_plane_visible(cstate, intel_pstate)) { *enabled = false; return 0; } @@ -3777,8 +3790,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (apply_memory_bw_wa && x_tiled) latency += 15; - width = drm_rect_width(&intel_pstate->base.src) >> 16; - height = drm_rect_height(&intel_pstate->base.src) >> 16; + if (plane->id == PLANE_CURSOR) { + width = intel_pstate->base.crtc_w; + height = intel_pstate->base.crtc_h; + } else { + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; + } if (drm_rotation_90_or_270(pstate->rotation)) swap(width, height); -- cgit v1.2.3 From 40149f00fb55263cf10066bafc8470d33b1ddcfc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Mar 2017 22:34:47 +0000 Subject: drm/i915: Actually pass the reclaim gfp_t along to shmemfs! Words cannot describe the embarrassment at creating a new gfp_t relaim to only prevent the oomkiller but allow direct|kswapd reclaim, and then not use it in the shmem_read_mapping_page_gfp(). Fixes: 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170322223447.7493-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9d710bd8c6c9..2a32d7c120fc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2343,7 +2343,7 @@ rebuild_st: reclaim = mapping_gfp_constraint(mapping, 0); reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ - page = shmem_read_mapping_page_gfp(mapping, i, gfp); + page = shmem_read_mapping_page_gfp(mapping, i, reclaim); if (IS_ERR(page)) { ret = PTR_ERR(page); goto err_sg; -- cgit v1.2.3 From e555e326459d6f6fca4ef859c471ecf8302ceb4d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Mar 2017 21:03:50 +0000 Subject: drm/i915: Remove superfluous hw_flags from mi_set_context() Why have both hw_flags and flags, when just one will do? Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170322210350.6208-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_context.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 486051ed681d..8fc8b3d15a0f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -576,25 +576,25 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) } static inline int -mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) +mi_set_context(struct drm_i915_gem_request *req, u32 flags) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine = req->engine; enum intel_engine_id id; - u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ i915.semaphores ? INTEL_INFO(dev_priv)->num_rings - 1 : 0; int len; + u32 *cs; - /* These flags are for resource streamer on HSW+ */ + flags |= MI_MM_SPACE_GTT; if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) - flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); - else if (INTEL_GEN(dev_priv) < 8) - flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); - + /* These flags are for resource streamer on HSW+ */ + flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; + else + flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; len = 4; if (INTEL_GEN(dev_priv) >= 7) -- cgit v1.2.3 From 5d4bac5503fcc67dd7999571e243cee49371aef7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Mar 2017 20:59:30 +0000 Subject: drm/i915: Restore marking context objects as dirty on pinning Commit e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc") converted the legacy intel_ringbuffer submission to the same context pinning mechanism as execlists - that is to pin the context until the subsequent request is retired. Previously it used the vma retirement of the context object to keep itself pinned until the next request (after i915_vma_move_to_active()). In the conversion, I missed that the vma retirement was also responsible for marking the object as dirty. Mark the context object as dirty when pinning (equivalent to execlists) which ensures that if the context is swapped out due to mempressure or suspend/hibernation, when it is loaded back in it does so with the previous state (and not all zero). Fixes: e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc") Reported-by: Dennis Gilmore Reported-by: Mathieu Marquer Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99993 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100181 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v4.11-rc1 Link: http://patchwork.freedesktop.org/patch/msgid/20170322205930.12762-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0ca5ea7a9407..62756eb2bd4a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1440,6 +1440,8 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, ret = context_pin(ctx); if (ret) goto error; + + ce->state->obj->mm.dirty = true; } /* The kernel context is only used as a placeholder for flushing the -- cgit v1.2.3 From 577ac4bd4caeb0a2263b61c8b4fa8924ade2d9ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 10:19:38 +0000 Subject: drm/i915: Eliminate per-fw_domain i915 backpointer Pass along the drm_i915_private pointer from the caller, rather than looking it up from each fw_domain during fw_domains_get/_put. This allows us to then eliminate the backpointer, in exchange for a more complicated unwrapping procedure in the rare intel_uncore_fw_release_timer(). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170323101944.21627-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 33 ++++++++-------- drivers/gpu/drm/i915/intel_uncore.c | 77 ++++++++++++++++++++----------------- 2 files changed, 60 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a5947a496d0a..4c9de7d00eaf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -732,21 +732,25 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, struct intel_uncore_funcs { void (*force_wake_get)(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); + enum forcewake_domains domains); void (*force_wake_put)(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); - - uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); - uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); - uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); - uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace); - - void (*mmio_writeb)(struct drm_i915_private *dev_priv, i915_reg_t r, - uint8_t val, bool trace); - void (*mmio_writew)(struct drm_i915_private *dev_priv, i915_reg_t r, - uint16_t val, bool trace); - void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r, - uint32_t val, bool trace); + enum forcewake_domains domains); + + uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + + void (*mmio_writeb)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint8_t val, bool trace); + void (*mmio_writew)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint16_t val, bool trace); + void (*mmio_writel)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint32_t val, bool trace); }; struct intel_forcewake_range { @@ -771,7 +775,6 @@ struct intel_uncore { enum forcewake_domains fw_domains_active; struct intel_uncore_forcewake_domain { - struct drm_i915_private *i915; enum forcewake_domain_id id; enum forcewake_domains mask; unsigned wake_count; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 09f5f02d7901..aa2e7401efdc 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -52,10 +52,11 @@ intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id) } static inline void -fw_domain_reset(const struct intel_uncore_forcewake_domain *d) +fw_domain_reset(struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { WARN_ON(!i915_mmio_reg_valid(d->reg_set)); - __raw_i915_write32(d->i915, d->reg_set, d->val_reset); + __raw_i915_write32(i915, d->reg_set, d->val_reset); } static inline void @@ -69,9 +70,10 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) } static inline void -fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) +fw_domain_wait_ack_clear(struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) & + if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & FORCEWAKE_KERNEL) == 0, FORCEWAKE_ACK_TIMEOUT_MS)) DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", @@ -79,15 +81,17 @@ fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) } static inline void -fw_domain_get(const struct intel_uncore_forcewake_domain *d) +fw_domain_get(struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - __raw_i915_write32(d->i915, d->reg_set, d->val_set); + __raw_i915_write32(i915, d->reg_set, d->val_set); } static inline void -fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d) +fw_domain_wait_ack(struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) & + if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & FORCEWAKE_KERNEL), FORCEWAKE_ACK_TIMEOUT_MS)) DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", @@ -95,72 +99,75 @@ fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d) } static inline void -fw_domain_put(const struct intel_uncore_forcewake_domain *d) +fw_domain_put(struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - __raw_i915_write32(d->i915, d->reg_set, d->val_clear); + __raw_i915_write32(i915, d->reg_set, d->val_clear); } static inline void -fw_domain_posting_read(const struct intel_uncore_forcewake_domain *d) +fw_domain_posting_read(struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { /* something from same cacheline, but not from the set register */ if (i915_mmio_reg_valid(d->reg_post)) - __raw_posting_read(d->i915, d->reg_post); + __raw_posting_read(i915, d->reg_post); } static void -fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) +fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; - for_each_fw_domain_masked(d, fw_domains, dev_priv) { - fw_domain_wait_ack_clear(d); - fw_domain_get(d); + for_each_fw_domain_masked(d, fw_domains, i915) { + fw_domain_wait_ack_clear(i915, d); + fw_domain_get(i915, d); } - for_each_fw_domain_masked(d, fw_domains, dev_priv) - fw_domain_wait_ack(d); + for_each_fw_domain_masked(d, fw_domains, i915) + fw_domain_wait_ack(i915, d); - dev_priv->uncore.fw_domains_active |= fw_domains; + i915->uncore.fw_domains_active |= fw_domains; } static void -fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) +fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; - for_each_fw_domain_masked(d, fw_domains, dev_priv) { - fw_domain_put(d); - fw_domain_posting_read(d); + for_each_fw_domain_masked(d, fw_domains, i915) { + fw_domain_put(i915, d); + fw_domain_posting_read(i915, d); } - dev_priv->uncore.fw_domains_active &= ~fw_domains; + i915->uncore.fw_domains_active &= ~fw_domains; } static void -fw_domains_posting_read(struct drm_i915_private *dev_priv) +fw_domains_posting_read(struct drm_i915_private *i915) { struct intel_uncore_forcewake_domain *d; /* No need to do for all, just do for first found */ - for_each_fw_domain(d, dev_priv) { - fw_domain_posting_read(d); + for_each_fw_domain(d, i915) { + fw_domain_posting_read(i915, d); break; } } static void -fw_domains_reset(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) +fw_domains_reset(struct drm_i915_private *i915, + enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; - if (dev_priv->uncore.fw_domains == 0) + if (i915->uncore.fw_domains == 0) return; - for_each_fw_domain_masked(d, fw_domains, dev_priv) - fw_domain_reset(d); + for_each_fw_domain_masked(d, fw_domains, i915) + fw_domain_reset(i915, d); - fw_domains_posting_read(dev_priv); + fw_domains_posting_read(i915); } static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv) @@ -236,7 +243,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) { struct intel_uncore_forcewake_domain *domain = container_of(timer, struct intel_uncore_forcewake_domain, timer); - struct drm_i915_private *dev_priv = domain->i915; + struct drm_i915_private *dev_priv = + container_of(domain, struct drm_i915_private, uncore.fw_domain[domain->id]); unsigned long irqflags; assert_rpm_device_not_suspended(dev_priv); @@ -1161,7 +1169,6 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, else if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv) || IS_GEN8(dev_priv)) d->reg_post = ECOBUS; - d->i915 = dev_priv; d->id = domain_id; BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); @@ -1175,7 +1182,7 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, dev_priv->uncore.fw_domains |= (1 << domain_id); - fw_domain_reset(d); + fw_domain_reset(dev_priv, d); } static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From bd52750485e9d4b8372360a347c63788eb76db8a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 10:19:39 +0000 Subject: drm/i915: Use correct fw_domains during initialisation In the next patch we will begin to sanity check that we do not attempt to obtain the forcewake on an unsupport domain. However, that is exactly what we do during our actual initialisation of fw_domains - rectify it before it explodes. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170323101944.21627-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index aa2e7401efdc..6e415e8a3aa3 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1253,9 +1253,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE_MT, FORCEWAKE_MT_ACK); spin_lock_irq(&dev_priv->uncore.lock); - fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_ALL); + fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER); ecobus = __raw_i915_read32(dev_priv, ECOBUS); - fw_domains_put_with_fifo(dev_priv, FORCEWAKE_ALL); + fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER); spin_unlock_irq(&dev_priv->uncore.lock); if (!(ecobus & FORCEWAKE_MT_ENABLE)) { -- cgit v1.2.3 From cb3600db6e2d310532d250818d58ef83aef9ce90 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 10:19:40 +0000 Subject: drm/i915: Use correct fw_domains during reset In the next patch we will begin to sanity check that we do not attempt to obtain the forcewake on an unsupport domain. However, that is exactly what we do during reset of the fw_domains - rectify it before it explodes. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170323101944.21627-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6e415e8a3aa3..4ad5751a3faf 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -308,7 +308,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, if (fw) dev_priv->uncore.funcs.force_wake_put(dev_priv, fw); - fw_domains_reset(dev_priv, FORCEWAKE_ALL); + fw_domains_reset(dev_priv, dev_priv->uncore.fw_domains); if (restore) { /* If reset with a user forcewake, try to restore */ if (fw) -- cgit v1.2.3 From d2dc94bce295e638a15e077cbaec410a74db86c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 10:19:41 +0000 Subject: drm/i915: Skip unused fw_domains Use find-first-set bitop to quickly scan through the fw_domains mask and skip iterating over unused domains. v2: Move the WARN into the caller, to prevent compiler warnings in normal builds. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170323101944.21627-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 3 ++- drivers/gpu/drm/i915/i915_drv.h | 30 +++++++++++++------------- drivers/gpu/drm/i915/intel_uncore.c | 42 +++++++++++++++++++++++++------------ 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 29bf11d8b620..fcac795d4396 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1461,9 +1461,10 @@ static int i915_forcewake_domains(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_uncore_forcewake_domain *fw_domain; + unsigned int tmp; spin_lock_irq(&dev_priv->uncore.lock); - for_each_fw_domain(fw_domain, dev_priv) { + for_each_fw_domain(fw_domain, dev_priv, tmp) { seq_printf(m, "%s.wake_count = %u\n", intel_uncore_forcewake_domain_to_str(fw_domain->id), fw_domain->wake_count); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4c9de7d00eaf..589f91c4e585 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -703,9 +703,9 @@ enum forcewake_domain_id { }; enum forcewake_domains { - FORCEWAKE_RENDER = (1 << FW_DOMAIN_ID_RENDER), - FORCEWAKE_BLITTER = (1 << FW_DOMAIN_ID_BLITTER), - FORCEWAKE_MEDIA = (1 << FW_DOMAIN_ID_MEDIA), + FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), + FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), + FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), FORCEWAKE_ALL = (FORCEWAKE_RENDER | FORCEWAKE_BLITTER | FORCEWAKE_MEDIA) @@ -790,15 +790,19 @@ struct intel_uncore { int unclaimed_mmio_check; }; +#define __mask_next_bit(mask) ({ \ + int __idx = ffs(mask) - 1; \ + mask &= ~BIT(__idx); \ + __idx; \ +}) + /* Iterate over initialised fw domains */ -#define for_each_fw_domain_masked(domain__, mask__, dev_priv__) \ - for ((domain__) = &(dev_priv__)->uncore.fw_domain[0]; \ - (domain__) < &(dev_priv__)->uncore.fw_domain[FW_DOMAIN_ID_COUNT]; \ - (domain__)++) \ - for_each_if ((mask__) & (domain__)->mask) +#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \ + for (tmp__ = (mask__); \ + tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;) -#define for_each_fw_domain(domain__, dev_priv__) \ - for_each_fw_domain_masked(domain__, FORCEWAKE_ALL, dev_priv__) +#define for_each_fw_domain(domain__, dev_priv__, tmp__) \ + for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__) #define CSR_VERSION(major, minor) ((major) << 16 | (minor)) #define CSR_VERSION_MAJOR(version) ((version) >> 16) @@ -2581,12 +2585,6 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc) (id__)++) \ for_each_if ((engine__) = (dev_priv__)->engine[(id__)]) -#define __mask_next_bit(mask) ({ \ - int __idx = ffs(mask) - 1; \ - mask &= ~BIT(__idx); \ - __idx; \ -}) - /* Iterator over subset of engines selected by mask */ #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 4ad5751a3faf..2e79ca129202 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -118,13 +118,16 @@ static void fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; + unsigned int tmp; + + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); - for_each_fw_domain_masked(d, fw_domains, i915) { + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { fw_domain_wait_ack_clear(i915, d); fw_domain_get(i915, d); } - for_each_fw_domain_masked(d, fw_domains, i915) + for_each_fw_domain_masked(d, fw_domains, i915, tmp) fw_domain_wait_ack(i915, d); i915->uncore.fw_domains_active |= fw_domains; @@ -134,8 +137,11 @@ static void fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; + unsigned int tmp; + + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); - for_each_fw_domain_masked(d, fw_domains, i915) { + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { fw_domain_put(i915, d); fw_domain_posting_read(i915, d); } @@ -147,9 +153,10 @@ static void fw_domains_posting_read(struct drm_i915_private *i915) { struct intel_uncore_forcewake_domain *d; + unsigned int tmp; /* No need to do for all, just do for first found */ - for_each_fw_domain(d, i915) { + for_each_fw_domain(d, i915, tmp) { fw_domain_posting_read(i915, d); break; } @@ -160,11 +167,14 @@ fw_domains_reset(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *d; + unsigned int tmp; - if (i915->uncore.fw_domains == 0) + if (!fw_domains) return; - for_each_fw_domain_masked(d, fw_domains, i915) + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); + + for_each_fw_domain_masked(d, fw_domains, i915, tmp) fw_domain_reset(i915, d); fw_domains_posting_read(i915); @@ -274,9 +284,11 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, * timers are run before holding. */ while (1) { + unsigned int tmp; + active_domains = 0; - for_each_fw_domain(domain, dev_priv) { + for_each_fw_domain(domain, dev_priv, tmp) { if (hrtimer_cancel(&domain->timer) == 0) continue; @@ -285,7 +297,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - for_each_fw_domain(domain, dev_priv) { + for_each_fw_domain(domain, dev_priv, tmp) { if (hrtimer_active(&domain->timer)) active_domains |= domain->mask; } @@ -465,13 +477,13 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *domain; + unsigned int tmp; fw_domains &= dev_priv->uncore.fw_domains; - for_each_fw_domain_masked(domain, fw_domains, dev_priv) { + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) if (domain->wake_count++) fw_domains &= ~domain->mask; - } if (fw_domains) dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); @@ -528,10 +540,11 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *domain; + unsigned int tmp; fw_domains &= dev_priv->uncore.fw_domains; - for_each_fw_domain_masked(domain, fw_domains, dev_priv) { + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) { if (WARN_ON(domain->wake_count == 0)) continue; @@ -936,8 +949,11 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { struct intel_uncore_forcewake_domain *domain; + unsigned int tmp; + + GEM_BUG_ON(fw_domains & ~dev_priv->uncore.fw_domains); - for_each_fw_domain_masked(domain, fw_domains, dev_priv) + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) fw_domain_arm_timer(domain); dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); @@ -1175,7 +1191,7 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER)); BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA)); - d->mask = 1 << domain_id; + d->mask = BIT(domain_id); hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); d->timer.function = intel_uncore_fw_release_timer; -- cgit v1.2.3 From 0f966aaf5f8485576c56bab5a1ab03e0da445e3b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 10:19:42 +0000 Subject: drm/i915: Remove posting-read for forcewake put We can relax the requirement upon ourselves that the forcewake is released immediately and just allow it to occur naturally following our mmio request. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170323101944.21627-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/intel_uncore.c | 33 +-------------------------------- 2 files changed, 1 insertion(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 589f91c4e585..bb0e89d6255c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -783,7 +783,6 @@ struct intel_uncore { u32 val_set; u32 val_clear; i915_reg_t reg_ack; - i915_reg_t reg_post; u32 val_reset; } fw_domain[FW_DOMAIN_ID_COUNT]; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 2e79ca129202..5464c0418bab 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -105,15 +105,6 @@ fw_domain_put(struct drm_i915_private *i915, __raw_i915_write32(i915, d->reg_set, d->val_clear); } -static inline void -fw_domain_posting_read(struct drm_i915_private *i915, - const struct intel_uncore_forcewake_domain *d) -{ - /* something from same cacheline, but not from the set register */ - if (i915_mmio_reg_valid(d->reg_post)) - __raw_posting_read(i915, d->reg_post); -} - static void fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) { @@ -141,27 +132,12 @@ fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); - for_each_fw_domain_masked(d, fw_domains, i915, tmp) { + for_each_fw_domain_masked(d, fw_domains, i915, tmp) fw_domain_put(i915, d); - fw_domain_posting_read(i915, d); - } i915->uncore.fw_domains_active &= ~fw_domains; } -static void -fw_domains_posting_read(struct drm_i915_private *i915) -{ - struct intel_uncore_forcewake_domain *d; - unsigned int tmp; - - /* No need to do for all, just do for first found */ - for_each_fw_domain(d, i915, tmp) { - fw_domain_posting_read(i915, d); - break; - } -} - static void fw_domains_reset(struct drm_i915_private *i915, enum forcewake_domains fw_domains) @@ -176,8 +152,6 @@ fw_domains_reset(struct drm_i915_private *i915, for_each_fw_domain_masked(d, fw_domains, i915, tmp) fw_domain_reset(i915, d); - - fw_domains_posting_read(i915); } static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv) @@ -1180,11 +1154,6 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, d->val_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); } - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - d->reg_post = FORCEWAKE_ACK_VLV; - else if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv) || IS_GEN8(dev_priv)) - d->reg_post = ECOBUS; - d->id = domain_id; BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); -- cgit v1.2.3 From 6e3955a5af1c3a0b7d9a50be327a71e9065c64ae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 10:19:43 +0000 Subject: drm/i915: All fw_domains share the same set/clear/reset values Since we reuse the same values for each fw_domain, move them onto uncore. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170323101944.21627-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 11 +++++----- drivers/gpu/drm/i915/intel_uncore.c | 40 +++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bb0e89d6255c..2911c49113b0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -774,16 +774,17 @@ struct intel_uncore { enum forcewake_domains fw_domains; enum forcewake_domains fw_domains_active; + u32 fw_set; + u32 fw_clear; + u32 fw_reset; + struct intel_uncore_forcewake_domain { enum forcewake_domain_id id; enum forcewake_domains mask; unsigned wake_count; struct hrtimer timer; i915_reg_t reg_set; - u32 val_set; - u32 val_clear; i915_reg_t reg_ack; - u32 val_reset; } fw_domain[FW_DOMAIN_ID_COUNT]; int unclaimed_mmio_check; @@ -3956,14 +3957,14 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg) #define __raw_read(x, s) \ -static inline uint##x##_t __raw_i915_read##x(struct drm_i915_private *dev_priv, \ +static inline uint##x##_t __raw_i915_read##x(const struct drm_i915_private *dev_priv, \ i915_reg_t reg) \ { \ return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \ } #define __raw_write(x, s) \ -static inline void __raw_i915_write##x(struct drm_i915_private *dev_priv, \ +static inline void __raw_i915_write##x(const struct drm_i915_private *dev_priv, \ i915_reg_t reg, uint##x##_t val) \ { \ write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 5464c0418bab..6d1ea26b2493 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -55,8 +55,7 @@ static inline void fw_domain_reset(struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { - WARN_ON(!i915_mmio_reg_valid(d->reg_set)); - __raw_i915_write32(i915, d->reg_set, d->val_reset); + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_reset); } static inline void @@ -70,7 +69,7 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) } static inline void -fw_domain_wait_ack_clear(struct drm_i915_private *i915, +fw_domain_wait_ack_clear(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & @@ -84,11 +83,11 @@ static inline void fw_domain_get(struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { - __raw_i915_write32(i915, d->reg_set, d->val_set); + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_set); } static inline void -fw_domain_wait_ack(struct drm_i915_private *i915, +fw_domain_wait_ack(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & @@ -99,10 +98,10 @@ fw_domain_wait_ack(struct drm_i915_private *i915, } static inline void -fw_domain_put(struct drm_i915_private *i915, +fw_domain_put(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { - __raw_i915_write32(i915, d->reg_set, d->val_clear); + __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_clear); } static void @@ -1139,21 +1138,13 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, WARN_ON(d->wake_count); + WARN_ON(!i915_mmio_reg_valid(reg_set)); + WARN_ON(!i915_mmio_reg_valid(reg_ack)); + d->wake_count = 0; d->reg_set = reg_set; d->reg_ack = reg_ack; - if (IS_GEN6(dev_priv)) { - d->val_reset = 0; - d->val_set = FORCEWAKE_KERNEL; - d->val_clear = 0; - } else { - /* WaRsClearFWBitsAtReset:bdw,skl */ - d->val_reset = _MASKED_BIT_DISABLE(0xffff); - d->val_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL); - d->val_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); - } - d->id = domain_id; BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER)); @@ -1165,7 +1156,7 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); d->timer.function = intel_uncore_fw_release_timer; - dev_priv->uncore.fw_domains |= (1 << domain_id); + dev_priv->uncore.fw_domains |= BIT(domain_id); fw_domain_reset(dev_priv, d); } @@ -1175,6 +1166,17 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 5 || intel_vgpu_active(dev_priv)) return; + if (IS_GEN6(dev_priv)) { + dev_priv->uncore.fw_reset = 0; + dev_priv->uncore.fw_set = FORCEWAKE_KERNEL; + dev_priv->uncore.fw_clear = 0; + } else { + /* WaRsClearFWBitsAtReset:bdw,skl */ + dev_priv->uncore.fw_reset = _MASKED_BIT_DISABLE(0xffff); + dev_priv->uncore.fw_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL); + dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL); + } + if (IS_GEN9(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; -- cgit v1.2.3 From 233ebf5733d51cb7581b8d330799520d69484cc4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 10:19:44 +0000 Subject: drm/i915: Drop uncore spinlock for reading debugfs forcewake counters The set of available structs is not protected by the spinlock, and for the single read we can use READ_ONCE instead. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170323101944.21627-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fcac795d4396..f30591f44d3a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1459,17 +1459,14 @@ static int ironlake_drpc_info(struct seq_file *m) static int i915_forcewake_domains(struct seq_file *m, void *data) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); struct intel_uncore_forcewake_domain *fw_domain; unsigned int tmp; - spin_lock_irq(&dev_priv->uncore.lock); - for_each_fw_domain(fw_domain, dev_priv, tmp) { + for_each_fw_domain(fw_domain, i915, tmp) seq_printf(m, "%s.wake_count = %u\n", intel_uncore_forcewake_domain_to_str(fw_domain->id), - fw_domain->wake_count); - } - spin_unlock_irq(&dev_priv->uncore.lock); + READ_ONCE(fw_domain->wake_count)); return 0; } -- cgit v1.2.3 From d223760f38b13f3c0a14978b6c4261b9bd690494 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 08:57:58 +0000 Subject: drm/i915: Wait for all fences before installing an exclusive clflush fence Ensure that before we overwrite the reservation_object with our exclusive fence for the pending clflush operation, that we do wait upon all the fences in the current reservation_object. Fixes: 57822dc6b9cf ("drm/i915: Perform object clflushing asynchronously") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170323085758.11695-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index d925fb582ba7..ffd01e02fe94 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -168,7 +168,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, i915_sw_fence_await_reservation(&clflush->wait, obj->resv, NULL, - false, I915_FENCE_TIMEOUT, + true, I915_FENCE_TIMEOUT, GFP_KERNEL); reservation_object_lock(obj->resv, NULL); -- cgit v1.2.3 From abddffdf3620e3974061fa0715a1db2142b6b893 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 22 Mar 2017 10:39:44 -0700 Subject: drm/i915/guc: Sanitize GuC client initialization Started adding proper teardown to guc_client_alloc, ended up removing quite a few dead ends where errors communicating with the GuC were silently ignored. There also seemed to be quite a few erronous teardown actions performed in case of an error (ordering wrong). v2: - Increase function symmetry/proximity (Michal/Daniele) - Fix __reserve_doorbell accounting for high priority (Daniele) - Call __update_doorbell_desc! (Daniele) - Isolate __guc_{,de}allocate_doorbell (Michal/Daniele) v3: - "Select" a cacheline is a more accurate verb than "reserve" (Daniele). - We cannot update & create the doorbell without reserving it first, so move the whole doorbell creation for execbuf_client to the submission enable (Oscar).i - Add a fixme for ignoring possible doorbell destroy errors. v4: - Remove comment about is_high_priority (Daniele) - Debug message typo (Daniele) - Reuse __get_doorbell in more places (Daniele) - Do not do arithmetic on void pointers (Daniele) - Add comment to __reset_doorbell (Daniele) v5: - gccisms like arithmetic on void pointers are not frowned upon (Oscar) Signed-off-by: Joonas Lahtinen Cc: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Tvrtko Ursulin Cc: Chris Wilson Signed-off-by: Oscar Mateo Reviewed-by: Daniele Ceraolo Spurio --- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_guc_submission.c | 398 ++++++++++++++++------------- drivers/gpu/drm/i915/intel_guc_fwif.h | 4 +- drivers/gpu/drm/i915/intel_uc.h | 11 +- 4 files changed, 233 insertions(+), 184 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f30591f44d3a..e2390df22e08 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2473,7 +2473,7 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", client->priority, client->ctx_index, client->proc_desc_offset); - seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n", + seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n", client->doorbell_id, client->doorbell_offset, client->doorbell_cookie); seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", client->wq_size, client->wq_offset, client->wq_tail); @@ -2508,7 +2508,7 @@ static int i915_guc_info(struct seq_file *m, void *data) } seq_printf(m, "Doorbell map:\n"); - seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap); + seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); seq_printf(m, "GuC total action count: %llu\n", guc->action_count); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 055467a53dc3..be88bbebd155 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -64,27 +64,70 @@ * */ +static inline bool is_high_priority(struct i915_guc_client* client) +{ + return client->priority <= GUC_CTX_PRIORITY_HIGH; +} + +static int __reserve_doorbell(struct i915_guc_client *client) +{ + unsigned long offset; + unsigned long end; + u16 id; + + GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID); + + /* + * The bitmap tracks which doorbell registers are currently in use. + * It is split into two halves; the first half is used for normal + * priority contexts, the second half for high-priority ones. + */ + offset = 0; + end = GUC_NUM_DOORBELLS/2; + if (is_high_priority(client)) { + offset = end; + end += offset; + } + + id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end); + if (id == end) + return -ENOSPC; + + __set_bit(id, client->guc->doorbell_bitmap); + client->doorbell_id = id; + DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n", + client->ctx_index, yesno(is_high_priority(client)), + id); + return 0; +} + +static void __unreserve_doorbell(struct i915_guc_client *client) +{ + GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); + + __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); + client->doorbell_id = GUC_DOORBELL_INVALID; +} + /* * Tell the GuC to allocate or deallocate a specific doorbell */ -static int guc_allocate_doorbell(struct intel_guc *guc, - struct i915_guc_client *client) +static int __guc_allocate_doorbell(struct intel_guc *guc, u32 ctx_index) { u32 action[] = { INTEL_GUC_ACTION_ALLOCATE_DOORBELL, - client->ctx_index + ctx_index }; return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static int guc_release_doorbell(struct intel_guc *guc, - struct i915_guc_client *client) +static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index) { u32 action[] = { INTEL_GUC_ACTION_DEALLOCATE_DOORBELL, - client->ctx_index + ctx_index }; return intel_guc_send(guc, action, ARRAY_SIZE(action)); @@ -97,104 +140,100 @@ static int guc_release_doorbell(struct intel_guc *guc, * client object which contains the page being used for the doorbell */ -static int guc_update_doorbell_id(struct intel_guc *guc, - struct i915_guc_client *client, - u16 new_id) +static int __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) { - struct sg_table *sg = guc->ctx_pool_vma->pages; - void *doorbell_bitmap = guc->doorbell_bitmap; - struct guc_doorbell_info *doorbell; + struct sg_table *sg = client->guc->ctx_pool_vma->pages; struct guc_context_desc desc; size_t len; - doorbell = client->vaddr + client->doorbell_offset; - - if (client->doorbell_id != GUC_INVALID_DOORBELL_ID && - test_bit(client->doorbell_id, doorbell_bitmap)) { - /* Deactivate the old doorbell */ - doorbell->db_status = GUC_DOORBELL_DISABLED; - (void)guc_release_doorbell(guc, client); - __clear_bit(client->doorbell_id, doorbell_bitmap); - } - /* Update the GuC's idea of the doorbell ID */ len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); + sizeof(desc) * client->ctx_index); if (len != sizeof(desc)) return -EFAULT; + desc.db_id = new_id; len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); + sizeof(desc) * client->ctx_index); if (len != sizeof(desc)) return -EFAULT; - client->doorbell_id = new_id; - if (new_id == GUC_INVALID_DOORBELL_ID) - return 0; + return 0; +} - /* Activate the new doorbell */ - __set_bit(new_id, doorbell_bitmap); +static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client) +{ + return client->vaddr + client->doorbell_offset; +} + +static bool has_doorbell(struct i915_guc_client *client) +{ + if (client->doorbell_id == GUC_DOORBELL_INVALID) + return false; + + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); +} + +static int __create_doorbell(struct i915_guc_client *client) +{ + struct guc_doorbell_info *doorbell; + int err; + + doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_ENABLED; doorbell->cookie = client->doorbell_cookie; - return guc_allocate_doorbell(guc, client); + + err = __guc_allocate_doorbell(client->guc, client->ctx_index); + if (err) { + doorbell->db_status = GUC_DOORBELL_DISABLED; + doorbell->cookie = 0; + } + return err; } -static void guc_disable_doorbell(struct intel_guc *guc, - struct i915_guc_client *client) +static int __destroy_doorbell(struct i915_guc_client *client) { - (void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID); + struct guc_doorbell_info *doorbell; - /* XXX: wait for any interrupts */ - /* XXX: wait for workqueue to drain */ + doorbell = __get_doorbell(client); + doorbell->db_status = GUC_DOORBELL_DISABLED; + doorbell->cookie = 0; + + return __guc_deallocate_doorbell(client->guc, client->ctx_index); } -static uint16_t -select_doorbell_register(struct intel_guc *guc, uint32_t priority) +static int destroy_doorbell(struct i915_guc_client *client) { - /* - * The bitmap tracks which doorbell registers are currently in use. - * It is split into two halves; the first half is used for normal - * priority contexts, the second half for high-priority ones. - * Note that logically higher priorities are numerically less than - * normal ones, so the test below means "is it high-priority?" - */ - const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH); - const uint16_t half = GUC_MAX_DOORBELLS / 2; - const uint16_t start = hi_pri ? half : 0; - const uint16_t end = start + half; - uint16_t id; + int err; - id = find_next_zero_bit(guc->doorbell_bitmap, end, start); - if (id == end) - id = GUC_INVALID_DOORBELL_ID; + GEM_BUG_ON(!has_doorbell(client)); + + /* XXX: wait for any interrupts */ + /* XXX: wait for workqueue to drain */ - DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n", - hi_pri ? "high" : "normal", id); + err = __destroy_doorbell(client); + if (err) + return err; - return id; -} + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); -/* - * Select, assign and relase doorbell cachelines - * - * These functions track which doorbell cachelines are in use. - * The data they manipulate is protected by the intel_guc_send lock. - */ + __unreserve_doorbell(client); + + return 0; +} -static uint32_t select_doorbell_cacheline(struct intel_guc *guc) +static unsigned long __select_cacheline(struct intel_guc* guc) { - const uint32_t cacheline_size = cache_line_size(); - uint32_t offset; + unsigned long offset; /* Doorbell uses a single cache line within a page */ offset = offset_in_page(guc->db_cacheline); /* Moving to next cache line to reduce contention */ - guc->db_cacheline += cacheline_size; - - DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n", - offset, guc->db_cacheline, cacheline_size); + guc->db_cacheline += cache_line_size(); + DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", + offset, guc->db_cacheline, cache_line_size()); return offset; } @@ -293,8 +332,7 @@ static void guc_ctx_desc_init(struct intel_guc *guc, gfx_addr = guc_ggtt_offset(client->vma); desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc.db_trigger_cpu = - (uintptr_t)client->vaddr + client->doorbell_offset; + desc.db_trigger_cpu = (uintptr_t)__get_doorbell(client); desc.db_trigger_uk = gfx_addr + client->doorbell_offset; desc.process_desc = gfx_addr + client->proc_desc_offset; desc.wq_addr = gfx_addr + client->wq_offset; @@ -463,7 +501,7 @@ static int guc_ring_doorbell(struct i915_guc_client *client) db_exc.cookie = 1; /* pointer of current doorbell cacheline */ - db = client->vaddr + client->doorbell_offset; + db = (union guc_doorbell_qw *)__get_doorbell(client); while (attempt--) { /* lets ring the doorbell */ @@ -694,93 +732,101 @@ err: return vma; } -static void -guc_client_free(struct drm_i915_private *dev_priv, - struct i915_guc_client *client) +static void guc_client_free(struct i915_guc_client *client) { - struct intel_guc *guc = &dev_priv->guc; - - if (!client) - return; - /* * XXX: wait for any outstanding submissions before freeing memory. * Be sure to drop any locks */ - - if (client->vaddr) { - /* - * If we got as far as setting up a doorbell, make sure we - * shut it down before unmapping & deallocating the memory. - */ - guc_disable_doorbell(guc, client); - - i915_gem_object_unpin_map(client->vma->obj); - } - + guc_ctx_desc_fini(client->guc, client); + i915_gem_object_unpin_map(client->vma->obj); i915_vma_unpin_and_release(&client->vma); - - if (client->ctx_index != GUC_INVALID_CTX_ID) { - guc_ctx_desc_fini(guc, client); - ida_simple_remove(&guc->ctx_ids, client->ctx_index); - } - + ida_simple_remove(&client->guc->ctx_ids, client->ctx_index); kfree(client); } /* Check that a doorbell register is in the expected state */ -static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) +static bool doorbell_ok(struct intel_guc *guc, u16 db_id) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - i915_reg_t drbreg = GEN8_DRBREGL(db_id); - uint32_t value = I915_READ(drbreg); - bool enabled = (value & GUC_DOORBELL_ENABLED) != 0; - bool expected = test_bit(db_id, guc->doorbell_bitmap); + u32 drbregl; + bool valid; + + GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); + + drbregl = I915_READ(GEN8_DRBREGL(db_id)); + valid = drbregl & GEN8_DRB_VALID; - if (enabled == expected) + if (test_bit(db_id, guc->doorbell_bitmap) == valid) return true; - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n", - db_id, drbreg.reg, value, - expected ? "active" : "inactive"); + DRM_DEBUG_DRIVER("Doorbell %d has unexpected state (0x%x): valid=%s\n", + db_id, drbregl, yesno(valid)); return false; } +/* + * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and + * reloaded the GuC FW) we can use this function to tell the GuC to reassign the + * doorbell to the rightful owner. + */ +static int __reset_doorbell(struct i915_guc_client* client, u16 db_id) +{ + int err; + + err = __update_doorbell_desc(client, db_id); + if (!err) + err = __create_doorbell(client); + if (!err) + err = __destroy_doorbell(client); + + return err; +} + /* * Borrow the first client to set up & tear down each unused doorbell * in turn, to ensure that all doorbell h/w is (re)initialised. */ -static void guc_init_doorbell_hw(struct intel_guc *guc) +static int guc_init_doorbell_hw(struct intel_guc *guc) { struct i915_guc_client *client = guc->execbuf_client; - uint16_t db_id; - int i, err; + int err; + int i; - guc_disable_doorbell(guc, client); + if (has_doorbell(client)) + destroy_doorbell(client); - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - /* Skip if doorbell is OK */ - if (guc_doorbell_check(guc, i)) + for (i = 0; i < GUC_NUM_DOORBELLS; ++i) { + if (doorbell_ok(guc, i)) continue; - err = guc_update_doorbell_id(guc, client, i); - if (err) - DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n", - i, err); + err = __reset_doorbell(client, i); + WARN(err, "Doorbell %d reset failed, err %d\n", i, err); } - db_id = select_doorbell_register(guc, client->priority); - WARN_ON(db_id == GUC_INVALID_DOORBELL_ID); + /* Read back & verify all doorbell registers */ + for (i = 0; i < GUC_NUM_DOORBELLS; ++i) + WARN_ON(!doorbell_ok(guc, i)); - err = guc_update_doorbell_id(guc, client, db_id); + err = __reserve_doorbell(client); if (err) - DRM_WARN("Failed to restore doorbell to %d, err %d\n", - db_id, err); + return err; - /* Read back & verify all doorbell registers */ - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) - (void)guc_doorbell_check(guc, i); + err = __update_doorbell_desc(client, client->doorbell_id); + if (err) + goto err_reserve; + + err = __create_doorbell(client); + if (err) + goto err_update; + + return 0; +err_reserve: + __unreserve_doorbell(client); +err_update: + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); + return err; } /** @@ -806,49 +852,46 @@ guc_client_alloc(struct drm_i915_private *dev_priv, struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; void *vaddr; - uint16_t db_id; + int ret; client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) - return NULL; + return ERR_PTR(-ENOMEM); - client->owner = ctx; client->guc = guc; + client->owner = ctx; client->engines = engines; client->priority = priority; - client->doorbell_id = GUC_INVALID_DOORBELL_ID; + client->doorbell_id = GUC_DOORBELL_INVALID; + client->wq_offset = GUC_DB_SIZE; + client->wq_size = GUC_WQ_SIZE; + spin_lock_init(&client->wq_lock); - client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, - GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); - if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) { - client->ctx_index = GUC_INVALID_CTX_ID; - goto err; - } + ret = ida_simple_get(&guc->ctx_ids, 0, GUC_MAX_GPU_CONTEXTS, + GFP_KERNEL); + if (ret < 0) + goto err_client; + + client->ctx_index = ret; /* The first page is doorbell/proc_desc. Two followed pages are wq. */ vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); - if (IS_ERR(vma)) - goto err; + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_id; + } /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ client->vma = vma; vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) - goto err; - + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_vma; + } client->vaddr = vaddr; - spin_lock_init(&client->wq_lock); - client->wq_offset = GUC_DB_SIZE; - client->wq_size = GUC_WQ_SIZE; - - db_id = select_doorbell_register(guc, client->priority); - if (db_id == GUC_INVALID_DOORBELL_ID) - /* XXX: evict a doorbell instead? */ - goto err; - - client->doorbell_offset = select_doorbell_cacheline(guc); + client->doorbell_offset = __select_cacheline(guc); /* * Since the doorbell only requires a single cacheline, we can save @@ -863,27 +906,28 @@ guc_client_alloc(struct drm_i915_private *dev_priv, guc_proc_desc_init(guc, client); guc_ctx_desc_init(guc, client); - /* For runtime client allocation we need to enable the doorbell. Not - * required yet for the static execbuf_client as this special kernel - * client is enabled from i915_guc_submission_enable(). - * - * guc_update_doorbell_id(guc, client, db_id); - */ + /* FIXME: Runtime client allocation (which currently we don't do) will + * require that the doorbell gets created now. The static execbuf_client + * is now getting its doorbell later (on submission enable) but maybe we + * also want to reorder things in the future so that we don't have to + * special case the doorbell creation */ DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", - priority, client, client->engines, client->ctx_index); - DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", - client->doorbell_id, client->doorbell_offset); + priority, client, client->engines, client->ctx_index); + DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n", + client->doorbell_id, client->doorbell_offset); return client; +err_vma: + i915_vma_unpin_and_release(&client->vma); +err_id: + ida_simple_remove(&guc->ctx_ids, client->ctx_index); +err_client: + kfree(client); -err: - guc_client_free(dev_priv, client); - return NULL; + return ERR_PTR(ret); } - - static void guc_policies_init(struct guc_policies *policies) { struct guc_policy *policy; @@ -984,7 +1028,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) return 0; /* Wipe bitmap & delete client in case of reinitialisation */ - bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); + bitmap_clear(guc->doorbell_bitmap, 0, GUC_NUM_DOORBELLS); i915_guc_submission_disable(dev_priv); if (!i915.enable_guc_submission) @@ -1006,7 +1050,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) INTEL_INFO(dev_priv)->ring_mask, GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); - if (!guc->execbuf_client) { + if (IS_ERR(guc->execbuf_client)) { DRM_ERROR("Failed to create GuC client for execbuf!\n"); goto err; } @@ -1077,14 +1121,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) struct i915_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine; enum intel_engine_id id; + int err; if (!client) return -ENODEV; - intel_guc_sample_forcewake(guc); + err = intel_guc_sample_forcewake(guc); + if (err) + return err; guc_reset_wq(client); - guc_init_doorbell_hw(guc); + err = guc_init_doorbell_hw(guc); + if (err) + return err; /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(dev_priv); @@ -1146,6 +1195,11 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) if (!guc->execbuf_client) return; + /* FIXME: in many cases, by the time we get here the GuC has been + * reset, so we cannot destroy the doorbell properly. Ignore the + * error message for now */ + destroy_doorbell(guc->execbuf_client); + /* Revert back to manual ELSP submission */ intel_engines_reset_default_submission(dev_priv); } @@ -1156,10 +1210,8 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) struct i915_guc_client *client; client = fetch_and_zero(&guc->execbuf_client); - if (!client) - return; - - guc_client_free(dev_priv, client); + if (client && !IS_ERR(client)) + guc_client_free(client); i915_vma_unpin_and_release(&guc->ads_vma); i915_vma_unpin_and_release(&guc->log.vma); @@ -1221,5 +1273,3 @@ int intel_guc_resume(struct drm_i915_private *dev_priv) return intel_guc_send(guc, data, ARRAY_SIZE(data)); } - - diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 25691f0e4c50..3ae8cefa47b3 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -241,8 +241,8 @@ union guc_doorbell_qw { u64 value_qw; } __packed; -#define GUC_MAX_DOORBELLS 256 -#define GUC_INVALID_DOORBELL_ID (GUC_MAX_DOORBELLS) +#define GUC_NUM_DOORBELLS 256 +#define GUC_DOORBELL_INVALID (GUC_NUM_DOORBELLS) #define GUC_DB_SIZE (PAGE_SIZE) #define GUC_WQ_SIZE (PAGE_SIZE * 2) diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index a35ededfaa40..c3a3843e702a 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -72,13 +72,12 @@ struct i915_guc_client { uint32_t engines; /* bitmap of (host) engine ids */ uint32_t priority; - uint32_t ctx_index; + u32 ctx_index; uint32_t proc_desc_offset; - uint32_t doorbell_offset; - uint32_t doorbell_cookie; - uint16_t doorbell_id; - uint16_t padding[3]; /* Maintain alignment */ + u16 doorbell_id; + unsigned long doorbell_offset; + u32 doorbell_cookie; spinlock_t wq_lock; uint32_t wq_offset; @@ -159,7 +158,7 @@ struct intel_guc { struct i915_guc_client *execbuf_client; - DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS); + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); uint32_t db_cacheline; /* Cyclic counter mod pagesize */ /* Action status & statistics */ -- cgit v1.2.3 From 73b055349c40ac721f4939263f173c2238524091 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:45 -0700 Subject: drm/i915/guc: Keep the ctx_pool_vaddr mapped, for easy access The GuC descriptor is big in size. If we use a local definition of guc_desc we have a chance to overflow stack, so avoid it. Also, Chris abhors scatterlists :) v2: Rebased, helper function to retrieve the context descriptor, s/ctx_pool_vma/ctx_pool/ v3: Zero out guc_context_desc before initialization v4: Do not do arithmetic on void pointers (Daniele) v5: Nicer than arithmetic on pointers (Chris, Joonas) Signed-off-by: Oscar Mateo Cc: Daniele Ceraolo Spurio Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_guc_submission.c | 94 +++++++++++++++--------------- drivers/gpu/drm/i915/intel_guc_loader.c | 2 +- drivers/gpu/drm/i915/intel_uc.h | 3 +- 3 files changed, 49 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index be88bbebd155..9ba69276754b 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -133,6 +133,13 @@ static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } +static struct guc_context_desc *__get_context_desc(struct i915_guc_client *client) +{ + struct guc_context_desc *base = client->guc->ctx_pool_vaddr; + + return &base[client->ctx_index]; +} + /* * Initialise, update, or clear doorbell data shared with the GuC * @@ -142,21 +149,11 @@ static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index) static int __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) { - struct sg_table *sg = client->guc->ctx_pool_vma->pages; - struct guc_context_desc desc; - size_t len; + struct guc_context_desc *desc; /* Update the GuC's idea of the doorbell ID */ - len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); - if (len != sizeof(desc)) - return -EFAULT; - - desc.db_id = new_id; - len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); - if (len != sizeof(desc)) - return -EFAULT; + desc = __get_context_desc(client); + desc->db_id = new_id; return 0; } @@ -271,29 +268,28 @@ static void guc_proc_desc_init(struct intel_guc *guc, * data structures relating to this client (doorbell, process descriptor, * write queue, etc). */ - static void guc_ctx_desc_init(struct intel_guc *guc, struct i915_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; struct i915_gem_context *ctx = client->owner; - struct guc_context_desc desc; - struct sg_table *sg; + struct guc_context_desc *desc; unsigned int tmp; u32 gfx_addr; - memset(&desc, 0, sizeof(desc)); + desc = __get_context_desc(client); + memset(desc, 0, sizeof(*desc)); - desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; - desc.context_id = client->ctx_index; - desc.priority = client->priority; - desc.db_id = client->doorbell_id; + desc->attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; + desc->context_id = client->ctx_index; + desc->priority = client->priority; + desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { struct intel_context *ce = &ctx->engine[engine->id]; uint32_t guc_engine_id = engine->guc_id; - struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; + struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; /* TODO: We have a design issue to be solved here. Only when we * receive the first batch, we know which engine is used by the @@ -318,49 +314,40 @@ static void guc_ctx_desc_init(struct intel_guc *guc, lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; - desc.engines_used |= (1 << guc_engine_id); + desc->engines_used |= (1 << guc_engine_id); } DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", - client->engines, desc.engines_used); - WARN_ON(desc.engines_used == 0); + client->engines, desc->engines_used); + WARN_ON(desc->engines_used == 0); /* * The doorbell, process descriptor, and workqueue are all parts * of the client object, which the GuC will reference via the GGTT */ gfx_addr = guc_ggtt_offset(client->vma); - desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + + desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc.db_trigger_cpu = (uintptr_t)__get_doorbell(client); - desc.db_trigger_uk = gfx_addr + client->doorbell_offset; - desc.process_desc = gfx_addr + client->proc_desc_offset; - desc.wq_addr = gfx_addr + client->wq_offset; - desc.wq_size = client->wq_size; + desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); + desc->db_trigger_uk = gfx_addr + client->doorbell_offset; + desc->process_desc = gfx_addr + client->proc_desc_offset; + desc->wq_addr = gfx_addr + client->wq_offset; + desc->wq_size = client->wq_size; /* * XXX: Take LRCs from an existing context if this is not an * IsKMDCreatedContext client */ - desc.desc_private = (uintptr_t)client; - - /* Pool context is pinned already */ - sg = guc->ctx_pool_vma->pages; - sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); + desc->desc_private = (uintptr_t)client; } static void guc_ctx_desc_fini(struct intel_guc *guc, struct i915_guc_client *client) { - struct guc_context_desc desc; - struct sg_table *sg; + struct guc_context_desc *desc; - memset(&desc, 0, sizeof(desc)); - - sg = guc->ctx_pool_vma->pages; - sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), - sizeof(desc) * client->ctx_index); + desc = __get_context_desc(client); + memset(desc, 0, sizeof(*desc)); } /** @@ -1023,6 +1010,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) const size_t gemsize = round_up(poolsize, PAGE_SIZE); struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; + void *vaddr; if (!HAS_GUC_SCHED(dev_priv)) return 0; @@ -1034,14 +1022,21 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (!i915.enable_guc_submission) return 0; /* not enabled */ - if (guc->ctx_pool_vma) + if (guc->ctx_pool) return 0; /* already allocated */ vma = intel_guc_allocate_vma(guc, gemsize); if (IS_ERR(vma)) return PTR_ERR(vma); - guc->ctx_pool_vma = vma; + guc->ctx_pool = vma; + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) + goto err; + + guc->ctx_pool_vaddr = vaddr; + ida_init(&guc->ctx_ids); intel_guc_log_create(guc); guc_addon_create(guc); @@ -1216,9 +1211,12 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) i915_vma_unpin_and_release(&guc->ads_vma); i915_vma_unpin_and_release(&guc->log.vma); - if (guc->ctx_pool_vma) + if (guc->ctx_pool_vaddr) { ida_destroy(&guc->ctx_ids); - i915_vma_unpin_and_release(&guc->ctx_pool_vma); + i915_gem_object_unpin_map(guc->ctx_pool->obj); + } + + i915_vma_unpin_and_release(&guc->ctx_pool); } /** diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 2f270d02894c..1a6e47818a58 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -156,7 +156,7 @@ static void guc_params_init(struct drm_i915_private *dev_priv) /* If GuC submission is enabled, set up additional parameters here */ if (i915.enable_guc_submission) { - u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool_vma); + u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool); u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; pgs >>= PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index c3a3843e702a..77f7153556e8 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -153,7 +153,8 @@ struct intel_guc { bool interrupts_enabled; struct i915_vma *ads_vma; - struct i915_vma *ctx_pool_vma; + struct i915_vma *ctx_pool; + void *ctx_pool_vaddr; struct ida ctx_ids; struct i915_guc_client *execbuf_client; -- cgit v1.2.3 From 3950bf3dbff1010e3309416f9fe2acbea03156d5 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:46 -0700 Subject: drm/i915/guc: Add onion teardown to the GuC setup Starting with intel_guc_loader, down to intel_guc_submission and finally to intel_guc_log. v2: - Null execbuf client outside guc_client_free (Daniele) - Assert if things try to get allocated twice (Daniele/Joonas) - Null guc->log.buf_addr when destroyed (Daniele) - Newline between returning success and error labels (Joonas) - Remove some unnecessary comments (Joonas) - Keep guc_log_create_extras naming convention (Joonas) - Helper function guc_log_has_extras (Joonas) - No need for separate relay_channel create/destroy. It's just another extra. - No need to nullify guc->log.flush_wq when destroyed (Joonas) - Hoist the check for has_extras out of guc_log_create_extras (Joonas) - Try to do i915_guc_log_register/unregister calls (kind of) symmetric (Daniele) - Make sure initel_guc_fini is not called before init is ever called (Daniele) v3: - Remove unnecessary parenthesis (Joonas) - Check for logs enabled on debugfs registration - Rebase on top of Tvrtko's "Fix request re-submission after reset" v4: - Rebased - Comment around enabling/disabling interrupts inside GuC logging (Joonas) Signed-off-by: Oscar Mateo Cc: Daniele Ceraolo Spurio Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 11 +- drivers/gpu/drm/i915/i915_gem.c | 10 +- drivers/gpu/drm/i915/i915_guc_submission.c | 97 ++++---- drivers/gpu/drm/i915/intel_guc_loader.c | 21 -- drivers/gpu/drm/i915/intel_guc_log.c | 364 ++++++++++++++--------------- drivers/gpu/drm/i915/intel_uc.c | 55 +++-- drivers/gpu/drm/i915/intel_uc.h | 8 +- 7 files changed, 297 insertions(+), 269 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 03d9e45694c9..6d9944a00b7d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -549,6 +549,8 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { static void i915_gem_fini(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->drm.struct_mutex); + if (i915.enable_guc_loading) + intel_uc_fini_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_context_fini(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -609,7 +611,7 @@ static int i915_load_modeset_init(struct drm_device *dev) ret = i915_gem_init(dev_priv); if (ret) - goto cleanup_irq; + goto cleanup_uc; intel_modeset_gem_init(dev); @@ -631,9 +633,9 @@ cleanup_gem: if (i915_gem_suspend(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); i915_gem_fini(dev_priv); +cleanup_uc: + intel_uc_fini_fw(dev_priv); cleanup_irq: - intel_guc_fini(dev_priv); - intel_huc_fini(dev_priv); drm_irq_uninstall(dev); intel_teardown_gmbus(dev_priv); cleanup_csr: @@ -1369,9 +1371,8 @@ void i915_driver_unload(struct drm_device *dev) /* Flush any outstanding unpin_work. */ drain_workqueue(dev_priv->wq); - intel_guc_fini(dev_priv); - intel_huc_fini(dev_priv); i915_gem_fini(dev_priv); + intel_uc_fini_fw(dev_priv); intel_fbc_cleanup_cfb(dev_priv); intel_power_domains_fini(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2a32d7c120fc..9515daea6358 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4608,10 +4608,12 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); - /* We can't enable contexts until all firmware is loaded */ - ret = intel_uc_init_hw(dev_priv); - if (ret) - goto out; + if (i915.enable_guc_loading) { + /* We can't enable contexts until all firmware is loaded */ + ret = intel_uc_init_hw(dev_priv); + if (ret) + goto out; + } out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 9ba69276754b..7426eb6b089a 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -911,7 +911,6 @@ err_id: ida_simple_remove(&guc->ctx_ids, client->ctx_index); err_client: kfree(client); - return ERR_PTR(ret); } @@ -937,7 +936,7 @@ static void guc_policies_init(struct guc_policies *policies) policies->is_valid = 1; } -static void guc_addon_create(struct intel_guc *guc) +static int guc_addon_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct i915_vma *vma; @@ -953,14 +952,13 @@ static void guc_addon_create(struct intel_guc *guc) enum intel_engine_id id; u32 base; - vma = guc->ads_vma; - if (!vma) { - vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); - if (IS_ERR(vma)) - return; + GEM_BUG_ON(guc->ads_vma); - guc->ads_vma = vma; - } + vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + guc->ads_vma = vma; page = i915_vma_first_page(vma); blob = kmap(page); @@ -997,6 +995,13 @@ static void guc_addon_create(struct intel_guc *guc) blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); kunmap(page); + + return 0; +} + +static void guc_addon_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->ads_vma); } /* @@ -1011,6 +1016,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; void *vaddr; + int ret; if (!HAS_GUC_SCHED(dev_priv)) return 0; @@ -1020,10 +1026,10 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) i915_guc_submission_disable(dev_priv); if (!i915.enable_guc_submission) - return 0; /* not enabled */ + return 0; if (guc->ctx_pool) - return 0; /* already allocated */ + return 0; vma = intel_guc_allocate_vma(guc, gemsize); if (IS_ERR(vma)) @@ -1031,15 +1037,23 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) guc->ctx_pool = vma; - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) - goto err; + vaddr = i915_gem_object_pin_map(guc->ctx_pool->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_vma; + } guc->ctx_pool_vaddr = vaddr; + ret = intel_guc_log_create(guc); + if (ret < 0) + goto err_vaddr; + + ret = guc_addon_create(guc); + if (ret < 0) + goto err_log; + ida_init(&guc->ctx_ids); - intel_guc_log_create(guc); - guc_addon_create(guc); guc->execbuf_client = guc_client_alloc(dev_priv, INTEL_INFO(dev_priv)->ring_mask, @@ -1047,14 +1061,37 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) dev_priv->kernel_context); if (IS_ERR(guc->execbuf_client)) { DRM_ERROR("Failed to create GuC client for execbuf!\n"); - goto err; + ret = PTR_ERR(guc->execbuf_client); + goto err_ads; } return 0; -err: - i915_guc_submission_fini(dev_priv); - return -ENOMEM; +err_ads: + guc_addon_destroy(guc); +err_log: + intel_guc_log_destroy(guc); +err_vaddr: + i915_gem_object_unpin_map(guc->ctx_pool->obj); +err_vma: + i915_vma_unpin_and_release(&guc->ctx_pool); + return ret; +} + +void i915_guc_submission_fini(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + + if (!i915.enable_guc_submission) + return 0; + + guc_client_free(guc->execbuf_client); + guc->execbuf_client = NULL; + ida_destroy(&guc->ctx_ids); + guc_addon_destroy(guc); + intel_guc_log_destroy(guc); + i915_gem_object_unpin_map(guc->ctx_pool->obj); + i915_vma_unpin_and_release(&guc->ctx_pool); } static void guc_reset_wq(struct i915_guc_client *client) @@ -1199,26 +1236,6 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) intel_engines_reset_default_submission(dev_priv); } -void i915_guc_submission_fini(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_guc_client *client; - - client = fetch_and_zero(&guc->execbuf_client); - if (client && !IS_ERR(client)) - guc_client_free(client); - - i915_vma_unpin_and_release(&guc->ads_vma); - i915_vma_unpin_and_release(&guc->log.vma); - - if (guc->ctx_pool_vaddr) { - ida_destroy(&guc->ctx_ids); - i915_gem_object_unpin_map(guc->ctx_pool->obj); - } - - i915_vma_unpin_and_release(&guc->ctx_pool); -} - /** * intel_guc_suspend() - notify GuC entering suspend state * @dev_priv: i915 device private diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 1a6e47818a58..b8ba28d27584 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -430,24 +430,3 @@ int intel_guc_select_fw(struct intel_guc *guc) return 0; } - -/** - * intel_guc_fini() - clean up all allocated resources - * @dev_priv: i915 device private - */ -void intel_guc_fini(struct drm_i915_private *dev_priv) -{ - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - struct drm_i915_gem_object *obj; - - mutex_lock(&dev_priv->drm.struct_mutex); - i915_guc_submission_disable(dev_priv); - i915_guc_submission_fini(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - - obj = fetch_and_zero(&guc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; -} diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 5c0f9a49da0e..b39cdd6b1326 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -66,7 +66,6 @@ static int guc_log_control(struct intel_guc *guc, u32 control_val) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } - /* * Sub buffer switch callback. Called whenever relay has to switch to a new * sub buffer, relay stays on the same sub buffer if 0 is returned. @@ -139,45 +138,15 @@ static struct rchan_callbacks relay_callbacks = { .remove_buf_file = remove_buf_file_callback, }; -static void guc_log_remove_relay_file(struct intel_guc *guc) -{ - relay_close(guc->log.relay_chan); -} - -static int guc_log_create_relay_channel(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct rchan *guc_log_relay_chan; - size_t n_subbufs, subbuf_size; - - /* Keep the size of sub buffers same as shared log buffer */ - subbuf_size = guc->log.vma->obj->base.size; - - /* Store up to 8 snapshots, which is large enough to buffer sufficient - * boot time logs and provides enough leeway to User, in terms of - * latency, for consuming the logs from relay. Also doesn't take - * up too much memory. - */ - n_subbufs = 8; - - guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, - n_subbufs, &relay_callbacks, dev_priv); - if (!guc_log_relay_chan) { - DRM_ERROR("Couldn't create relay chan for GuC logging\n"); - return -ENOMEM; - } - - GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); - guc->log.relay_chan = guc_log_relay_chan; - return 0; -} - -static int guc_log_create_relay_file(struct intel_guc *guc) +static int guc_log_relay_file_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct dentry *log_dir; int ret; + if (i915.guc_log_level < 0) + return 0; + /* For now create the log file in /sys/kernel/debug/dri/0 dir */ log_dir = dev_priv->drm.primary->debugfs_root; @@ -198,7 +167,7 @@ static int guc_log_create_relay_file(struct intel_guc *guc) } ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir); - if (ret) { + if (ret < 0 && ret != -EEXIST) { DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); return ret; } @@ -371,31 +340,6 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) } } -static void guc_log_cleanup(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - /* First disable the flush interrupt */ - gen9_disable_guc_interrupts(dev_priv); - - if (guc->log.flush_wq) - destroy_workqueue(guc->log.flush_wq); - - guc->log.flush_wq = NULL; - - if (guc->log.relay_chan) - guc_log_remove_relay_file(guc); - - guc->log.relay_chan = NULL; - - if (guc->log.buf_addr) - i915_gem_object_unpin_map(guc->log.vma->obj); - - guc->log.buf_addr = NULL; -} - static void capture_logs_work(struct work_struct *work) { struct intel_guc *guc = @@ -404,120 +348,105 @@ static void capture_logs_work(struct work_struct *work) guc_log_capture_logs(guc); } +static bool guc_log_has_extras(struct intel_guc *guc) +{ + return guc->log.buf_addr != NULL; +} + static int guc_log_create_extras(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); void *vaddr; - int ret; + struct rchan *guc_log_relay_chan; + size_t n_subbufs, subbuf_size; + int ret = 0; lockdep_assert_held(&dev_priv->drm.struct_mutex); - /* Nothing to do */ - if (i915.guc_log_level < 0) - return 0; - - if (!guc->log.buf_addr) { - /* Create a WC (Uncached for read) vmalloc mapping of log - * buffer pages, so that we can directly get the data - * (up-to-date) from memory. - */ - vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - DRM_ERROR("Couldn't map log buffer pages %d\n", ret); - return ret; - } - - guc->log.buf_addr = vaddr; - } + GEM_BUG_ON(guc_log_has_extras(guc)); - if (!guc->log.relay_chan) { - /* Create a relay channel, so that we have buffers for storing - * the GuC firmware logs, the channel will be linked with a file - * later on when debugfs is registered. - */ - ret = guc_log_create_relay_channel(guc); - if (ret) - return ret; + /* Create a WC (Uncached for read) vmalloc mapping of log + * buffer pages, so that we can directly get the data + * (up-to-date) from memory. + */ + vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + DRM_ERROR("Couldn't map log buffer pages %d\n", ret); + return PTR_ERR(vaddr); } - if (!guc->log.flush_wq) { - INIT_WORK(&guc->log.flush_work, capture_logs_work); - - /* - * GuC log buffer flush work item has to do register access to - * send the ack to GuC and this work item, if not synced before - * suspend, can potentially get executed after the GFX device is - * suspended. - * By marking the WQ as freezable, we don't have to bother about - * flushing of this work item from the suspend hooks, the pending - * work item if any will be either executed before the suspend - * or scheduled later on resume. This way the handling of work - * item can be kept same between system suspend & rpm suspend. - */ - guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (guc->log.flush_wq == NULL) { - DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); - return -ENOMEM; - } - } + guc->log.buf_addr = vaddr; - return 0; -} + /* Keep the size of sub buffers same as shared log buffer */ + subbuf_size = guc->log.vma->obj->base.size; -void intel_guc_log_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - unsigned long offset; - uint32_t size, flags; + /* Store up to 8 snapshots, which is large enough to buffer sufficient + * boot time logs and provides enough leeway to User, in terms of + * latency, for consuming the logs from relay. Also doesn't take + * up too much memory. + */ + n_subbufs = 8; - if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) - i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; + /* Create a relay channel, so that we have buffers for storing + * the GuC firmware logs, the channel will be linked with a file + * later on when debugfs is registered. + */ + guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, + n_subbufs, &relay_callbacks, dev_priv); + if (!guc_log_relay_chan) { + DRM_ERROR("Couldn't create relay chan for GuC logging\n"); - /* The first page is to save log buffer state. Allocate one - * extra page for others in case for overlap */ - size = (1 + GUC_LOG_DPC_PAGES + 1 + - GUC_LOG_ISR_PAGES + 1 + - GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; + ret = -ENOMEM; + goto err_vaddr; + } - vma = guc->log.vma; - if (!vma) { - /* We require SSE 4.1 for fast reads from the GuC log buffer and - * it should be present on the chipsets supporting GuC based - * submisssions. - */ - if (WARN_ON(!i915_has_memcpy_from_wc())) { - /* logging will not be enabled */ - i915.guc_log_level = -1; - return; - } + GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); + guc->log.relay_chan = guc_log_relay_chan; - vma = intel_guc_allocate_vma(guc, size); - if (IS_ERR(vma)) { - /* logging will be off */ - i915.guc_log_level = -1; - return; - } + INIT_WORK(&guc->log.flush_work, capture_logs_work); + + /* + * GuC log buffer flush work item has to do register access to + * send the ack to GuC and this work item, if not synced before + * suspend, can potentially get executed after the GFX device is + * suspended. + * By marking the WQ as freezable, we don't have to bother about + * flushing of this work item from the suspend hooks, the pending + * work item if any will be either executed before the suspend + * or scheduled later on resume. This way the handling of work + * item can be kept same between system suspend & rpm suspend. + */ + guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); + if (!guc->log.flush_wq) { + DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); + ret = -ENOMEM; + goto err_relaychan; + } - guc->log.vma = vma; + return 0; - if (guc_log_create_extras(guc)) { - guc_log_cleanup(guc); - i915_vma_unpin_and_release(&guc->log.vma); - i915.guc_log_level = -1; - return; - } - } +err_relaychan: + relay_close(guc->log.relay_chan); +err_vaddr: + i915_gem_object_unpin_map(guc->log.vma->obj); + guc->log.buf_addr = NULL; + return ret; +} - /* each allocated unit is a page */ - flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | - (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | - (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | - (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); +static void guc_log_destroy_extras(struct intel_guc *guc) +{ + /* + * It's possible that extras were never allocated because guc_log_level + * was < 0 at the time + **/ + if (!guc_log_has_extras(guc)) + return; - offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ - guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; + destroy_workqueue(guc->log.flush_wq); + relay_close(guc->log.relay_chan); + i915_gem_object_unpin_map(guc->log.vma->obj); + guc->log.buf_addr = NULL; } static int guc_log_late_setup(struct intel_guc *guc) @@ -527,24 +456,25 @@ static int guc_log_late_setup(struct intel_guc *guc) lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (i915.guc_log_level < 0) - return -EINVAL; + if (!guc_log_has_extras(guc)) { + /* If log_level was set as -1 at boot time, then setup needed to + * handle log buffer flush interrupts would not have been done yet, + * so do that now. + */ + ret = guc_log_create_extras(guc); + if (ret) + goto err; + } - /* If log_level was set as -1 at boot time, then setup needed to - * handle log buffer flush interrupts would not have been done yet, - * so do that now. - */ - ret = guc_log_create_extras(guc); + ret = guc_log_relay_file_create(guc); if (ret) - goto err; - - ret = guc_log_create_relay_file(guc); - if (ret) - goto err; + goto err_extras; return 0; + +err_extras: + guc_log_destroy_extras(guc); err: - guc_log_cleanup(guc); /* logging will remain off */ i915.guc_log_level = -1; return ret; @@ -586,6 +516,72 @@ static void guc_flush_logs(struct intel_guc *guc) guc_log_capture_logs(guc); } +int intel_guc_log_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + unsigned long offset; + uint32_t size, flags; + int ret; + + GEM_BUG_ON(guc->log.vma); + + if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) + i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; + + /* The first page is to save log buffer state. Allocate one + * extra page for others in case for overlap */ + size = (1 + GUC_LOG_DPC_PAGES + 1 + + GUC_LOG_ISR_PAGES + 1 + + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; + + /* We require SSE 4.1 for fast reads from the GuC log buffer and + * it should be present on the chipsets supporting GuC based + * submisssions. + */ + if (WARN_ON(!i915_has_memcpy_from_wc())) { + ret = -EINVAL; + goto err; + } + + vma = intel_guc_allocate_vma(guc, size); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } + + guc->log.vma = vma; + + if (i915.guc_log_level >= 0) { + ret = guc_log_create_extras(guc); + if (ret < 0) + goto err_vma; + } + + /* each allocated unit is a page */ + flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | + (GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) | + (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | + (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); + + offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ + guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; + + return 0; + +err_vma: + i915_vma_unpin_and_release(&guc->log.vma); +err: + /* logging will be off */ + i915.guc_log_level = -1; + return ret; +} + +void intel_guc_log_destroy(struct intel_guc *guc) +{ + guc_log_destroy_extras(guc); + i915_vma_unpin_and_release(&guc->log.vma); +} + int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) { struct intel_guc *guc = &dev_priv->guc; @@ -609,17 +605,22 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) return ret; } - i915.guc_log_level = log_param.verbosity; + if (log_param.logging_enabled) { + i915.guc_log_level = log_param.verbosity; - /* If log_level was set as -1 at boot time, then the relay channel file - * wouldn't have been created by now and interrupts also would not have - * been enabled. - */ - if (!dev_priv->guc.log.relay_chan) { + /* If log_level was set as -1 at boot time, then the relay channel file + * wouldn't have been created by now and interrupts also would not have + * been enabled. Try again now, just in case. + */ ret = guc_log_late_setup(guc); - if (!ret) - gen9_enable_guc_interrupts(dev_priv); - } else if (!log_param.logging_enabled) { + if (ret < 0) { + DRM_DEBUG_DRIVER("GuC log late setup failed %d\n", ret); + return ret; + } + + /* GuC logging is currently the only user of Guc2Host interrupts */ + gen9_enable_guc_interrupts(dev_priv); + } else { /* Once logging is disabled, GuC won't generate logs & send an * interrupt. But there could be some data in the log buffer * which is yet to be captured. So request GuC to update the log @@ -629,9 +630,6 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) /* As logging is disabled, update log level to reflect that */ i915.guc_log_level = -1; - } else { - /* In case interrupts were disabled, enable them now */ - gen9_enable_guc_interrupts(dev_priv); } return ret; @@ -639,7 +637,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) void i915_guc_log_register(struct drm_i915_private *dev_priv) { - if (!i915.enable_guc_submission) + if (!i915.enable_guc_submission || i915.guc_log_level < 0) return; mutex_lock(&dev_priv->drm.struct_mutex); @@ -653,6 +651,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv) return; mutex_lock(&dev_priv->drm.struct_mutex); - guc_log_cleanup(&dev_priv->guc); + /* GuC logging is currently the only user of Guc2Host interrupts */ + gen9_disable_guc_interrupts(dev_priv); + guc_log_destroy_extras(&dev_priv->guc); mutex_unlock(&dev_priv->drm.struct_mutex); } diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index d15a7d9d4eb0..438e8f32abde 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -95,24 +95,41 @@ void intel_uc_init_fw(struct drm_i915_private *dev_priv) intel_uc_prepare_fw(dev_priv, &dev_priv->guc.fw); } +void intel_uc_fini_fw(struct drm_i915_private *dev_priv) +{ + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_i915_gem_object *obj; + + obj = fetch_and_zero(&guc_fw->obj); + if (obj) + i915_gem_object_put(obj); + + guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + + obj = fetch_and_zero(&huc_fw->obj); + if (obj) + i915_gem_object_put(obj); + + huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; +} + int intel_uc_init_hw(struct drm_i915_private *dev_priv) { int ret, attempts; - /* GuC not enabled, nothing to do */ - if (!i915.enable_guc_loading) - return 0; - gen9_reset_guc_interrupts(dev_priv); /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(dev_priv); - if (i915.enable_guc_submission) { - ret = i915_guc_submission_init(dev_priv); - if (ret) - goto err; - } + /* + * This is stuff we need to have available at fw load time + * if we are planning to enable submission later + */ + ret = i915_guc_submission_init(dev_priv); + if (ret) + goto err_guc; /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ @@ -150,7 +167,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) ret = i915_guc_submission_enable(dev_priv); if (ret) - goto err_submission; + goto err_interrupts; } return 0; @@ -164,11 +181,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) * nonfatal error (i.e. it doesn't prevent driver load, but * marks the GPU as wedged until reset). */ +err_interrupts: + gen9_disable_guc_interrupts(dev_priv); err_submission: - if (i915.enable_guc_submission) - i915_guc_submission_fini(dev_priv); - -err: + i915_guc_submission_fini(dev_priv); +err_guc: i915_ggtt_disable_guc(dev_priv); DRM_ERROR("GuC init failed\n"); @@ -185,6 +202,16 @@ err: return ret; } +void intel_uc_fini_hw(struct drm_i915_private *dev_priv) +{ + if (i915.enable_guc_submission) { + i915_guc_submission_disable(dev_priv); + gen9_disable_guc_interrupts(dev_priv); + } + i915_guc_submission_fini(dev_priv); + i915_ggtt_disable_guc(dev_priv); +} + /* * Read GuC command/status register (SOFT_SCRATCH_0) * Return true if it contains a response rather than a command diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 77f7153556e8..eb30e7af255a 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -187,7 +187,9 @@ struct intel_huc { void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv); +void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); +void intel_uc_fini_hw(struct drm_i915_private *dev_priv); void intel_uc_prepare_fw(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len); @@ -196,7 +198,6 @@ int intel_guc_sample_forcewake(struct intel_guc *guc); /* intel_guc_loader.c */ int intel_guc_select_fw(struct intel_guc *guc); int intel_guc_init_hw(struct intel_guc *guc); -void intel_guc_fini(struct drm_i915_private *dev_priv); const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status); int intel_guc_suspend(struct drm_i915_private *dev_priv); int intel_guc_resume(struct drm_i915_private *dev_priv); @@ -212,10 +213,11 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); /* intel_guc_log.c */ -void intel_guc_log_create(struct intel_guc *guc); +int intel_guc_log_create(struct intel_guc *guc); +void intel_guc_log_destroy(struct intel_guc *guc); +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); void i915_guc_log_register(struct drm_i915_private *dev_priv); void i915_guc_log_unregister(struct drm_i915_private *dev_priv); -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); static inline u32 guc_ggtt_offset(struct i915_vma *vma) { -- cgit v1.2.3 From 0704df2b08e76dfb1fd2688f74bc4409dc66228d Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:47 -0700 Subject: drm/i915/guc: The Additional Data Struct (ADS) should get enabled together with GuC submission It's mandatory and it gets created if and only if GuC submission is enabled, so that should be the condition for informing the GuC about it. Also s/guc_addon_create/guc_ads_create and s/guc_addon_destroy/guc_ads_destroy and, while at it, add an explanation of what things go inside the ADS object. Signed-off-by: Oscar Mateo Cc: Joonas Lahtinen Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_guc_submission.c | 17 ++++++++++++----- drivers/gpu/drm/i915/intel_guc_loader.c | 10 ++++------ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 7426eb6b089a..ab91dac15b5b 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -62,6 +62,13 @@ * ELSP context descriptor dword into Work Item. * See guc_wq_item_append() * + * ADS: + * The Additional Data Struct (ADS) has pointers for different buffers used by + * the GuC. One single gem object contains the ADS struct itself (guc_ads), the + * scheduling policies (guc_policies), a structure describing a collection of + * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save + * its internal state for sleep. + * */ static inline bool is_high_priority(struct i915_guc_client* client) @@ -936,7 +943,7 @@ static void guc_policies_init(struct guc_policies *policies) policies->is_valid = 1; } -static int guc_addon_create(struct intel_guc *guc) +static int guc_ads_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct i915_vma *vma; @@ -999,7 +1006,7 @@ static int guc_addon_create(struct intel_guc *guc) return 0; } -static void guc_addon_destroy(struct intel_guc *guc) +static void guc_ads_destroy(struct intel_guc *guc) { i915_vma_unpin_and_release(&guc->ads_vma); } @@ -1049,7 +1056,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (ret < 0) goto err_vaddr; - ret = guc_addon_create(guc); + ret = guc_ads_create(guc); if (ret < 0) goto err_log; @@ -1068,7 +1075,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) return 0; err_ads: - guc_addon_destroy(guc); + guc_ads_destroy(guc); err_log: intel_guc_log_destroy(guc); err_vaddr: @@ -1088,7 +1095,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) guc_client_free(guc->execbuf_client); guc->execbuf_client = NULL; ida_destroy(&guc->ctx_ids); - guc_addon_destroy(guc); + guc_ads_destroy(guc); intel_guc_log_destroy(guc); i915_gem_object_unpin_map(guc->ctx_pool->obj); i915_vma_unpin_and_release(&guc->ctx_pool); diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index b8ba28d27584..62ebf5605f6e 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -148,17 +148,15 @@ static void guc_params_init(struct drm_i915_private *dev_priv) } else params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; - if (guc->ads_vma) { - u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; - params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; - params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; - } - /* If GuC submission is enabled, set up additional parameters here */ if (i915.enable_guc_submission) { + u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool); u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; + params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; + params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; + pgs >>= PAGE_SHIFT; params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); -- cgit v1.2.3 From e74654738baf465c92f0eec37ce4be891bac36da Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:48 -0700 Subject: drm/i915/guc: Break out the GuC log extras into their own "runtime" struct When initializing the GuC log struct, there is an object we need to allocate always, since the GuC needs its address at fw load time. The rest is only needed during runtime, in the sense that we only create if we actually enable GuC logging. Make that distinction explicit by subdividing further the intel_guc_log struct. v2: Call the new struct "runtime", instead of "extras" (Joonas) v3: Check indent (Joonas) Signed-off-by: Oscar Mateo Cc: Daniele Ceraolo Spurio Reviewed-by: Michal Wajdeczko Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_irq.c | 4 +- drivers/gpu/drm/i915/intel_guc_log.c | 72 ++++++++++++++++++------------------ drivers/gpu/drm/i915/intel_uc.h | 12 +++--- 3 files changed, 45 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index cb20c9408b12..3d2e6232f17a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1741,8 +1741,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) I915_WRITE(SOFT_SCRATCH(15), msg & ~flush); /* Handle flush interrupt in bottom half */ - queue_work(dev_priv->guc.log.flush_wq, - &dev_priv->guc.log.flush_work); + queue_work(dev_priv->guc.log.runtime.flush_wq, + &dev_priv->guc.log.runtime.flush_work); dev_priv->guc.log.flush_interrupt_count++; } else { diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index b39cdd6b1326..6fb63a3c65b0 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -166,7 +166,7 @@ static int guc_log_relay_file_create(struct intel_guc *guc) return -ENODEV; } - ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir); + ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir); if (ret < 0 && ret != -EEXIST) { DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); return ret; @@ -183,15 +183,15 @@ static void guc_move_to_next_buf(struct intel_guc *guc) smp_wmb(); /* All data has been written, so now move the offset of sub buffer. */ - relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size); + relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size); /* Switch to the next sub buffer */ - relay_flush(guc->log.relay_chan); + relay_flush(guc->log.runtime.relay_chan); } static void *guc_get_write_buffer(struct intel_guc *guc) { - if (!guc->log.relay_chan) + if (!guc->log.runtime.relay_chan) return NULL; /* Just get the base address of a new sub buffer and copy data into it @@ -202,7 +202,7 @@ static void *guc_get_write_buffer(struct intel_guc *guc) * done without using relay_reserve() along with relay_write(). So its * better to use relay_reserve() alone. */ - return relay_reserve(guc->log.relay_chan, 0); + return relay_reserve(guc->log.runtime.relay_chan, 0); } static bool guc_check_log_buf_overflow(struct intel_guc *guc, @@ -253,11 +253,11 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) void *src_data, *dst_data; bool new_overflow; - if (WARN_ON(!guc->log.buf_addr)) + if (WARN_ON(!guc->log.runtime.buf_addr)) return; /* Get the pointer to shared GuC log buffer */ - log_buf_state = src_data = guc->log.buf_addr; + log_buf_state = src_data = guc->log.runtime.buf_addr; /* Get the pointer to local buffer to store the logs */ log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); @@ -343,17 +343,17 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) static void capture_logs_work(struct work_struct *work) { struct intel_guc *guc = - container_of(work, struct intel_guc, log.flush_work); + container_of(work, struct intel_guc, log.runtime.flush_work); guc_log_capture_logs(guc); } -static bool guc_log_has_extras(struct intel_guc *guc) +static bool guc_log_has_runtime(struct intel_guc *guc) { - return guc->log.buf_addr != NULL; + return guc->log.runtime.buf_addr != NULL; } -static int guc_log_create_extras(struct intel_guc *guc) +static int guc_log_runtime_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); void *vaddr; @@ -363,7 +363,7 @@ static int guc_log_create_extras(struct intel_guc *guc) lockdep_assert_held(&dev_priv->drm.struct_mutex); - GEM_BUG_ON(guc_log_has_extras(guc)); + GEM_BUG_ON(guc_log_has_runtime(guc)); /* Create a WC (Uncached for read) vmalloc mapping of log * buffer pages, so that we can directly get the data @@ -375,7 +375,7 @@ static int guc_log_create_extras(struct intel_guc *guc) return PTR_ERR(vaddr); } - guc->log.buf_addr = vaddr; + guc->log.runtime.buf_addr = vaddr; /* Keep the size of sub buffers same as shared log buffer */ subbuf_size = guc->log.vma->obj->base.size; @@ -401,9 +401,9 @@ static int guc_log_create_extras(struct intel_guc *guc) } GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); - guc->log.relay_chan = guc_log_relay_chan; + guc->log.runtime.relay_chan = guc_log_relay_chan; - INIT_WORK(&guc->log.flush_work, capture_logs_work); + INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); /* * GuC log buffer flush work item has to do register access to @@ -416,9 +416,9 @@ static int guc_log_create_extras(struct intel_guc *guc) * or scheduled later on resume. This way the handling of work * item can be kept same between system suspend & rpm suspend. */ - guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (!guc->log.flush_wq) { + guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); + if (!guc->log.runtime.flush_wq) { DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); ret = -ENOMEM; goto err_relaychan; @@ -427,26 +427,26 @@ static int guc_log_create_extras(struct intel_guc *guc) return 0; err_relaychan: - relay_close(guc->log.relay_chan); + relay_close(guc->log.runtime.relay_chan); err_vaddr: i915_gem_object_unpin_map(guc->log.vma->obj); - guc->log.buf_addr = NULL; + guc->log.runtime.buf_addr = NULL; return ret; } -static void guc_log_destroy_extras(struct intel_guc *guc) +static void guc_log_runtime_destroy(struct intel_guc *guc) { /* - * It's possible that extras were never allocated because guc_log_level - * was < 0 at the time + * It's possible that the runtime stuff was never allocated because + * guc_log_level was < 0 at the time **/ - if (!guc_log_has_extras(guc)) + if (!guc_log_has_runtime(guc)) return; - destroy_workqueue(guc->log.flush_wq); - relay_close(guc->log.relay_chan); + destroy_workqueue(guc->log.runtime.flush_wq); + relay_close(guc->log.runtime.relay_chan); i915_gem_object_unpin_map(guc->log.vma->obj); - guc->log.buf_addr = NULL; + guc->log.runtime.buf_addr = NULL; } static int guc_log_late_setup(struct intel_guc *guc) @@ -456,24 +456,24 @@ static int guc_log_late_setup(struct intel_guc *guc) lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (!guc_log_has_extras(guc)) { + if (!guc_log_has_runtime(guc)) { /* If log_level was set as -1 at boot time, then setup needed to * handle log buffer flush interrupts would not have been done yet, * so do that now. */ - ret = guc_log_create_extras(guc); + ret = guc_log_runtime_create(guc); if (ret) goto err; } ret = guc_log_relay_file_create(guc); if (ret) - goto err_extras; + goto err_runtime; return 0; -err_extras: - guc_log_destroy_extras(guc); +err_runtime: + guc_log_runtime_destroy(guc); err: /* logging will remain off */ i915.guc_log_level = -1; @@ -507,7 +507,7 @@ static void guc_flush_logs(struct intel_guc *guc) /* Before initiating the forceful flush, wait for any pending/ongoing * flush to complete otherwise forceful flush may not actually happen. */ - flush_work(&guc->log.flush_work); + flush_work(&guc->log.runtime.flush_work); /* Ask GuC to update the log buffer state */ guc_log_flush(guc); @@ -552,7 +552,7 @@ int intel_guc_log_create(struct intel_guc *guc) guc->log.vma = vma; if (i915.guc_log_level >= 0) { - ret = guc_log_create_extras(guc); + ret = guc_log_runtime_create(guc); if (ret < 0) goto err_vma; } @@ -578,7 +578,7 @@ err: void intel_guc_log_destroy(struct intel_guc *guc) { - guc_log_destroy_extras(guc); + guc_log_runtime_destroy(guc); i915_vma_unpin_and_release(&guc->log.vma); } @@ -653,6 +653,6 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); /* GuC logging is currently the only user of Guc2Host interrupts */ gen9_disable_guc_interrupts(dev_priv); - guc_log_destroy_extras(&dev_priv->guc); + guc_log_runtime_destroy(&dev_priv->guc); mutex_unlock(&dev_priv->drm.struct_mutex); } diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index eb30e7af255a..c6f880c365a0 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -132,11 +132,13 @@ struct intel_uc_fw { struct intel_guc_log { uint32_t flags; struct i915_vma *vma; - void *buf_addr; - struct workqueue_struct *flush_wq; - struct work_struct flush_work; - struct rchan *relay_chan; - + /* The runtime stuff gets created only when GuC logging gets enabled */ + struct { + void *buf_addr; + struct workqueue_struct *flush_wq; + struct work_struct flush_work; + struct rchan *relay_chan; + } runtime; /* logging related stats */ u32 capture_miss_count; u32 flush_interrupt_count; -- cgit v1.2.3 From 5e7cd37d68673d2f51d1bdd2e60f42289831af54 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:49 -0700 Subject: drm/i915/guc: Make intel_guc_send a function pointer Prepare for an alternate GuC communication interface. v2: Make a few functions static and name them correctly while we are at it (Oscar), but leave an intel_guc_send_mmio interface for users that require old-style communication. v3: Send intel_uc_init_early back to the top (Michal). Signed-off-by: Michel Thierry Signed-off-by: Michal Wajdeczko Signed-off-by: Oscar Mateo Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_uc.c | 16 +++++++++++----- drivers/gpu/drm/i915/intel_uc.h | 9 ++++++++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 438e8f32abde..0b8b2505a44a 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -83,7 +83,10 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) void intel_uc_init_early(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->guc.send_mutex); + struct intel_guc *guc = &dev_priv->guc; + + mutex_init(&guc->send_mutex); + guc->send = intel_guc_send_mmio; } void intel_uc_init_fw(struct drm_i915_private *dev_priv) @@ -216,7 +219,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) * Read GuC command/status register (SOFT_SCRATCH_0) * Return true if it contains a response rather than a command */ -static bool intel_guc_recv(struct intel_guc *guc, u32 *status) +static bool guc_recv(struct intel_guc *guc, u32 *status) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -225,7 +228,10 @@ static bool intel_guc_recv(struct intel_guc *guc, u32 *status) return INTEL_GUC_RECV_IS_RESPONSE(val); } -int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +/* + * This function implements the MMIO based host to GuC interface. + */ +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) { struct drm_i915_private *dev_priv = guc_to_i915(guc); u32 status; @@ -253,9 +259,9 @@ int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) * up to that length of time, then switch to a slower sleep-wait loop. * No inte_guc_send command should ever take longer than 10ms. */ - ret = wait_for_us(intel_guc_recv(guc, &status), 10); + ret = wait_for_us(guc_recv(guc, &status), 10); if (ret) - ret = wait_for(intel_guc_recv(guc, &status), 10); + ret = wait_for(guc_recv(guc, &status), 10); if (status != INTEL_GUC_STATUS_SUCCESS) { /* * Either the GuC explicitly returned an error (which diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index c6f880c365a0..3ec54a141d2e 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -176,6 +176,9 @@ struct intel_guc { /* To serialize the intel_guc_send actions */ struct mutex send_mutex; + + /* GuC's FW specific send function */ + int (*send)(struct intel_guc *guc, const u32 *data, u32 len); }; struct intel_huc { @@ -194,8 +197,12 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); void intel_uc_prepare_fw(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); -int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +{ + return guc->send(guc, action, len); +} /* intel_guc_loader.c */ int intel_guc_select_fw(struct intel_guc *guc); -- cgit v1.2.3 From 0d7681261453e6f10e0a98e7b744fe4085743291 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:50 -0700 Subject: drm/i915/guc: Improve the GuC documentation & comments about proxy submissions While at it, fix a typo (s/ring_lcra/ring_lrca) and improve the naming of one firware interface field (s/ring_tail/submit_element_info, since it can contain more than just the ring tail). No change in functionality. v2: - Remove reference to "unique user" of the GuC (Daniele) - Keep mention to renaming from "GuC context" to "client" (Daniele) Signed-off-by: Oscar Mateo Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_guc_submission.c | 42 +++++++++++++++++++----------- drivers/gpu/drm/i915/intel_guc_fwif.h | 4 +-- drivers/gpu/drm/i915/intel_uc.h | 6 +++-- 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ab91dac15b5b..e98cb743090f 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -30,16 +30,25 @@ /** * DOC: GuC-based command submission * - * i915_guc_client: - * We use the term client to avoid confusion with contexts. A i915_guc_client is - * equivalent to GuC object guc_context_desc. This context descriptor is - * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell - * and workqueue for it. Also the process descriptor (guc_process_desc), which - * is mapped to client space. So the client can write Work Item then ring the - * doorbell. + * GuC client: + * A i915_guc_client refers to a submission path through GuC. Currently, there + * is only one of these (the execbuf_client) and this one is charged with all + * submissions to the GuC. This struct is the owner of a doorbell, a process + * descriptor and a workqueue (all of them inside a single gem object that + * contains all required pages for these elements). * - * To simplify the implementation, we allocate one gem object that contains all - * pages for doorbell, process descriptor and workqueue. + * GuC context descriptor: + * During initialization, the driver allocates a static pool of 1024 such + * descriptors, and shares them with the GuC. + * Currently, there exists a 1:1 mapping between a i915_guc_client and a + * guc_context_desc (via the client's context_index), so effectively only + * one guc_context_desc gets used. This context descriptor lets the GuC know + * about the doorbell, workqueue and process descriptor. Theoretically, it also + * lets the GuC know about our HW contexts (Context ID, etc...), but we actually + * employ a kind of submission where the GuC uses the LRCA sent via the work + * item instead (the single guc_context_desc associated to execbuf client + * contains information about the default kernel context only, but this is + * essentially unused). This is called a "proxy" submission. * * The Scratch registers: * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes @@ -308,10 +317,17 @@ static void guc_ctx_desc_init(struct intel_guc *guc, if (!ce->state) break; /* XXX: continue? */ + /* + * XXX: When this is a GUC_CTX_DESC_ATTR_KERNEL client (proxy + * submission or, in other words, not using a direct submission + * model) the KMD's LRCA is not used for any work submission. + * Instead, the GuC uses the LRCA of the user mode context (see + * guc_wq_item_append below). + */ lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - lrc->ring_lcra = + lrc->ring_lrca = guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); @@ -341,10 +357,6 @@ static void guc_ctx_desc_init(struct intel_guc *guc, desc->wq_addr = gfx_addr + client->wq_offset; desc->wq_size = client->wq_size; - /* - * XXX: Take LRCs from an existing context if this is not an - * IsKMDCreatedContext client - */ desc->desc_private = (uintptr_t)client; } @@ -468,7 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, /* The GuC wants only the low-order word of the context descriptor */ wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); - wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; + wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT; wqi->fence_id = rq->global_seqno; } diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 3ae8cefa47b3..4edf40fe361f 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -251,7 +251,7 @@ union guc_doorbell_qw { struct guc_wq_item { u32 header; u32 context_desc; - u32 ring_tail; + u32 submit_element_info; u32 fence_id; } __packed; @@ -278,7 +278,7 @@ struct guc_execlist_context { u32 context_desc; u32 context_id; u32 ring_status; - u32 ring_lcra; + u32 ring_lrca; u32 ring_begin; u32 ring_end; u32 ring_next_free_location; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 3ec54a141d2e..65d0b5918661 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -34,7 +34,9 @@ struct drm_i915_gem_request; /* * This structure primarily describes the GEM object shared with the GuC. - * The GEM object is held for the entire lifetime of our interaction with + * The specs sometimes refer to this object as a "GuC context", but we use + * the term "client" to avoid confusion with hardware contexts. This + * GEM object is held for the entire lifetime of our interaction with * the GuC, being allocated before the GuC is loaded with its firmware. * Because there's no way to update the address used by the GuC after * initialisation, the shared object must stay pinned into the GGTT as @@ -44,7 +46,7 @@ struct drm_i915_gem_request; * * The single GEM object described here is actually made up of several * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process decriptor" which holds sequence data for + * kmap'd) includes the "process descriptor" which holds sequence data for * the doorbell, and one cacheline which actually *is* the doorbell; a * write to this will "ring the doorbell" (i.e. send an interrupt to the * GuC). The subsequent pages of the client object constitute the work -- cgit v1.2.3 From ed2ec71f9f7c0e1682c0821e4fa2452430f70e5e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:51 -0700 Subject: drm/i915/guc: Wait for doorbell to be inactive before deallocating Doorbell release flow requires that we wait for GEN8_DRB_VALID bit to go to zero after updating db_status before we call the GuC to release the doorbell. Kudos to Daniele for finding this out. v2: WARN instead of DRM_ERROR (Joonas) Signed-off-by: Oscar Mateo Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_guc_submission.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index e98cb743090f..60add049b531 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -206,12 +206,22 @@ static int __create_doorbell(struct i915_guc_client *client) static int __destroy_doorbell(struct i915_guc_client *client) { + struct drm_i915_private *dev_priv = guc_to_i915(client->guc); struct guc_doorbell_info *doorbell; + u16 db_id = client->doorbell_id; + + GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_DISABLED; doorbell->cookie = 0; + /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit + * to go to zero after updating db_status before we call the GuC to + * release the doorbell */ + if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10)) + WARN_ONCE(true, "Doorbell never became invalid after disable\n"); + return __guc_deallocate_doorbell(client->guc, client->ctx_index); } -- cgit v1.2.3 From 397fce887ad033b10ec8a3270dfe38388f28db01 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:52 -0700 Subject: drm/i915/guc: A little bit more of doorbell sanitization Some recent refactoring patches have left the doorbell creation outside the GuC client allocation, which does not make a lot of sense (a client without a doorbell is something useless). Move it back there, and refactor the init_doorbell_hw consequently. Thanks to this, we can do some other improvements, like hoisting the check for GuC submission enabled out of the enable function. v2: Rebased. Signed-off-by: Oscar Mateo Cc: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Tvrtko Ursulin Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_guc_submission.c | 231 ++++++++++++++++------------- drivers/gpu/drm/i915/intel_uc.c | 21 +-- 2 files changed, 138 insertions(+), 114 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 60add049b531..e2ad365679c3 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -163,15 +163,13 @@ static struct guc_context_desc *__get_context_desc(struct i915_guc_client *clien * client object which contains the page being used for the doorbell */ -static int __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) +static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) { struct guc_context_desc *desc; /* Update the GuC's idea of the doorbell ID */ desc = __get_context_desc(client); desc->db_id = new_id; - - return 0; } static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client) @@ -225,6 +223,28 @@ static int __destroy_doorbell(struct i915_guc_client *client) return __guc_deallocate_doorbell(client->guc, client->ctx_index); } +static int create_doorbell(struct i915_guc_client *client) +{ + int ret; + + ret = __reserve_doorbell(client); + if (ret) + return ret; + + __update_doorbell_desc(client, client->doorbell_id); + + ret = __create_doorbell(client); + if (ret) + goto err; + + return 0; + +err: + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); + __unreserve_doorbell(client); + return ret; +} + static int destroy_doorbell(struct i915_guc_client *client) { int err; @@ -494,6 +514,17 @@ static void guc_wq_item_append(struct i915_guc_client *client, wqi->fence_id = rq->global_seqno; } +static void guc_reset_wq(struct i915_guc_client *client) +{ + struct guc_process_desc *desc = client->vaddr + + client->proc_desc_offset; + + desc->head = 0; + desc->tail = 0; + + client->wq_tail = 0; +} + static int guc_ring_doorbell(struct i915_guc_client *client) { struct guc_process_desc *desc; @@ -748,19 +779,6 @@ err: return vma; } -static void guc_client_free(struct i915_guc_client *client) -{ - /* - * XXX: wait for any outstanding submissions before freeing memory. - * Be sure to drop any locks - */ - guc_ctx_desc_fini(client->guc, client); - i915_gem_object_unpin_map(client->vma->obj); - i915_vma_unpin_and_release(&client->vma); - ida_simple_remove(&client->guc->ctx_ids, client->ctx_index); - kfree(client); -} - /* Check that a doorbell register is in the expected state */ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) { @@ -791,9 +809,8 @@ static int __reset_doorbell(struct i915_guc_client* client, u16 db_id) { int err; - err = __update_doorbell_desc(client, db_id); - if (!err) - err = __create_doorbell(client); + __update_doorbell_desc(client, db_id); + err = __create_doorbell(client); if (!err) err = __destroy_doorbell(client); @@ -801,48 +818,61 @@ static int __reset_doorbell(struct i915_guc_client* client, u16 db_id) } /* - * Borrow the first client to set up & tear down each unused doorbell - * in turn, to ensure that all doorbell h/w is (re)initialised. + * Set up & tear down each unused doorbell in turn, to ensure that all doorbell + * HW is (re)initialised. For that end, we might have to borrow the first + * client. Also, tell GuC about all the doorbells in use by all clients. + * We do this because the KMD, the GuC and the doorbell HW can easily go out of + * sync (e.g. we can reset the GuC, but not the doorbel HW). */ static int guc_init_doorbell_hw(struct intel_guc *guc) { struct i915_guc_client *client = guc->execbuf_client; - int err; - int i; - - if (has_doorbell(client)) - destroy_doorbell(client); + bool recreate_first_client = false; + u16 db_id; + int ret; - for (i = 0; i < GUC_NUM_DOORBELLS; ++i) { - if (doorbell_ok(guc, i)) + /* For unused doorbells, make sure they are disabled */ + for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) { + if (doorbell_ok(guc, db_id)) continue; - err = __reset_doorbell(client, i); - WARN(err, "Doorbell %d reset failed, err %d\n", i, err); + if (has_doorbell(client)) { + /* Borrow execbuf_client (we will recreate it later) */ + destroy_doorbell(client); + recreate_first_client = true; + } + + ret = __reset_doorbell(client, db_id); + WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret); } - /* Read back & verify all doorbell registers */ - for (i = 0; i < GUC_NUM_DOORBELLS; ++i) - WARN_ON(!doorbell_ok(guc, i)); + if (recreate_first_client) { + ret = __reserve_doorbell(client); + if (unlikely(ret)) { + DRM_ERROR("Couldn't re-reserve first client db: %d\n", ret); + return ret; + } - err = __reserve_doorbell(client); - if (err) - return err; + __update_doorbell_desc(client, client->doorbell_id); + } - err = __update_doorbell_desc(client, client->doorbell_id); - if (err) - goto err_reserve; + /* Now for every client (and not only execbuf_client) make sure their + * doorbells are known by the GuC */ + //for (client = client_list; client != NULL; client = client->next) + { + ret = __create_doorbell(client); + if (ret) { + DRM_ERROR("Couldn't recreate client %u doorbell: %d\n", + client->ctx_index, ret); + return ret; + } + } - err = __create_doorbell(client); - if (err) - goto err_update; + /* Read back & verify all (used & unused) doorbell registers */ + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) + WARN_ON(!doorbell_ok(guc, db_id)); return 0; -err_reserve: - __unreserve_doorbell(client); -err_update: - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - return err; } /** @@ -922,11 +952,9 @@ guc_client_alloc(struct drm_i915_private *dev_priv, guc_proc_desc_init(guc, client); guc_ctx_desc_init(guc, client); - /* FIXME: Runtime client allocation (which currently we don't do) will - * require that the doorbell gets created now. The static execbuf_client - * is now getting its doorbell later (on submission enable) but maybe we - * also want to reorder things in the future so that we don't have to - * special case the doorbell creation */ + ret = create_doorbell(client); + if (ret) + goto err_vaddr; DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", priority, client, client->engines, client->ctx_index); @@ -934,6 +962,9 @@ guc_client_alloc(struct drm_i915_private *dev_priv, client->doorbell_id, client->doorbell_offset); return client; + +err_vaddr: + i915_gem_object_unpin_map(client->vma->obj); err_vma: i915_vma_unpin_and_release(&client->vma); err_id: @@ -943,6 +974,24 @@ err_client: return ERR_PTR(ret); } +static void guc_client_free(struct i915_guc_client *client) +{ + /* + * XXX: wait for any outstanding submissions before freeing memory. + * Be sure to drop any locks + */ + + /* FIXME: in many cases, by the time we get here the GuC has been + * reset, so we cannot destroy the doorbell properly. Ignore the + * error message for now */ + destroy_doorbell(client); + guc_ctx_desc_fini(client->guc, client); + i915_gem_object_unpin_map(client->vma->obj); + i915_vma_unpin_and_release(&client->vma); + ida_simple_remove(&client->guc->ctx_ids, client->ctx_index); + kfree(client); +} + static void guc_policies_init(struct guc_policies *policies) { struct guc_policy *policy; @@ -1034,8 +1083,8 @@ static void guc_ads_destroy(struct intel_guc *guc) } /* - * Set up the memory resources to be shared with the GuC. At this point, - * we require just one object that can be mapped through the GGTT. + * Set up the memory resources to be shared with the GuC (via the GGTT) + * at firmware loading time. */ int i915_guc_submission_init(struct drm_i915_private *dev_priv) { @@ -1047,16 +1096,6 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) void *vaddr; int ret; - if (!HAS_GUC_SCHED(dev_priv)) - return 0; - - /* Wipe bitmap & delete client in case of reinitialisation */ - bitmap_clear(guc->doorbell_bitmap, 0, GUC_NUM_DOORBELLS); - i915_guc_submission_disable(dev_priv); - - if (!i915.enable_guc_submission) - return 0; - if (guc->ctx_pool) return 0; @@ -1084,20 +1123,8 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) ida_init(&guc->ctx_ids); - guc->execbuf_client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, - GUC_CTX_PRIORITY_KMD_NORMAL, - dev_priv->kernel_context); - if (IS_ERR(guc->execbuf_client)) { - DRM_ERROR("Failed to create GuC client for execbuf!\n"); - ret = PTR_ERR(guc->execbuf_client); - goto err_ads; - } - return 0; -err_ads: - guc_ads_destroy(guc); err_log: intel_guc_log_destroy(guc); err_vaddr: @@ -1111,11 +1138,6 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - if (!i915.enable_guc_submission) - return 0; - - guc_client_free(guc->execbuf_client); - guc->execbuf_client = NULL; ida_destroy(&guc->ctx_ids); guc_ads_destroy(guc); intel_guc_log_destroy(guc); @@ -1123,17 +1145,6 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) i915_vma_unpin_and_release(&guc->ctx_pool); } -static void guc_reset_wq(struct i915_guc_client *client) -{ - struct guc_process_desc *desc = client->vaddr + - client->proc_desc_offset; - - desc->head = 0; - desc->tail = 0; - - client->wq_tail = 0; -} - static void guc_interrupts_capture(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -1184,17 +1195,28 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) enum intel_engine_id id; int err; - if (!client) - return -ENODEV; + if (!client) { + client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + GUC_CTX_PRIORITY_KMD_NORMAL, + dev_priv->kernel_context); + if (IS_ERR(client)) { + DRM_ERROR("Failed to create GuC client for execbuf!\n"); + return PTR_ERR(client); + } + + guc->execbuf_client = client; + } err = intel_guc_sample_forcewake(guc); if (err) - return err; + goto err_execbuf_client; guc_reset_wq(client); + err = guc_init_doorbell_hw(guc); if (err) - return err; + goto err_execbuf_client; /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(dev_priv); @@ -1221,6 +1243,11 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) } return 0; + +err_execbuf_client: + guc_client_free(guc->execbuf_client); + guc->execbuf_client = NULL; + return err; } static void guc_interrupts_release(struct drm_i915_private *dev_priv) @@ -1253,16 +1280,11 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv) guc_interrupts_release(dev_priv); - if (!guc->execbuf_client) - return; - - /* FIXME: in many cases, by the time we get here the GuC has been - * reset, so we cannot destroy the doorbell properly. Ignore the - * error message for now */ - destroy_doorbell(guc->execbuf_client); - /* Revert back to manual ELSP submission */ intel_engines_reset_default_submission(dev_priv); + + guc_client_free(guc->execbuf_client); + guc->execbuf_client = NULL; } /** @@ -1291,7 +1313,6 @@ int intel_guc_suspend(struct drm_i915_private *dev_priv) return intel_guc_send(guc, data, ARRAY_SIZE(data)); } - /** * intel_guc_resume() - notify GuC resuming from suspend state * @dev_priv: i915 device private diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 0b8b2505a44a..4a872cdf57e8 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -126,13 +126,15 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(dev_priv); - /* - * This is stuff we need to have available at fw load time - * if we are planning to enable submission later - */ - ret = i915_guc_submission_init(dev_priv); - if (ret) - goto err_guc; + if (i915.enable_guc_submission) { + /* + * This is stuff we need to have available at fw load time + * if we are planning to enable submission later + */ + ret = i915_guc_submission_init(dev_priv); + if (ret) + goto err_guc; + } /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ @@ -187,7 +189,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) err_interrupts: gen9_disable_guc_interrupts(dev_priv); err_submission: - i915_guc_submission_fini(dev_priv); + if (i915.enable_guc_submission) + i915_guc_submission_fini(dev_priv); err_guc: i915_ggtt_disable_guc(dev_priv); @@ -210,8 +213,8 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) if (i915.enable_guc_submission) { i915_guc_submission_disable(dev_priv); gen9_disable_guc_interrupts(dev_priv); + i915_guc_submission_fini(dev_priv); } - i915_guc_submission_fini(dev_priv); i915_ggtt_disable_guc(dev_priv); } -- cgit v1.2.3 From b09935a60d55265027db9f2b0177aa24317b714c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:53 -0700 Subject: drm/i915/guc: Refactor the concept "GuC context descriptor" into "GuC stage descriptor" A GuC context and a HW context are in no way related, so the name "GuC context descriptor" is very unfortunate, because a new reader of the code gets overwhelmed very quickly with a lot of things called "context" that refer to different things. We can improve legibility a lot by simply renaming a few objects in the GuC code. v2: - Rebased - s/ctx_desc_pool/stage_desc_pool - Move some explanations to the definition of the guc_stage_desc struct (Chris) v3: - Calculate gemsize with less intermediate steps (Joonas) - Use BIT() macro (Joonas) Signed-off-by: Oscar Mateo Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_guc_submission.c | 118 +++++++++++++++-------------- drivers/gpu/drm/i915/intel_guc_fwif.h | 48 +++++++----- drivers/gpu/drm/i915/intel_guc_loader.c | 4 +- drivers/gpu/drm/i915/intel_uc.h | 8 +- 5 files changed, 96 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e2390df22e08..3ebdbc798f33 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2471,8 +2471,8 @@ static void i915_guc_client_info(struct seq_file *m, enum intel_engine_id id; uint64_t tot = 0; - seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", - client->priority, client->ctx_index, client->proc_desc_offset); + seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n", + client->priority, client->stage_id, client->proc_desc_offset); seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n", client->doorbell_id, client->doorbell_offset, client->doorbell_cookie); seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index e2ad365679c3..41ec8af526a5 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -37,16 +37,16 @@ * descriptor and a workqueue (all of them inside a single gem object that * contains all required pages for these elements). * - * GuC context descriptor: + * GuC stage descriptor: * During initialization, the driver allocates a static pool of 1024 such * descriptors, and shares them with the GuC. * Currently, there exists a 1:1 mapping between a i915_guc_client and a - * guc_context_desc (via the client's context_index), so effectively only - * one guc_context_desc gets used. This context descriptor lets the GuC know - * about the doorbell, workqueue and process descriptor. Theoretically, it also - * lets the GuC know about our HW contexts (Context ID, etc...), but we actually + * guc_stage_desc (via the client's stage_id), so effectively only one + * gets used. This stage descriptor lets the GuC know about the doorbell, + * workqueue and process descriptor. Theoretically, it also lets the GuC + * know about our HW contexts (context ID, etc...), but we actually * employ a kind of submission where the GuC uses the LRCA sent via the work - * item instead (the single guc_context_desc associated to execbuf client + * item instead (the single guc_stage_desc associated to execbuf client * contains information about the default kernel context only, but this is * essentially unused). This is called a "proxy" submission. * @@ -82,7 +82,7 @@ static inline bool is_high_priority(struct i915_guc_client* client) { - return client->priority <= GUC_CTX_PRIORITY_HIGH; + return client->priority <= GUC_CLIENT_PRIORITY_HIGH; } static int __reserve_doorbell(struct i915_guc_client *client) @@ -112,7 +112,7 @@ static int __reserve_doorbell(struct i915_guc_client *client) __set_bit(id, client->guc->doorbell_bitmap); client->doorbell_id = id; DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n", - client->ctx_index, yesno(is_high_priority(client)), + client->stage_id, yesno(is_high_priority(client)), id); return 0; } @@ -129,31 +129,31 @@ static void __unreserve_doorbell(struct i915_guc_client *client) * Tell the GuC to allocate or deallocate a specific doorbell */ -static int __guc_allocate_doorbell(struct intel_guc *guc, u32 ctx_index) +static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id) { u32 action[] = { INTEL_GUC_ACTION_ALLOCATE_DOORBELL, - ctx_index + stage_id }; return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 ctx_index) +static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) { u32 action[] = { INTEL_GUC_ACTION_DEALLOCATE_DOORBELL, - ctx_index + stage_id }; return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static struct guc_context_desc *__get_context_desc(struct i915_guc_client *client) +static struct guc_stage_desc *__get_stage_desc(struct i915_guc_client *client) { - struct guc_context_desc *base = client->guc->ctx_pool_vaddr; + struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; - return &base[client->ctx_index]; + return &base[client->stage_id]; } /* @@ -165,10 +165,10 @@ static struct guc_context_desc *__get_context_desc(struct i915_guc_client *clien static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) { - struct guc_context_desc *desc; + struct guc_stage_desc *desc; /* Update the GuC's idea of the doorbell ID */ - desc = __get_context_desc(client); + desc = __get_stage_desc(client); desc->db_id = new_id; } @@ -194,7 +194,7 @@ static int __create_doorbell(struct i915_guc_client *client) doorbell->db_status = GUC_DOORBELL_ENABLED; doorbell->cookie = client->doorbell_cookie; - err = __guc_allocate_doorbell(client->guc, client->ctx_index); + err = __guc_allocate_doorbell(client->guc, client->stage_id); if (err) { doorbell->db_status = GUC_DOORBELL_DISABLED; doorbell->cookie = 0; @@ -220,7 +220,7 @@ static int __destroy_doorbell(struct i915_guc_client *client) if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10)) WARN_ONCE(true, "Doorbell never became invalid after disable\n"); - return __guc_deallocate_doorbell(client->guc, client->ctx_index); + return __guc_deallocate_doorbell(client->guc, client->stage_id); } static int create_doorbell(struct i915_guc_client *client) @@ -301,34 +301,34 @@ static void guc_proc_desc_init(struct intel_guc *guc, desc->wq_base_addr = 0; desc->db_base_addr = 0; - desc->context_id = client->ctx_index; + desc->stage_id = client->stage_id; desc->wq_size_bytes = client->wq_size; desc->wq_status = WQ_STATUS_ACTIVE; desc->priority = client->priority; } /* - * Initialise/clear the context descriptor shared with the GuC firmware. + * Initialise/clear the stage descriptor shared with the GuC firmware. * * This descriptor tells the GuC where (in GGTT space) to find the important * data structures relating to this client (doorbell, process descriptor, * write queue, etc). */ -static void guc_ctx_desc_init(struct intel_guc *guc, - struct i915_guc_client *client) +static void guc_stage_desc_init(struct intel_guc *guc, + struct i915_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; struct i915_gem_context *ctx = client->owner; - struct guc_context_desc *desc; + struct guc_stage_desc *desc; unsigned int tmp; u32 gfx_addr; - desc = __get_context_desc(client); + desc = __get_stage_desc(client); memset(desc, 0, sizeof(*desc)); - desc->attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; - desc->context_id = client->ctx_index; + desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; + desc->stage_id = client->stage_id; desc->priority = client->priority; desc->db_id = client->doorbell_id; @@ -348,7 +348,7 @@ static void guc_ctx_desc_init(struct intel_guc *guc, break; /* XXX: continue? */ /* - * XXX: When this is a GUC_CTX_DESC_ATTR_KERNEL client (proxy + * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy * submission or, in other words, not using a direct submission * model) the KMD's LRCA is not used for any work submission. * Instead, the GuC uses the LRCA of the user mode context (see @@ -359,7 +359,10 @@ static void guc_ctx_desc_init(struct intel_guc *guc, /* The state page is after PPHWSP */ lrc->ring_lrca = guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; - lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | + + /* XXX: In direct submission, the GuC wants the HW context id + * here. In proxy submission, it wants the stage id */ + lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); lrc->ring_begin = guc_ggtt_offset(ce->ring->vma); @@ -390,12 +393,12 @@ static void guc_ctx_desc_init(struct intel_guc *guc, desc->desc_private = (uintptr_t)client; } -static void guc_ctx_desc_fini(struct intel_guc *guc, - struct i915_guc_client *client) +static void guc_stage_desc_fini(struct intel_guc *guc, + struct i915_guc_client *client) { - struct guc_context_desc *desc; + struct guc_stage_desc *desc; - desc = __get_context_desc(client); + desc = __get_stage_desc(client); memset(desc, 0, sizeof(*desc)); } @@ -863,7 +866,7 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) ret = __create_doorbell(client); if (ret) { DRM_ERROR("Couldn't recreate client %u doorbell: %d\n", - client->ctx_index, ret); + client->stage_id, ret); return ret; } } @@ -913,12 +916,12 @@ guc_client_alloc(struct drm_i915_private *dev_priv, client->wq_size = GUC_WQ_SIZE; spin_lock_init(&client->wq_lock); - ret = ida_simple_get(&guc->ctx_ids, 0, GUC_MAX_GPU_CONTEXTS, + ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, GFP_KERNEL); if (ret < 0) goto err_client; - client->ctx_index = ret; + client->stage_id = ret; /* The first page is doorbell/proc_desc. Two followed pages are wq. */ vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); @@ -950,14 +953,14 @@ guc_client_alloc(struct drm_i915_private *dev_priv, client->proc_desc_offset = (GUC_DB_SIZE / 2); guc_proc_desc_init(guc, client); - guc_ctx_desc_init(guc, client); + guc_stage_desc_init(guc, client); ret = create_doorbell(client); if (ret) goto err_vaddr; - DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", - priority, client, client->engines, client->ctx_index); + DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: stage_id %u\n", + priority, client, client->engines, client->stage_id); DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n", client->doorbell_id, client->doorbell_offset); @@ -968,7 +971,7 @@ err_vaddr: err_vma: i915_vma_unpin_and_release(&client->vma); err_id: - ida_simple_remove(&guc->ctx_ids, client->ctx_index); + ida_simple_remove(&guc->stage_ids, client->stage_id); err_client: kfree(client); return ERR_PTR(ret); @@ -985,10 +988,10 @@ static void guc_client_free(struct i915_guc_client *client) * reset, so we cannot destroy the doorbell properly. Ignore the * error message for now */ destroy_doorbell(client); - guc_ctx_desc_fini(client->guc, client); + guc_stage_desc_fini(client->guc, client); i915_gem_object_unpin_map(client->vma->obj); i915_vma_unpin_and_release(&client->vma); - ida_simple_remove(&client->guc->ctx_ids, client->ctx_index); + ida_simple_remove(&client->guc->stage_ids, client->stage_id); kfree(client); } @@ -1000,7 +1003,7 @@ static void guc_policies_init(struct guc_policies *policies) policies->dpc_promote_time = 500000; policies->max_num_work_items = POLICY_MAX_NUM_WI; - for (p = 0; p < GUC_CTX_PRIORITY_NUM; p++) { + for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { policy = &policies->policy[p][i]; @@ -1088,30 +1091,29 @@ static void guc_ads_destroy(struct intel_guc *guc) */ int i915_guc_submission_init(struct drm_i915_private *dev_priv) { - const size_t ctxsize = sizeof(struct guc_context_desc); - const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize; - const size_t gemsize = round_up(poolsize, PAGE_SIZE); struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; void *vaddr; int ret; - if (guc->ctx_pool) + if (guc->stage_desc_pool) return 0; - vma = intel_guc_allocate_vma(guc, gemsize); + vma = intel_guc_allocate_vma(guc, + PAGE_ALIGN(sizeof(struct guc_stage_desc) * + GUC_MAX_STAGE_DESCRIPTORS)); if (IS_ERR(vma)) return PTR_ERR(vma); - guc->ctx_pool = vma; + guc->stage_desc_pool = vma; - vaddr = i915_gem_object_pin_map(guc->ctx_pool->obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(guc->stage_desc_pool->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); goto err_vma; } - guc->ctx_pool_vaddr = vaddr; + guc->stage_desc_pool_vaddr = vaddr; ret = intel_guc_log_create(guc); if (ret < 0) @@ -1121,16 +1123,16 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (ret < 0) goto err_log; - ida_init(&guc->ctx_ids); + ida_init(&guc->stage_ids); return 0; err_log: intel_guc_log_destroy(guc); err_vaddr: - i915_gem_object_unpin_map(guc->ctx_pool->obj); + i915_gem_object_unpin_map(guc->stage_desc_pool->obj); err_vma: - i915_vma_unpin_and_release(&guc->ctx_pool); + i915_vma_unpin_and_release(&guc->stage_desc_pool); return ret; } @@ -1138,11 +1140,11 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - ida_destroy(&guc->ctx_ids); + ida_destroy(&guc->stage_ids); guc_ads_destroy(guc); intel_guc_log_destroy(guc); - i915_gem_object_unpin_map(guc->ctx_pool->obj); - i915_vma_unpin_and_release(&guc->ctx_pool); + i915_gem_object_unpin_map(guc->stage_desc_pool->obj); + i915_vma_unpin_and_release(&guc->stage_desc_pool); } static void guc_interrupts_capture(struct drm_i915_private *dev_priv) @@ -1198,7 +1200,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) if (!client) { client = guc_client_alloc(dev_priv, INTEL_INFO(dev_priv)->ring_mask, - GUC_CTX_PRIORITY_KMD_NORMAL, + GUC_CLIENT_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (IS_ERR(client)) { DRM_ERROR("Failed to create GuC client for execbuf!\n"); diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 4edf40fe361f..18131b7289a5 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -26,14 +26,14 @@ #define GFXCORE_FAMILY_GEN9 12 #define GFXCORE_FAMILY_UNKNOWN 0x7fffffff -#define GUC_CTX_PRIORITY_KMD_HIGH 0 -#define GUC_CTX_PRIORITY_HIGH 1 -#define GUC_CTX_PRIORITY_KMD_NORMAL 2 -#define GUC_CTX_PRIORITY_NORMAL 3 -#define GUC_CTX_PRIORITY_NUM 4 +#define GUC_CLIENT_PRIORITY_KMD_HIGH 0 +#define GUC_CLIENT_PRIORITY_HIGH 1 +#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 +#define GUC_CLIENT_PRIORITY_NORMAL 3 +#define GUC_CLIENT_PRIORITY_NUM 4 -#define GUC_MAX_GPU_CONTEXTS 1024 -#define GUC_INVALID_CTX_ID GUC_MAX_GPU_CONTEXTS +#define GUC_MAX_STAGE_DESCRIPTORS 1024 +#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS #define GUC_RENDER_ENGINE 0 #define GUC_VIDEO_ENGINE 1 @@ -68,14 +68,14 @@ #define GUC_DOORBELL_ENABLED 1 #define GUC_DOORBELL_DISABLED 0 -#define GUC_CTX_DESC_ATTR_ACTIVE (1 << 0) -#define GUC_CTX_DESC_ATTR_PENDING_DB (1 << 1) -#define GUC_CTX_DESC_ATTR_KERNEL (1 << 2) -#define GUC_CTX_DESC_ATTR_PREEMPT (1 << 3) -#define GUC_CTX_DESC_ATTR_RESET (1 << 4) -#define GUC_CTX_DESC_ATTR_WQLOCKED (1 << 5) -#define GUC_CTX_DESC_ATTR_PCH (1 << 6) -#define GUC_CTX_DESC_ATTR_TERMINATED (1 << 7) +#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0) +#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1) +#define GUC_STAGE_DESC_ATTR_KERNEL BIT(2) +#define GUC_STAGE_DESC_ATTR_PREEMPT BIT(3) +#define GUC_STAGE_DESC_ATTR_RESET BIT(4) +#define GUC_STAGE_DESC_ATTR_WQLOCKED BIT(5) +#define GUC_STAGE_DESC_ATTR_PCH BIT(6) +#define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) /* The guc control data is 10 DWORDs */ #define GUC_CTL_CTXINFO 0 @@ -256,7 +256,7 @@ struct guc_wq_item { } __packed; struct guc_process_desc { - u32 context_id; + u32 stage_id; u64 db_base_addr; u32 head; u32 tail; @@ -289,10 +289,18 @@ struct guc_execlist_context { u16 engine_submit_queue_count; } __packed; -/*Context descriptor for communicating between uKernel and Driver*/ -struct guc_context_desc { +/* + * This structure describes a stage set arranged for a particular communication + * between uKernel (GuC) and Driver (KMD). Technically, this is known as a + * "GuC Context descriptor" in the specs, but we use the term "stage descriptor" + * to avoid confusion with all the other things already named "context" in the + * driver. A static pool of these descriptors are stored inside a GEM object + * (stage_desc_pool) which is held for the entire lifetime of our interaction + * with the GuC, being allocated before the GuC is loaded with its firmware. + */ +struct guc_stage_desc { u32 sched_common_area; - u32 context_id; + u32 stage_id; u32 pas_id; u8 engines_used; u64 db_trigger_cpu; @@ -359,7 +367,7 @@ struct guc_policy { } __packed; struct guc_policies { - struct guc_policy policy[GUC_CTX_PRIORITY_NUM][GUC_MAX_ENGINES_NUM]; + struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINES_NUM]; /* In micro seconds. How much time to allow before DPC processing is * called back via interrupt (to prevent DPC queue drain starving). diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 62ebf5605f6e..7d92321f8731 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -151,8 +151,8 @@ static void guc_params_init(struct drm_i915_private *dev_priv) /* If GuC submission is enabled, set up additional parameters here */ if (i915.enable_guc_submission) { u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; - u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool); - u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; + u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); + u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 65d0b5918661..6cf2d14fa0dc 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -74,7 +74,7 @@ struct i915_guc_client { uint32_t engines; /* bitmap of (host) engine ids */ uint32_t priority; - u32 ctx_index; + u32 stage_id; uint32_t proc_desc_offset; u16 doorbell_id; @@ -157,9 +157,9 @@ struct intel_guc { bool interrupts_enabled; struct i915_vma *ads_vma; - struct i915_vma *ctx_pool; - void *ctx_pool_vaddr; - struct ida ctx_ids; + struct i915_vma *stage_desc_pool; + void *stage_desc_pool_vaddr; + struct ida stage_ids; struct i915_guc_client *execbuf_client; -- cgit v1.2.3 From 35815ea9cf2626c40e32743284380bc9fc3f8cad Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:54 -0700 Subject: drm/i915/guc: Split out the mmio_white_list struct We are going to need it for future platforms. Signed-off-by: Oscar Mateo Cc: Joonas Lahtinen Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_guc_submission.c | 4 ++-- drivers/gpu/drm/i915/intel_guc_fwif.h | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 41ec8af526a5..f9a8a4b6add6 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1049,11 +1049,11 @@ static int guc_ads_create(struct intel_guc *guc) /* MMIO reg state */ for_each_engine(engine, dev_priv, id) { - blob->reg_state.mmio_white_list[engine->guc_id].mmio_start = + blob->reg_state.white_list[engine->guc_id].mmio_start = engine->mmio_base + GUC_MMIO_WHITE_LIST_START; /* Nothing to be saved or restored for now. */ - blob->reg_state.mmio_white_list[engine->guc_id].count = 0; + blob->reg_state.white_list[engine->guc_id].count = 0; } /* diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 18131b7289a5..cb36cbf3818f 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -409,16 +409,17 @@ struct guc_mmio_regset { u32 number_of_registers; } __packed; +/* MMIO registers that are set as non privileged */ +struct mmio_white_list { + u32 mmio_start; + u32 offsets[GUC_MMIO_WHITE_LIST_MAX]; + u32 count; +} __packed; + struct guc_mmio_reg_state { struct guc_mmio_regset global_reg; struct guc_mmio_regset engine_reg[GUC_MAX_ENGINES_NUM]; - - /* MMIO registers that are set as non privileged */ - struct __packed { - u32 mmio_start; - u32 offsets[GUC_MMIO_WHITE_LIST_MAX]; - u32 count; - } mmio_white_list[GUC_MAX_ENGINES_NUM]; + struct mmio_white_list white_list[GUC_MAX_ENGINES_NUM]; } __packed; /* GuC Additional Data Struct */ -- cgit v1.2.3 From 618ef005acf8bb5bd1574b9db12c7828e457776a Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 22 Mar 2017 10:39:55 -0700 Subject: drm/i915/guc: Move guc_interrupts_release next to guc_interrupts_capture They go better together. Signed-off-by: Oscar Mateo Cc: Joonas Lahtinen Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_guc_submission.c | 48 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index f9a8a4b6add6..20ab20b03ee8 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1189,6 +1189,30 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv) dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } +static void guc_interrupts_release(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int irqs; + + /* + * tell all command streamers NOT to forward interrupts or vblank + * to GuC. + */ + irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); + irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); + for_each_engine(engine, dev_priv, id) + I915_WRITE(RING_MODE_GEN7(engine), irqs); + + /* route all GT interrupts to the host */ + I915_WRITE(GUC_BCS_RCS_IER, 0); + I915_WRITE(GUC_VCS2_VCS1_IER, 0); + I915_WRITE(GUC_WD_VECS_IER, 0); + + dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; +} + int i915_guc_submission_enable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -1252,30 +1276,6 @@ err_execbuf_client: return err; } -static void guc_interrupts_release(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int irqs; - - /* - * tell all command streamers NOT to forward interrupts or vblank - * to GuC. - */ - irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); - irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MODE_GEN7(engine), irqs); - - /* route all GT interrupts to the host */ - I915_WRITE(GUC_BCS_RCS_IER, 0); - I915_WRITE(GUC_VCS2_VCS1_IER, 0); - I915_WRITE(GUC_WD_VECS_IER, 0); - - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; -} - void i915_guc_submission_disable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; -- cgit v1.2.3 From 46f788ba2e09c9f48d50f47e27c8e2cfc7e1c076 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Mar 2017 23:17:55 +0200 Subject: drm/i915: Extract skl_plane_ctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code to calculate the SKL plane control register value into a separate function. Allows us to pre-compute it in the future. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170317211808.14693-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 38 ++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 010e5ddb198a..6ddf5c90d15c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3335,6 +3335,31 @@ u32 skl_plane_ctl_rotation(unsigned int rotation) return 0; } +static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + u32 plane_ctl; + + plane_ctl = PLANE_CTL_ENABLE; + + if (!IS_GEMINILAKE(dev_priv)) { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } + + plane_ctl |= skl_plane_ctl_format(fb->format->format); + plane_ctl |= skl_plane_ctl_tiling(fb->modifier); + plane_ctl |= skl_plane_ctl_rotation(rotation); + + return plane_ctl; +} + static void skylake_update_primary_plane(struct drm_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -3360,18 +3385,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, int dst_h = drm_rect_height(&plane_state->base.dst); unsigned long irqflags; - plane_ctl = PLANE_CTL_ENABLE; - - if (!IS_GEMINILAKE(dev_priv)) { - plane_ctl |= - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE | - PLANE_CTL_PLANE_GAMMA_DISABLE; - } - - plane_ctl |= skl_plane_ctl_format(fb->format->format); - plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= skl_plane_ctl_rotation(rotation); + plane_ctl = skl_plane_ctl(crtc_state, plane_state); /* Sizes are 0 based */ src_w--; -- cgit v1.2.3 From 2e881264b47414e6798329783d9dd9b30d89b76d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Mar 2017 23:17:56 +0200 Subject: drm/i915: Use skl_plane_ctl() for the SKL "sprite" planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On SKL the planes are uniform so the "sprites" can use the primary plane code perfectly fine. The only difference we have is the color key handling, but since we never enable that for the primary plane the same code works just fine. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170317211808.14693-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 16 +++++++++++----- drivers/gpu/drm/i915/intel_drv.h | 5 ++--- drivers/gpu/drm/i915/intel_sprite.c | 18 +----------------- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6ddf5c90d15c..8a9f1bd21e0e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3254,7 +3254,7 @@ u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, return stride; } -u32 skl_plane_ctl_format(uint32_t pixel_format) +static u32 skl_plane_ctl_format(uint32_t pixel_format) { switch (pixel_format) { case DRM_FORMAT_C8: @@ -3295,7 +3295,7 @@ u32 skl_plane_ctl_format(uint32_t pixel_format) return 0; } -u32 skl_plane_ctl_tiling(uint64_t fb_modifier) +static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) { switch (fb_modifier) { case DRM_FORMAT_MOD_NONE: @@ -3313,7 +3313,7 @@ u32 skl_plane_ctl_tiling(uint64_t fb_modifier) return 0; } -u32 skl_plane_ctl_rotation(unsigned int rotation) +static u32 skl_plane_ctl_rotation(unsigned int rotation) { switch (rotation) { case DRM_ROTATE_0: @@ -3335,13 +3335,14 @@ u32 skl_plane_ctl_rotation(unsigned int rotation) return 0; } -static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 plane_ctl; plane_ctl = PLANE_CTL_ENABLE; @@ -3357,6 +3358,11 @@ static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl |= skl_plane_ctl_tiling(fb->modifier); plane_ctl |= skl_plane_ctl_rotation(rotation); + if (key->flags & I915_SET_COLORKEY_DESTINATION) + plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION; + else if (key->flags & I915_SET_COLORKEY_SOURCE) + plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; + return plane_ctl; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 51228fe4283b..45e55b45e772 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1445,9 +1445,8 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) return i915_ggtt_offset(state->vma); } -u32 skl_plane_ctl_format(uint32_t pixel_format); -u32 skl_plane_ctl_tiling(uint64_t fb_modifier); -u32 skl_plane_ctl_rotation(unsigned int rotation); +u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, unsigned int rotation); int skl_check_plane_surface(struct intel_plane_state *plane_state); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b931d0bd7a64..455a9ed44204 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -232,23 +232,7 @@ skl_update_plane(struct drm_plane *drm_plane, uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; - plane_ctl = PLANE_CTL_ENABLE; - - if (!IS_GEMINILAKE(dev_priv)) { - plane_ctl |= - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE | - PLANE_CTL_PLANE_GAMMA_DISABLE; - } - - plane_ctl |= skl_plane_ctl_format(fb->format->format); - plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= skl_plane_ctl_rotation(rotation); - - if (key->flags & I915_SET_COLORKEY_DESTINATION) - plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION; - else if (key->flags & I915_SET_COLORKEY_SOURCE) - plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; + plane_ctl = skl_plane_ctl(crtc_state, plane_state); /* Sizes are 0 based */ src_w--; -- cgit v1.2.3 From 96ef6854bb4c5ad23d81f69fd0a4aed238c40d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Mar 2017 23:17:58 +0200 Subject: drm/i915: Extract vlv_sprite_ctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code to calculate the VLV/CHV sprite control register value into a separate function. Allows us to pre-compute it in the future. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170317211808.14693-5-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_sprite.c | 71 +++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 455a9ed44204..ed4db9276c59 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -345,32 +345,15 @@ chv_update_csc(struct intel_plane *intel_plane, uint32_t format) I915_WRITE_FW(SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); } -static void -vlv_update_plane(struct drm_plane *dplane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_device *dev = dplane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(dplane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum pipe pipe = intel_plane->pipe; - enum plane_id plane_id = intel_plane->id; - u32 sprctl; - u32 sprsurf_offset, linear_offset; + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; - unsigned long irqflags; + u32 sprctl; - sprctl = SP_ENABLE; + sprctl = SP_ENABLE | SP_GAMMA_ENABLE; switch (fb->format->format) { case DRM_FORMAT_YUYV: @@ -407,20 +390,10 @@ vlv_update_plane(struct drm_plane *dplane, sprctl |= SP_FORMAT_RGBA8888; break; default: - /* - * If we get here one of the upper layers failed to filter - * out the unsupported plane formats - */ - BUG(); - break; + MISSING_CASE(fb->format->format); + return 0; } - /* - * Enable gamma to match primary/cursor plane behaviour. - * FIXME should be user controllable via propertiesa. - */ - sprctl |= SP_GAMMA_ENABLE; - if (fb->modifier == I915_FORMAT_MOD_X_TILED) sprctl |= SP_TILED; @@ -433,6 +406,36 @@ vlv_update_plane(struct drm_plane *dplane, if (key->flags & I915_SET_COLORKEY_SOURCE) sprctl |= SP_SOURCE_KEY; + return sprctl; +} + +static void +vlv_update_plane(struct drm_plane *dplane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_device *dev = dplane->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *intel_plane = to_intel_plane(dplane); + struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = intel_plane->pipe; + enum plane_id plane_id = intel_plane->id; + u32 sprctl; + u32 sprsurf_offset, linear_offset; + unsigned int rotation = plane_state->base.rotation; + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + unsigned long irqflags; + + sprctl = vlv_sprite_ctl(crtc_state, plane_state); + /* Sizes are 0 based */ src_w--; src_h--; -- cgit v1.2.3 From 45dea7b0c60738cc4920fa0d39b8d44398436659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Mar 2017 23:17:59 +0200 Subject: drm/i915: Extract ivb_sprite_ctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code to calculate the IVB-BDW sprite control register value into a separate function. Allows us to pre-compute it in the future. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170317211808.14693-6-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_sprite.c | 80 ++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index ed4db9276c59..a3a847a3b39f 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -503,31 +503,23 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void -ivb_update_plane(struct drm_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum pipe pipe = intel_plane->pipe; - u32 sprctl, sprscale = 0; - u32 sprsurf_offset, linear_offset; + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; - unsigned long irqflags; + u32 sprctl; - sprctl = SPRITE_ENABLE; + sprctl = SPRITE_ENABLE | SPRITE_GAMMA_ENABLE; + + if (IS_IVYBRIDGE(dev_priv)) + sprctl |= SPRITE_TRICKLE_FEED_DISABLE; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + sprctl |= SPRITE_PIPE_CSC_ENABLE; switch (fb->format->format) { case DRM_FORMAT_XBGR8888: @@ -549,34 +541,50 @@ ivb_update_plane(struct drm_plane *plane, sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_VYUY; break; default: - BUG(); + MISSING_CASE(fb->format->format); + return 0; } - /* - * Enable gamma to match primary/cursor plane behaviour. - * FIXME should be user controllable via propertiesa. - */ - sprctl |= SPRITE_GAMMA_ENABLE; - if (fb->modifier == I915_FORMAT_MOD_X_TILED) sprctl |= SPRITE_TILED; if (rotation & DRM_ROTATE_180) sprctl |= SPRITE_ROTATE_180; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - sprctl &= ~SPRITE_TRICKLE_FEED_DISABLE; - else - sprctl |= SPRITE_TRICKLE_FEED_DISABLE; - - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - sprctl |= SPRITE_PIPE_CSC_ENABLE; - if (key->flags & I915_SET_COLORKEY_DESTINATION) sprctl |= SPRITE_DEST_KEY; else if (key->flags & I915_SET_COLORKEY_SOURCE) sprctl |= SPRITE_SOURCE_KEY; + return sprctl; +} + +static void +ivb_update_plane(struct drm_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *intel_plane = to_intel_plane(plane); + struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = intel_plane->pipe; + u32 sprctl, sprscale = 0; + u32 sprsurf_offset, linear_offset; + unsigned int rotation = plane_state->base.rotation; + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + unsigned long irqflags; + + sprctl = ivb_sprite_ctl(crtc_state, plane_state); + /* Sizes are 0 based */ src_w--; src_h--; -- cgit v1.2.3 From 0a37514795625713f8a0b6e7d33e42a3abd1dbe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Mar 2017 23:18:00 +0200 Subject: drm/i915: Extract ilk_sprite_ctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code to calculate the ILK-SNB sprite control register value into a separate function. Allows us to pre-compute it in the future. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170317211808.14693-7-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_sprite.c | 72 +++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index a3a847a3b39f..12e33bc149e4 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -659,31 +659,20 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void -ilk_update_plane(struct drm_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *fb = plane_state->base.fb; - int pipe = intel_plane->pipe; - u32 dvscntr, dvsscale; - u32 dvssurf_offset, linear_offset; + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - int crtc_x = plane_state->base.dst.x1; - int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; - unsigned long irqflags; + u32 dvscntr; + + dvscntr = DVS_ENABLE | DVS_GAMMA_ENABLE; - dvscntr = DVS_ENABLE; + if (IS_GEN6(dev_priv)) + dvscntr |= DVS_TRICKLE_FEED_DISABLE; switch (fb->format->format) { case DRM_FORMAT_XBGR8888: @@ -705,29 +694,50 @@ ilk_update_plane(struct drm_plane *plane, dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_VYUY; break; default: - BUG(); + MISSING_CASE(fb->format->format); + return 0; } - /* - * Enable gamma to match primary/cursor plane behaviour. - * FIXME should be user controllable via propertiesa. - */ - dvscntr |= DVS_GAMMA_ENABLE; - if (fb->modifier == I915_FORMAT_MOD_X_TILED) dvscntr |= DVS_TILED; if (rotation & DRM_ROTATE_180) dvscntr |= DVS_ROTATE_180; - if (IS_GEN6(dev_priv)) - dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */ - if (key->flags & I915_SET_COLORKEY_DESTINATION) dvscntr |= DVS_DEST_KEY; else if (key->flags & I915_SET_COLORKEY_SOURCE) dvscntr |= DVS_SOURCE_KEY; + return dvscntr; +} + +static void +ilk_update_plane(struct drm_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *intel_plane = to_intel_plane(plane); + struct drm_framebuffer *fb = plane_state->base.fb; + int pipe = intel_plane->pipe; + u32 dvscntr, dvsscale; + u32 dvssurf_offset, linear_offset; + unsigned int rotation = plane_state->base.rotation; + const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + uint32_t x = plane_state->base.src.x1 >> 16; + uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; + uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + unsigned long irqflags; + + dvscntr = ilk_sprite_ctl(crtc_state, plane_state); + /* Sizes are 0 based */ src_w--; src_h--; -- cgit v1.2.3 From 292889e1ffa8bb56fda2c6208d3f884ce97e1189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Mar 2017 23:18:01 +0200 Subject: drm/i915: Extract i845_cursor_ctl() and i9xx_cursor_ctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code to calculate the cursor control register value into separate functions. Allows us to pre-compute them in the future. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170317211808.14693-8-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 119 +++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8a9f1bd21e0e..9a28a8917dc1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9179,7 +9179,33 @@ out: return active; } +static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int width = plane_state->base.crtc_w; + unsigned int stride = roundup_pow_of_two(width) * 4; + + switch (stride) { + default: + WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", + width, stride); + stride = 256; + /* fallthrough */ + case 256: + case 512: + case 1024: + case 2048: + break; + } + + return CURSOR_ENABLE | + CURSOR_GAMMA_ENABLE | + CURSOR_FORMAT_ARGB | + CURSOR_STRIDE(stride); +} + static void i845_update_cursor(struct drm_crtc *crtc, u32 base, + const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_device *dev = crtc->dev; @@ -9190,26 +9216,8 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, if (plane_state && plane_state->base.visible) { unsigned int width = plane_state->base.crtc_w; unsigned int height = plane_state->base.crtc_h; - unsigned int stride = roundup_pow_of_two(width) * 4; - - switch (stride) { - default: - WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", - width, stride); - stride = 256; - /* fallthrough */ - case 256: - case 512: - case 1024: - case 2048: - break; - } - - cntl |= CURSOR_ENABLE | - CURSOR_GAMMA_ENABLE | - CURSOR_FORMAT_ARGB | - CURSOR_STRIDE(stride); + cntl = i845_cursor_ctl(crtc_state, plane_state); size = (height << 12) | width; } @@ -9242,7 +9250,45 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, } } +static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; + u32 cntl; + + cntl = MCURSOR_GAMMA_ENABLE; + + if (HAS_DDI(dev_priv)) + cntl |= CURSOR_PIPE_CSC_ENABLE; + + cntl |= pipe << 28; /* Connect to correct pipe */ + + switch (plane_state->base.crtc_w) { + case 64: + cntl |= CURSOR_MODE_64_ARGB_AX; + break; + case 128: + cntl |= CURSOR_MODE_128_ARGB_AX; + break; + case 256: + cntl |= CURSOR_MODE_256_ARGB_AX; + break; + default: + MISSING_CASE(plane_state->base.crtc_w); + return 0; + } + + if (plane_state->base.rotation & DRM_ROTATE_180) + cntl |= CURSOR_ROTATE_180; + + return cntl; +} + static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, + const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_device *dev = crtc->dev; @@ -9251,30 +9297,8 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, int pipe = intel_crtc->pipe; uint32_t cntl = 0; - if (plane_state && plane_state->base.visible) { - cntl = MCURSOR_GAMMA_ENABLE; - switch (plane_state->base.crtc_w) { - case 64: - cntl |= CURSOR_MODE_64_ARGB_AX; - break; - case 128: - cntl |= CURSOR_MODE_128_ARGB_AX; - break; - case 256: - cntl |= CURSOR_MODE_256_ARGB_AX; - break; - default: - MISSING_CASE(plane_state->base.crtc_w); - return; - } - cntl |= pipe << 28; /* Connect to correct pipe */ - - if (HAS_DDI(dev_priv)) - cntl |= CURSOR_PIPE_CSC_ENABLE; - - if (plane_state->base.rotation & DRM_ROTATE_180) - cntl |= CURSOR_ROTATE_180; - } + if (plane_state && plane_state->base.visible) + cntl = i9xx_cursor_ctl(crtc_state, plane_state); if (intel_crtc->cursor_cntl != cntl) { I915_WRITE_FW(CURCNTR(pipe), cntl); @@ -9291,6 +9315,7 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, /* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */ static void intel_crtc_update_cursor(struct drm_crtc *crtc, + const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_device *dev = crtc->dev; @@ -9330,9 +9355,9 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, I915_WRITE_FW(CURPOS(pipe), pos); if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - i845_update_cursor(crtc, base, plane_state); + i845_update_cursor(crtc, base, crtc_state, plane_state); else - i9xx_update_cursor(crtc, base, plane_state); + i9xx_update_cursor(crtc, base, crtc_state, plane_state); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -13762,7 +13787,7 @@ intel_disable_cursor_plane(struct drm_plane *plane, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); intel_crtc->cursor_addr = 0; - intel_crtc_update_cursor(crtc, NULL); + intel_crtc_update_cursor(crtc, NULL, NULL); } static void @@ -13784,7 +13809,7 @@ intel_update_cursor_plane(struct drm_plane *plane, addr = obj->phys_handle->busaddr; intel_crtc->cursor_addr = addr; - intel_crtc_update_cursor(crtc, state); + intel_crtc_update_cursor(crtc, crtc_state, state); } static struct intel_plane * -- cgit v1.2.3 From 2e70b8c6bf0003bf1691ac5cab5aff128747d494 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 13:48:03 +0000 Subject: drm/i915/execlists: Relax the locked clear_bit(IRQ_EXECLIST) We only need to care about the ordering of the clearing of the bit with the uncached CSB read in order to correctly detect a new interrupt before the read completes. The uncached read itself acts as a full memory barrier, so we do not need to enforce another in the form of a locked clear_bit. v2: Clarify why the split and unlocked test/clear is harmless. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170323134803.10418-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_lrc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index eec1e714f531..dd0e9d587852 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -540,7 +540,17 @@ static void intel_lrc_irq_handler(unsigned long data) dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); unsigned int csb, head, tail; - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + /* The write will be ordered by the uncached read (itself + * a memory barrier), so we do not need another in the form + * of a locked instruction. The race between the interrupt + * handler and the split test/clear is harmless as we order + * our clear before the CSB read. If the interrupt arrived + * first between the test and the clear, we read the updated + * CSB and clear the bit. If the interrupt arrives as we read + * the CSB or later (i.e. after we had cleared the bit) the bit + * is set and we do a new loop. + */ + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); csb = readl(csb_mmio); head = GEN8_CSB_READ_PTR(csb); tail = GEN8_CSB_WRITE_PTR(csb); -- cgit v1.2.3 From bd00e73ede5933322e01fc9b32e91d85826ac1c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 23:00:00 +0000 Subject: drm/i915/guc: Refactor the retrieval of guc_process_desc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the common "client->vaddr + client->proc_desc_offset" to its own function, __get_process_desc() to match the newly established pattern. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170323230000.20786-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/i915_guc_submission.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 20ab20b03ee8..991e76e10f82 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -280,6 +280,12 @@ static unsigned long __select_cacheline(struct intel_guc* guc) return offset; } +static inline struct guc_process_desc * +__get_process_desc(struct i915_guc_client *client) +{ + return client->vaddr + client->proc_desc_offset; +} + /* * Initialise the process descriptor shared with the GuC firmware. */ @@ -288,9 +294,7 @@ static void guc_proc_desc_init(struct intel_guc *guc, { struct guc_process_desc *desc; - desc = client->vaddr + client->proc_desc_offset; - - memset(desc, 0, sizeof(*desc)); + desc = memset(__get_process_desc(client), 0, sizeof(*desc)); /* * XXX: pDoorbell and WQVBaseAddress are pointers in process address @@ -422,8 +426,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) { const size_t wqi_size = sizeof(struct guc_wq_item); struct i915_guc_client *client = request->i915->guc.execbuf_client; - struct guc_process_desc *desc = client->vaddr + - client->proc_desc_offset; + struct guc_process_desc *desc = __get_process_desc(client); u32 freespace; int ret; @@ -468,12 +471,10 @@ static void guc_wq_item_append(struct i915_guc_client *client, const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size/sizeof(u32) - 1; struct intel_engine_cs *engine = rq->engine; - struct guc_process_desc *desc; + struct guc_process_desc *desc = __get_process_desc(client); struct guc_wq_item *wqi; u32 freespace, tail, wq_off; - desc = client->vaddr + client->proc_desc_offset; - /* Free space is guaranteed, see i915_guc_wq_reserve() above */ freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); GEM_BUG_ON(freespace < wqi_size); @@ -519,8 +520,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, static void guc_reset_wq(struct i915_guc_client *client) { - struct guc_process_desc *desc = client->vaddr + - client->proc_desc_offset; + struct guc_process_desc *desc = __get_process_desc(client); desc->head = 0; desc->tail = 0; @@ -530,13 +530,11 @@ static void guc_reset_wq(struct i915_guc_client *client) static int guc_ring_doorbell(struct i915_guc_client *client) { - struct guc_process_desc *desc; + struct guc_process_desc *desc = __get_process_desc(client); union guc_doorbell_qw db_cmp, db_exc, db_ret; union guc_doorbell_qw *db; int attempt = 2, ret = -EAGAIN; - desc = client->vaddr + client->proc_desc_offset; - /* Update the tail so it is visible to GuC */ desc->tail = client->wq_tail; -- cgit v1.2.3 From 5d64c120c046c287382373df52b9e6fc47e6367c Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 24 Mar 2017 07:48:39 -0700 Subject: drm/i915/guc: limit forcewake to blitter domain in guc_send The forcewake_get call in the guc_send_mmio function was added to avoid getting and releasing forcewake on each register access. While this makes sense, all GuC registers are in the blitter range so no need to wake all the wells. Signed-off-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/1490366919-34715-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Sagar Arun Kamble Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 4a872cdf57e8..e01622757b84 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -245,7 +245,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) return -EINVAL; mutex_lock(&guc->send_mutex); - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER); dev_priv->guc.action_count += 1; dev_priv->guc.action_cmd = action[0]; @@ -283,7 +283,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) } dev_priv->guc.action_status = status; - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); mutex_unlock(&guc->send_mutex); return ret; -- cgit v1.2.3 From e02d9d76bf4de91d6eddf73345c5e8a9ae5a5f9f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Mar 2017 15:17:23 +0000 Subject: drm/i915: Disable MI_SET_CONTEXT psmi w/a for bdw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current w/a for the gen7 psmi related hangs doesn't apply to bdw, so disable it if using bdw ringbuffer submission. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170324151724.32640-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/i915_gem_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8fc8b3d15a0f..8bd0c4966913 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -582,8 +582,8 @@ mi_set_context(struct drm_i915_gem_request *req, u32 flags) struct intel_engine_cs *engine = req->engine; enum intel_engine_id id; const int num_rings = - /* Use an extended w/a on ivb+ if signalling from other rings */ - i915.semaphores ? + /* Use an extended w/a on gen7 if signalling from other rings */ + (i915.semaphores && INTEL_GEN(dev_priv) == 7) ? INTEL_INFO(dev_priv)->num_rings - 1 : 0; int len; -- cgit v1.2.3 From 6f9b850bec05ba8b08c504112808973259e41140 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Mar 2017 15:17:24 +0000 Subject: drm/i915: Fix semaphore emission for BDW+ RCS ringbuffer emission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The required number of dwords for semaphore emission on BDW RCS is 8, not 6 - leading to ring buffer corruption and immediate GPU hangs when using ringbuffer submission. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170324151724.32640-2-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 62756eb2bd4a..4729ac7ac122 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2117,7 +2117,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; - engine->emit_breadcrumb_sz += num_rings * 6; + engine->emit_breadcrumb_sz += num_rings * 8; } } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; -- cgit v1.2.3 From 9d98af0bbec8a7a03fc5c72381ec3dd53b8de87d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 27 Mar 2017 09:45:10 +0000 Subject: drm/i915/uc: Make intel_uc_prepare_fw() static There is no need to expose this function as it is called from one function only. Also move it up to avoid forward declaration. v2: drop intel_ prefix (Oscar) and rename to fetch_uc_fw (Michal) Signed-off-by: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170327094510.167400-1-michal.wajdeczko@intel.com Reviewed-by: Joonas Lahtinen Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uc.c | 270 ++++++++++++++++++++-------------------- drivers/gpu/drm/i915/intel_uc.h | 2 - 2 files changed, 135 insertions(+), 137 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index e01622757b84..c767dc351c63 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -89,13 +89,146 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv) guc->send = intel_guc_send_mmio; } +static void fetch_uc_fw(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_i915_gem_object *obj; + const struct firmware *fw = NULL; + struct uc_css_header *css; + size_t size; + int err; + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; + + DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + err = request_firmware(&fw, uc_fw->path, &pdev->dev); + if (err) + goto fail; + if (!fw) + goto fail; + + DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", + uc_fw->path, fw); + + /* Check the size of the blob before examining buffer contents */ + if (fw->size < sizeof(struct uc_css_header)) { + DRM_NOTE("Firmware header is missing\n"); + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Firmware bits always start from header */ + uc_fw->header_offset = 0; + uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - + css->key_size_dw - css->exponent_size_dw) * sizeof(u32); + + if (uc_fw->header_size != sizeof(struct uc_css_header)) { + DRM_NOTE("CSS header definition mismatch\n"); + goto fail; + } + + /* then, uCode */ + uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* now RSA */ + if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { + DRM_NOTE("RSA key size is bad\n"); + goto fail; + } + uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At least, it should have header, uCode and RSA. Size of all three. */ + size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; + if (fw->size < size) { + DRM_NOTE("Missing firmware components\n"); + goto fail; + } + + /* + * The GuC firmware image has the version number embedded at a + * well-known offset within the firmware blob; note that major / minor + * version are TWO bytes each (i.e. u16), although all pointers and + * offsets are defined in terms of bytes (u8). + */ + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + /* Header and uCode will be loaded to WOPCM. Size of the two. */ + size = uc_fw->header_size + uc_fw->ucode_size; + + /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ + if (size > intel_guc_wopcm_size(dev_priv)) { + DRM_ERROR("Firmware is too large to fit in WOPCM\n"); + goto fail; + } + uc_fw->major_ver_found = css->guc.sw_version >> 16; + uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; + break; + + case INTEL_UC_FW_TYPE_HUC: + uc_fw->major_ver_found = css->huc.sw_version >> 16; + uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; + break; + + default: + DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); + err = -ENOEXEC; + goto fail; + } + + if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { + DRM_NOTE("Skipping uC firmware version check\n"); + } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + DRM_NOTE("uC firmware version %d.%d, required %d.%d\n", + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + err = -ENOEXEC; + goto fail; + } + + DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + + obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto fail; + } + + uc_fw->obj = obj; + uc_fw->size = fw->size; + + DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", + uc_fw->obj); + + release_firmware(fw); + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; + return; + +fail: + DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", + uc_fw->path, err); + DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", + err, fw, uc_fw->obj); + + release_firmware(fw); /* OK even if fw is NULL */ + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; +} + void intel_uc_init_fw(struct drm_i915_private *dev_priv) { if (dev_priv->huc.fw.path) - intel_uc_prepare_fw(dev_priv, &dev_priv->huc.fw); + fetch_uc_fw(dev_priv, &dev_priv->huc.fw); if (dev_priv->guc.fw.path) - intel_uc_prepare_fw(dev_priv, &dev_priv->guc.fw); + fetch_uc_fw(dev_priv, &dev_priv->guc.fw); } void intel_uc_fini_fw(struct drm_i915_private *dev_priv) @@ -304,136 +437,3 @@ int intel_guc_sample_forcewake(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } - -void intel_uc_prepare_fw(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_gem_object *obj; - const struct firmware *fw = NULL; - struct uc_css_header *css; - size_t size; - int err; - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - err = request_firmware(&fw, uc_fw->path, &pdev->dev); - if (err) - goto fail; - if (!fw) - goto fail; - - DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", - uc_fw->path, fw); - - /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct uc_css_header)) { - DRM_NOTE("Firmware header is missing\n"); - goto fail; - } - - css = (struct uc_css_header *)fw->data; - - /* Firmware bits always start from header */ - uc_fw->header_offset = 0; - uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - - if (uc_fw->header_size != sizeof(struct uc_css_header)) { - DRM_NOTE("CSS header definition mismatch\n"); - goto fail; - } - - /* then, uCode */ - uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; - uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); - - /* now RSA */ - if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_NOTE("RSA key size is bad\n"); - goto fail; - } - uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); - - /* At least, it should have header, uCode and RSA. Size of all three. */ - size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; - if (fw->size < size) { - DRM_NOTE("Missing firmware components\n"); - goto fail; - } - - /* - * The GuC firmware image has the version number embedded at a - * well-known offset within the firmware blob; note that major / minor - * version are TWO bytes each (i.e. u16), although all pointers and - * offsets are defined in terms of bytes (u8). - */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = uc_fw->header_size + uc_fw->ucode_size; - - /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */ - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); - goto fail; - } - uc_fw->major_ver_found = css->guc.sw_version >> 16; - uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = css->huc.sw_version >> 16; - uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; - break; - - default: - DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); - err = -ENOEXEC; - goto fail; - } - - if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { - DRM_NOTE("Skipping uC firmware version check\n"); - } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("uC firmware version %d.%d, required %d.%d\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - err = -ENOEXEC; - goto fail; - } - - DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - - obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto fail; - } - - uc_fw->obj = obj; - uc_fw->size = fw->size; - - DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", - uc_fw->obj); - - release_firmware(fw); - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; - return; - -fail: - DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", - uc_fw->path, err); - DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", - err, fw, uc_fw->obj); - - release_firmware(fw); /* OK even if fw is NULL */ - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; -} diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 6cf2d14fa0dc..087192d9c1af 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -197,8 +197,6 @@ void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); -void intel_uc_prepare_fw(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw); int intel_guc_sample_forcewake(struct intel_guc *guc); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) -- cgit v1.2.3 From 4af0d727b1363200d8380032a5180544d64daa68 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Mar 2017 20:10:53 +0000 Subject: drm/i915/execlists: Trim irq handler I noticed that gcc was spilling the CSB to the stack, so rearrange the code to be more compact. Spilling in this function is slightly more interesting due to the mmio reads acting as memory barriers and so end up flushing the stack spills. Still miniscule to having to do at least the pair of uncached reads :( function old new delta intel_lrc_irq_handler 1039 878 -161 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170325201053.21306-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_lrc.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dd0e9d587852..b75df70e8e0e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -538,7 +538,7 @@ static void intel_lrc_irq_handler(unsigned long data) dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); u32 __iomem *buf = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); - unsigned int csb, head, tail; + unsigned int head, tail; /* The write will be ordered by the uncached read (itself * a memory barrier), so we do not need another in the form @@ -551,17 +551,14 @@ static void intel_lrc_irq_handler(unsigned long data) * is set and we do a new loop. */ __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - csb = readl(csb_mmio); - head = GEN8_CSB_READ_PTR(csb); - tail = GEN8_CSB_WRITE_PTR(csb); - if (head == tail) - break; + head = readl(csb_mmio); + tail = GEN8_CSB_WRITE_PTR(head); + head = GEN8_CSB_READ_PTR(head); + while (head != tail) { + unsigned int status; - if (tail < head) - tail += GEN8_CSB_ENTRIES; - do { - unsigned int idx = ++head % GEN8_CSB_ENTRIES; - unsigned int status = readl(buf + 2 * idx); + if (++head == GEN8_CSB_ENTRIES) + head = 0; /* We are flying near dragons again. * @@ -580,11 +577,12 @@ static void intel_lrc_irq_handler(unsigned long data) * status notifier. */ + status = readl(buf + 2 * head); if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) != + GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != port[0].context_id); GEM_BUG_ON(port[0].count == 0); @@ -602,10 +600,9 @@ static void intel_lrc_irq_handler(unsigned long data) GEM_BUG_ON(port[0].count == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - } while (head < tail); + } - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, - GEN8_CSB_WRITE_PTR(csb) << 8), + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), csb_mmio); } -- cgit v1.2.3 From e2a2aa36a5090e38bdbdd3f85a216a19c334a6de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Mar 2017 15:00:53 +0000 Subject: drm/i915: Check we have an wake device before flushing GTT writes We can assume that if the device is asleep then all pending GTT writes will have been posted, and so we can defer the flush from i915_gem_object_flush_gtt_write_domain() [ 1957.462568] WARNING: CPU: 0 PID: 6132 at drivers/gpu/drm/i915/intel_drv.h:1742 fwtable_read32+0x123/0x150 [i915] [ 1957.462582] RPM wakelock ref not held during HW access [ 1957.462583] Modules linked in: i915 intel_gtt drm_kms_helper prime_numbers [ 1957.462607] CPU: 0 PID: 6132 Comm: gem_concurrent_ Tainted: G U 4.11.0-rc1+ #464 [ 1957.462619] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 1957.462630] Call Trace: [ 1957.462646] dump_stack+0x4d/0x6f [ 1957.462657] __warn+0xc1/0xe0 [ 1957.462667] warn_slowpath_fmt+0x4a/0x50 [ 1957.462709] fwtable_read32+0x123/0x150 [i915] [ 1957.462750] i915_gem_object_flush_gtt_write_domain+0x43/0x70 [i915] [ 1957.462791] i915_gem_object_set_to_cpu_domain+0x46/0xa0 [i915] [ 1957.462831] i915_gem_set_domain_ioctl+0x15d/0x220 [i915] [ 1957.462843] drm_ioctl+0x1d7/0x440 [ 1957.462885] ? i915_gem_obj_prepare_shmem_write+0x1d0/0x1d0 [i915] [ 1957.462896] ? pick_next_task_fair+0x436/0x440 [ 1957.462906] ? mntput+0x1f/0x30 [ 1957.462915] do_vfs_ioctl+0x8f/0x5c0 [ 1957.462925] ? __schedule+0x16f/0x5f0 [ 1957.462935] ? ____fput+0x9/0x10 [ 1957.462943] SyS_ioctl+0x3c/0x70 [ 1957.462952] entry_SYSCALL_64_fastpath+0x17/0x98 [ 1957.462961] RIP: 0033:0x7fc542179ca7 [ 1957.462968] RSP: 002b:00007ffeef12ff98 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 1957.462982] RAX: ffffffffffffffda RBX: 00007ffeef1301d0 RCX: 00007fc542179ca7 [ 1957.462990] RDX: 00007ffeef12ffd0 RSI: 00000000400c645f RDI: 0000000000000003 [ 1957.462999] RBP: 0000000000000003 R08: 000055f433bc7c40 R09: 000000000000002c [ 1957.463006] R10: 0000000000000073 R11: 0000000000000246 R12: 0000000000000018 [ 1957.463015] R13: 000055f432c89d20 R14: 000055f432c87690 R15: 0000000000000000 Fixes: 3b5724d702ef ("drm/i915: Wait for writes through the GTT to land before reading back") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170323150053.28582-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9515daea6358..84ea249c6f4f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3317,9 +3317,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) */ wmb(); if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); + if (intel_runtime_pm_get_if_in_use(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); + } } intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); -- cgit v1.2.3 From 71cc2b184c0c1b7b4983091e65f102c6817aada9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Mar 2017 16:54:18 +0000 Subject: drm/i915: Limit number of reads to stabilize rc6 counter reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have only 8bits of precise timestamps in which to complete our upper/load reads, along with the switch between precision. This is not always enough time to read the upper counter twice within the same time slice, leading to hard lockups. Limit the number of times to prevent an inifite loop (my fault for assuming we would have no trouble doing the write + reads fast enough). Fixes: 47c21d9a1a7b ("drm/i915: Extend vlv/chv residency resolution") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100377 Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170324165418.7455-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 13ada8560e71..b7ae42a67494 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8388,6 +8388,7 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, const i915_reg_t reg) { u32 lower, upper, tmp; + int loop = 2; /* The register accessed do not need forcewake. We borrow * uncore lock to prevent concurrent access to range reg. @@ -8416,7 +8417,7 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, I915_WRITE_FW(VLV_COUNTER_CONTROL, _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); upper = I915_READ_FW(reg); - } while (upper != tmp); + } while (upper != tmp && --loop); /* Everywhere else we always use VLV_COUNTER_CONTROL with the * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set -- cgit v1.2.3 From f4ce766f28cd0efa0cb4d869a84905d573ef7e70 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Mar 2017 11:32:43 +0000 Subject: drm/i915: Align "unfenced" tiled access on gen2, early gen3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old devices have quite severe restrictions for using fences, and unlike more recent device (anything from Pineview onwards) we need to enforce those restrictions even for unfenced tiled access from the render pipeline. Fixes: 944397f04f24 ("drm/i915: Store required fence size/alignment for GGTT vma") Reported-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Joonas Lahtinen Cc: # v4.11-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170325113243.16438-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter Reviewed-by: Joonas Lahtinen Tested-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +++- drivers/gpu/drm/i915/i915_pci.c | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2911c49113b0..86f097db8ef6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -853,6 +853,7 @@ struct intel_csr { func(has_resource_streamer); \ func(has_runtime_pm); \ func(has_snoop); \ + func(unfenced_needs_alignment); \ func(cursor_needs_physical); \ func(hws_needs_physical); \ func(overlay_needs_physical); \ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dd7181ed5eca..a3e59c8ef27b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -890,6 +890,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, struct list_head ordered_vmas; struct list_head pinned_vmas; bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; + bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment; int retry; vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; @@ -910,7 +911,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, if (!has_fenced_gpu_access) entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = - entry->flags & EXEC_OBJECT_NEEDS_FENCE && + (entry->flags & EXEC_OBJECT_NEEDS_FENCE || + needs_unfenced_map) && i915_gem_object_is_tiled(obj); need_mappable = need_fence || need_reloc_mappable(vma); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 732101ed57fb..f87b0c4e564d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -61,6 +61,7 @@ .has_overlay = 1, .overlay_needs_physical = 1, \ .has_gmch_display = 1, \ .hws_needs_physical = 1, \ + .unfenced_needs_alignment = 1, \ .ring_mask = RENDER_RING, \ GEN_DEFAULT_PIPEOFFSETS, \ CURSOR_OFFSETS @@ -102,6 +103,7 @@ static const struct intel_device_info intel_i915g_info = { .platform = INTEL_I915G, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i915gm_info = { @@ -113,6 +115,7 @@ static const struct intel_device_info intel_i915gm_info = { .supports_tv = 1, .has_fbc = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i945g_info = { @@ -121,6 +124,7 @@ static const struct intel_device_info intel_i945g_info = { .has_hotplug = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i945gm_info = { @@ -131,6 +135,7 @@ static const struct intel_device_info intel_i945gm_info = { .supports_tv = 1, .has_fbc = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_g33_info = { -- cgit v1.2.3 From 9a29dd85a09d825cb28dcaacd36ed0ee0be49699 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Mar 2017 16:35:38 +0000 Subject: drm/i915: Fixup intel_write_status_page() for old CPUs without clflush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all of our target platforms have clflush. For those without, just assume the status page is sufficiently coherent that we do not need our paranoia. Reported-by: Ville Syrjälä Fixes: 14a6bbf9e535 ("drm/i915: Replace irq_seqno_barrier on hws write with a clflush") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170324163540.31981-1-chris@chris-wilson.co.uk Tested-by: Ville Syrjälä Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_ringbuffer.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2ecb41788fb6..35da008dc70c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -454,14 +454,22 @@ intel_read_status_page(struct intel_engine_cs *engine, int reg) } static inline void -intel_write_status_page(struct intel_engine_cs *engine, - int reg, u32 value) +intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) { - mb(); - clflush(&engine->status_page.page_addr[reg]); - engine->status_page.page_addr[reg] = value; - clflush(&engine->status_page.page_addr[reg]); - mb(); + /* Writing into the status page should be done sparingly. Since + * we do when we are uncertain of the device state, we take a bit + * of extra paranoia to try and ensure that the HWS takes the value + * we give and that it doesn't end up trapped inside the CPU! + */ + if (static_cpu_has(X86_FEATURE_CLFLUSH)) { + mb(); + clflush(&engine->status_page.page_addr[reg]); + engine->status_page.page_addr[reg] = value; + clflush(&engine->status_page.page_addr[reg]); + mb(); + } else { + WRITE_ONCE(engine->status_page.page_addr[reg], value); + } } /* -- cgit v1.2.3 From ddb2397e30896626831dbac31a75eeb1991c4819 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Mar 2017 16:35:39 +0000 Subject: drm/i915: Remove unused intel_flush_status_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_flush_status_page() is defunct since commit f8dd2934c4ec ("drm/i915: Remove BXT incoherent seqno write workaround"), time to remove it. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170324163540.31981-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 35da008dc70c..607755aa50b2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -438,14 +438,6 @@ intel_engine_flag(const struct intel_engine_cs *engine) return 1 << engine->id; } -static inline void -intel_flush_status_page(struct intel_engine_cs *engine, int reg) -{ - mb(); - clflush(&engine->status_page.page_addr[reg]); - mb(); -} - static inline u32 intel_read_status_page(struct intel_engine_cs *engine, int reg) { -- cgit v1.2.3 From 59ce13104d532f04e85cbbbbaa400200c1d12c28 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Mar 2017 16:35:40 +0000 Subject: drm/i915: Use BIT() for computing the engine's flag Since the engine's flag is just the bit of its id, use BIT(). Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170324163540.31981-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 607755aa50b2..166aa1ae65cf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -432,10 +432,10 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -static inline unsigned +static inline unsigned int intel_engine_flag(const struct intel_engine_cs *engine) { - return 1 << engine->id; + return BIT(engine->id); } static inline u32 -- cgit v1.2.3 From 7145f60a347d5676f2ac6982ed9d57dbef17a7e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Mar 2017 21:27:07 +0200 Subject: drm/i915: Extract i9xx_plane_ctl() and ironlake_plane_ctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code to calculate the pre-SKL primary plane control register value into separate functions. Allows us to pre-compute it in the future. v2: Split the pre-ilk vs. ilk+ unification to a separate patch (Chris) Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170323192712.30682-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 104 ++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9a28a8917dc1..aa2c85b2bf78 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2962,28 +2962,23 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) return 0; } -static void i9xx_update_primary_plane(struct drm_plane *primary, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(primary->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_framebuffer *fb = plane_state->base.fb; - int plane = intel_crtc->plane; - u32 linear_offset; - u32 dspcntr; - i915_reg_t reg = DSPCNTR(plane); + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; - int x = plane_state->base.src.x1 >> 16; - int y = plane_state->base.src.y1 >> 16; - unsigned long irqflags; + u32 dspcntr; - dspcntr = DISPPLANE_GAMMA_ENABLE; + dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE; - dspcntr |= DISPLAY_PLANE_ENABLE; + if (IS_G4X(dev_priv)) + dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; if (INTEL_GEN(dev_priv) < 4) { - if (intel_crtc->pipe == PIPE_B) + if (crtc->pipe == PIPE_B) dspcntr |= DISPPLANE_SEL_PIPE_B; } @@ -3010,7 +3005,8 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, dspcntr |= DISPPLANE_RGBX101010; break; default: - BUG(); + MISSING_CASE(fb->format->format); + return 0; } if (INTEL_GEN(dev_priv) >= 4 && @@ -3023,8 +3019,26 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, if (rotation & DRM_REFLECT_X) dspcntr |= DISPPLANE_MIRROR; - if (IS_G4X(dev_priv)) - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; + return dspcntr; +} + +static void i9xx_update_primary_plane(struct drm_plane *primary, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = to_i915(primary->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_framebuffer *fb = plane_state->base.fb; + int plane = intel_crtc->plane; + u32 linear_offset; + u32 dspcntr; + i915_reg_t reg = DSPCNTR(plane); + unsigned int rotation = plane_state->base.rotation; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; + unsigned long irqflags; + + dspcntr = i9xx_plane_ctl(crtc_state, plane_state); intel_add_fb_offsets(&x, &y, plane_state, 0); @@ -3105,25 +3119,19 @@ static void i9xx_disable_primary_plane(struct drm_plane *primary, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void ironlake_update_primary_plane(struct drm_plane *primary, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 ironlake_plane_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_device *dev = primary->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_framebuffer *fb = plane_state->base.fb; - int plane = intel_crtc->plane; - u32 linear_offset; - u32 dspcntr; - i915_reg_t reg = DSPCNTR(plane); + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; - int x = plane_state->base.src.x1 >> 16; - int y = plane_state->base.src.y1 >> 16; - unsigned long irqflags; + u32 dspcntr; - dspcntr = DISPPLANE_GAMMA_ENABLE; - dspcntr |= DISPLAY_PLANE_ENABLE; + dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE; + + if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) + dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; @@ -3148,7 +3156,8 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, dspcntr |= DISPPLANE_RGBX101010; break; default: - BUG(); + MISSING_CASE(fb->format->format); + return 0; } if (fb->modifier == I915_FORMAT_MOD_X_TILED) @@ -3157,8 +3166,27 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, if (rotation & DRM_ROTATE_180) dspcntr |= DISPPLANE_ROTATE_180; - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; + return dspcntr; +} + +static void ironlake_update_primary_plane(struct drm_plane *primary, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct drm_device *dev = primary->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_framebuffer *fb = plane_state->base.fb; + int plane = intel_crtc->plane; + u32 linear_offset; + u32 dspcntr; + i915_reg_t reg = DSPCNTR(plane); + unsigned int rotation = plane_state->base.rotation; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; + unsigned long irqflags; + + dspcntr = ironlake_plane_ctl(crtc_state, plane_state); intel_add_fb_offsets(&x, &y, plane_state, 0); -- cgit v1.2.3 From 6a4407a653e93f56e0d7b8849fc9c1d334de39fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Mar 2017 21:27:08 +0200 Subject: drm/i915: Nuke ironlake_plane_ctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Share the code to compute the primary plane control register value between the i9xx and ilk codepaths as the differences are minimal. Actually there are no differences between g4x and ilk, so the current split doesn't really make any sense. Cc: Chris Wilson Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170323192712.30682-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 58 ++++-------------------------------- 1 file changed, 6 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index aa2c85b2bf78..4f57ce982a72 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2974,9 +2974,13 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE; - if (IS_G4X(dev_priv)) + if (IS_G4X(dev_priv) || IS_GEN5(dev_priv) || + IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; + if (INTEL_GEN(dev_priv) < 4) { if (crtc->pipe == PIPE_B) dspcntr |= DISPPLANE_SEL_PIPE_B; @@ -3119,56 +3123,6 @@ static void i9xx_disable_primary_plane(struct drm_plane *primary, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static u32 ironlake_plane_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; - u32 dspcntr; - - dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE; - - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; - - switch (fb->format->format) { - case DRM_FORMAT_C8: - dspcntr |= DISPPLANE_8BPP; - break; - case DRM_FORMAT_RGB565: - dspcntr |= DISPPLANE_BGRX565; - break; - case DRM_FORMAT_XRGB8888: - dspcntr |= DISPPLANE_BGRX888; - break; - case DRM_FORMAT_XBGR8888: - dspcntr |= DISPPLANE_RGBX888; - break; - case DRM_FORMAT_XRGB2101010: - dspcntr |= DISPPLANE_BGRX101010; - break; - case DRM_FORMAT_XBGR2101010: - dspcntr |= DISPPLANE_RGBX101010; - break; - default: - MISSING_CASE(fb->format->format); - return 0; - } - - if (fb->modifier == I915_FORMAT_MOD_X_TILED) - dspcntr |= DISPPLANE_TILED; - - if (rotation & DRM_ROTATE_180) - dspcntr |= DISPPLANE_ROTATE_180; - - return dspcntr; -} - static void ironlake_update_primary_plane(struct drm_plane *primary, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -3186,7 +3140,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, int y = plane_state->base.src.y1 >> 16; unsigned long irqflags; - dspcntr = ironlake_plane_ctl(crtc_state, plane_state); + dspcntr = i9xx_plane_ctl(crtc_state, plane_state); intel_add_fb_offsets(&x, &y, plane_state, 0); -- cgit v1.2.3 From a0864d5905a94359621946a027e180a3063f80a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Mar 2017 21:27:09 +0200 Subject: drm/i915: Pre-compute plane control register value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Computing the plane control register value is branchy so moving it out from the plane commit hook seems prudent. Let's pre-compute it during the atomic check phase and store the result in the plane state. Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170323192712.30682-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 41 ++++++++++++++++++------------------ drivers/gpu/drm/i915/intel_drv.h | 3 +++ drivers/gpu/drm/i915/intel_sprite.c | 24 ++++++++++----------- 3 files changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4f57ce982a72..def3dfea0ffe 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3035,15 +3035,13 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, struct drm_framebuffer *fb = plane_state->base.fb; int plane = intel_crtc->plane; u32 linear_offset; - u32 dspcntr; + u32 dspcntr = plane_state->ctl; i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; int x = plane_state->base.src.x1 >> 16; int y = plane_state->base.src.y1 >> 16; unsigned long irqflags; - dspcntr = i9xx_plane_ctl(crtc_state, plane_state); - intel_add_fb_offsets(&x, &y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) @@ -3133,15 +3131,13 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, struct drm_framebuffer *fb = plane_state->base.fb; int plane = intel_crtc->plane; u32 linear_offset; - u32 dspcntr; + u32 dspcntr = plane_state->ctl; i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; int x = plane_state->base.src.x1 >> 16; int y = plane_state->base.src.y1 >> 16; unsigned long irqflags; - dspcntr = i9xx_plane_ctl(crtc_state, plane_state); - intel_add_fb_offsets(&x, &y, plane_state, 0); intel_crtc->dspaddr_offset = @@ -3358,7 +3354,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, struct drm_framebuffer *fb = plane_state->base.fb; enum plane_id plane_id = to_intel_plane(plane)->id; enum pipe pipe = to_intel_plane(plane)->pipe; - u32 plane_ctl; + u32 plane_ctl = plane_state->ctl; unsigned int rotation = plane_state->base.rotation; u32 stride = skl_plane_stride(fb, 0, rotation); u32 surf_addr = plane_state->main.offset; @@ -3373,8 +3369,6 @@ static void skylake_update_primary_plane(struct drm_plane *plane, int dst_h = drm_rect_height(&plane_state->base.dst); unsigned long irqflags; - plane_ctl = skl_plane_ctl(crtc_state, plane_state); - /* Sizes are 0 based */ src_w--; src_h--; @@ -9187,7 +9181,6 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, } static void i845_update_cursor(struct drm_crtc *crtc, u32 base, - const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_device *dev = crtc->dev; @@ -9199,7 +9192,7 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, unsigned int width = plane_state->base.crtc_w; unsigned int height = plane_state->base.crtc_h; - cntl = i845_cursor_ctl(crtc_state, plane_state); + cntl = plane_state->ctl; size = (height << 12) | width; } @@ -9270,7 +9263,6 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, } static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, - const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_device *dev = crtc->dev; @@ -9280,7 +9272,7 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, uint32_t cntl = 0; if (plane_state && plane_state->base.visible) - cntl = i9xx_cursor_ctl(crtc_state, plane_state); + cntl = plane_state->ctl; if (intel_crtc->cursor_cntl != cntl) { I915_WRITE_FW(CURCNTR(pipe), cntl); @@ -9297,7 +9289,6 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, /* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */ static void intel_crtc_update_cursor(struct drm_crtc *crtc, - const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_device *dev = crtc->dev; @@ -9337,9 +9328,9 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, I915_WRITE_FW(CURPOS(pipe), pos); if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - i845_update_cursor(crtc, base, crtc_state, plane_state); + i845_update_cursor(crtc, base, plane_state); else - i9xx_update_cursor(crtc, base, crtc_state, plane_state); + i9xx_update_cursor(crtc, base, plane_state); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -13371,6 +13362,10 @@ intel_check_primary_plane(struct drm_plane *plane, ret = skl_check_plane_surface(state); if (ret) return ret; + + state->ctl = skl_plane_ctl(crtc_state, state); + } else { + state->ctl = i9xx_plane_ctl(crtc_state, state); } return 0; @@ -13706,6 +13701,7 @@ intel_check_cursor_plane(struct drm_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { + struct drm_i915_private *dev_priv = to_i915(plane->dev); struct drm_framebuffer *fb = state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = to_intel_plane(plane)->pipe; @@ -13725,7 +13721,7 @@ intel_check_cursor_plane(struct drm_plane *plane, return 0; /* Check for which cursor types we support */ - if (!cursor_size_ok(to_i915(plane->dev), state->base.crtc_w, + if (!cursor_size_ok(dev_priv, state->base.crtc_w, state->base.crtc_h)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", state->base.crtc_w, state->base.crtc_h); @@ -13753,12 +13749,17 @@ intel_check_cursor_plane(struct drm_plane *plane, * display power well must be turned off and on again. * Refuse the put the cursor into that compromised position. */ - if (IS_CHERRYVIEW(to_i915(plane->dev)) && pipe == PIPE_C && + if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && state->base.visible && state->base.crtc_x < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) + state->ctl = i845_cursor_ctl(crtc_state, state); + else + state->ctl = i9xx_cursor_ctl(crtc_state, state); + return 0; } @@ -13769,7 +13770,7 @@ intel_disable_cursor_plane(struct drm_plane *plane, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); intel_crtc->cursor_addr = 0; - intel_crtc_update_cursor(crtc, NULL, NULL); + intel_crtc_update_cursor(crtc, NULL); } static void @@ -13791,7 +13792,7 @@ intel_update_cursor_plane(struct drm_plane *plane, addr = obj->phys_handle->busaddr; intel_crtc->cursor_addr = addr; - intel_crtc_update_cursor(crtc, crtc_state, state); + intel_crtc_update_cursor(crtc, state); } static struct intel_plane * diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 45e55b45e772..a35442eadd36 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -398,6 +398,9 @@ struct intel_plane_state { int x, y; } aux; + /* plane control register */ + u32 ctl; + /* * scaler_id * = -1 : not using a scaler diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 12e33bc149e4..6b76012ded1a 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -217,7 +217,7 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_framebuffer *fb = plane_state->base.fb; enum plane_id plane_id = intel_plane->id; enum pipe pipe = intel_plane->pipe; - u32 plane_ctl; + u32 plane_ctl = plane_state->ctl; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 surf_addr = plane_state->main.offset; unsigned int rotation = plane_state->base.rotation; @@ -232,8 +232,6 @@ skl_update_plane(struct drm_plane *drm_plane, uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; - plane_ctl = skl_plane_ctl(crtc_state, plane_state); - /* Sizes are 0 based */ src_w--; src_h--; @@ -420,7 +418,7 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_framebuffer *fb = plane_state->base.fb; enum pipe pipe = intel_plane->pipe; enum plane_id plane_id = intel_plane->id; - u32 sprctl; + u32 sprctl = plane_state->ctl; u32 sprsurf_offset, linear_offset; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -434,8 +432,6 @@ vlv_update_plane(struct drm_plane *dplane, uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; - sprctl = vlv_sprite_ctl(crtc_state, plane_state); - /* Sizes are 0 based */ src_w--; src_h--; @@ -569,7 +565,7 @@ ivb_update_plane(struct drm_plane *plane, struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *fb = plane_state->base.fb; enum pipe pipe = intel_plane->pipe; - u32 sprctl, sprscale = 0; + u32 sprctl = plane_state->ctl, sprscale = 0; u32 sprsurf_offset, linear_offset; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -583,8 +579,6 @@ ivb_update_plane(struct drm_plane *plane, uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; - sprctl = ivb_sprite_ctl(crtc_state, plane_state); - /* Sizes are 0 based */ src_w--; src_h--; @@ -722,7 +716,7 @@ ilk_update_plane(struct drm_plane *plane, struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *fb = plane_state->base.fb; int pipe = intel_plane->pipe; - u32 dvscntr, dvsscale; + u32 dvscntr = plane_state->ctl, dvsscale; u32 dvssurf_offset, linear_offset; unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -736,8 +730,6 @@ ilk_update_plane(struct drm_plane *plane, uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; - dvscntr = ilk_sprite_ctl(crtc_state, plane_state); - /* Sizes are 0 based */ src_w--; src_h--; @@ -986,6 +978,14 @@ intel_check_sprite_plane(struct drm_plane *plane, ret = skl_check_plane_surface(state); if (ret) return ret; + + state->ctl = skl_plane_ctl(crtc_state, state); + } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + state->ctl = vlv_sprite_ctl(crtc_state, state); + } else if (INTEL_GEN(dev_priv) >= 7) { + state->ctl = ivb_sprite_ctl(crtc_state, state); + } else { + state->ctl = ilk_sprite_ctl(crtc_state, state); } return 0; -- cgit v1.2.3 From 5b7fcc44aa6ea56285d0db20753fed699d047706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Mar 2017 21:27:10 +0200 Subject: drm/i915: Introduce i9xx_check_plane_surface() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the primary plane surfae offset/x/y calculations for pre-SKL platforms into a common function, and call it during the atomic check phase to reduce the amount of stuff we have to do during the commit phase. SKL is already doing this. v2: Update the comment about the rotation adjustments to match the code better (Chris) Cc: Chris Wilson Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170323192712.30682-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 82 ++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index def3dfea0ffe..95b5f8cc14fa 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3026,6 +3026,43 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, return dspcntr; } +static int i9xx_check_plane_surface(struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + int src_x = plane_state->base.src.x1 >> 16; + int src_y = plane_state->base.src.y1 >> 16; + u32 offset; + + intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); + + if (INTEL_GEN(dev_priv) >= 4) + offset = intel_compute_tile_offset(&src_x, &src_y, + plane_state, 0); + else + offset = 0; + + /* HSW/BDW do this automagically in hardware */ + if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { + unsigned int rotation = plane_state->base.rotation; + int src_w = drm_rect_width(&plane_state->base.src) >> 16; + int src_h = drm_rect_height(&plane_state->base.src) >> 16; + + if (rotation & DRM_ROTATE_180) { + src_x += src_w - 1; + src_y += src_h - 1; + } else if (rotation & DRM_REFLECT_X) { + src_x += src_w - 1; + } + } + + plane_state->main.offset = offset; + plane_state->main.x = src_x; + plane_state->main.y = src_y; + + return 0; +} + static void i9xx_update_primary_plane(struct drm_plane *primary, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -3037,27 +3074,15 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, u32 linear_offset; u32 dspcntr = plane_state->ctl; i915_reg_t reg = DSPCNTR(plane); - unsigned int rotation = plane_state->base.rotation; - int x = plane_state->base.src.x1 >> 16; - int y = plane_state->base.src.y1 >> 16; + int x = plane_state->main.x; + int y = plane_state->main.y; unsigned long irqflags; - intel_add_fb_offsets(&x, &y, plane_state, 0); - - if (INTEL_GEN(dev_priv) >= 4) - intel_crtc->dspaddr_offset = - intel_compute_tile_offset(&x, &y, plane_state, 0); - - if (rotation & DRM_ROTATE_180) { - x += crtc_state->pipe_src_w - 1; - y += crtc_state->pipe_src_h - 1; - } else if (rotation & DRM_REFLECT_X) { - x += crtc_state->pipe_src_w - 1; - } - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (INTEL_GEN(dev_priv) < 4) + if (INTEL_GEN(dev_priv) >= 4) + intel_crtc->dspaddr_offset = plane_state->main.offset; + else intel_crtc->dspaddr_offset = linear_offset; intel_crtc->adjusted_x = x; @@ -3133,25 +3158,14 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, u32 linear_offset; u32 dspcntr = plane_state->ctl; i915_reg_t reg = DSPCNTR(plane); - unsigned int rotation = plane_state->base.rotation; - int x = plane_state->base.src.x1 >> 16; - int y = plane_state->base.src.y1 >> 16; + int x = plane_state->main.x; + int y = plane_state->main.y; unsigned long irqflags; - intel_add_fb_offsets(&x, &y, plane_state, 0); - - intel_crtc->dspaddr_offset = - intel_compute_tile_offset(&x, &y, plane_state, 0); - - /* HSW+ does this automagically in hardware */ - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) && - rotation & DRM_ROTATE_180) { - x += crtc_state->pipe_src_w - 1; - y += crtc_state->pipe_src_h - 1; - } - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + intel_crtc->dspaddr_offset = plane_state->main.offset; + intel_crtc->adjusted_x = x; intel_crtc->adjusted_y = y; @@ -13365,6 +13379,10 @@ intel_check_primary_plane(struct drm_plane *plane, state->ctl = skl_plane_ctl(crtc_state, state); } else { + ret = i9xx_check_plane_surface(state); + if (ret) + return ret; + state->ctl = i9xx_plane_ctl(crtc_state, state); } -- cgit v1.2.3 From 3ba35e53cfbad79ba288a514b01b41a46fc8dc7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Mar 2017 21:27:11 +0200 Subject: drm/i915: Eliminate ironlake_update_primary_plane() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The effective difference between i9xx_update_primary_plane() and ironlake_update_primary_plane() is only the HSW/BDW DSPOFFSET special case. So bring that over into i9xx_update_primary_plane() and eliminate the duplicated code. Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170323192712.30682-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 55 ++++-------------------------------- 1 file changed, 6 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 95b5f8cc14fa..73aeb2405a16 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3109,7 +3109,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, I915_WRITE_FW(reg, dspcntr); I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]); - if (INTEL_GEN(dev_priv) >= 4) { + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { + I915_WRITE_FW(DSPSURF(plane), + intel_plane_ggtt_offset(plane_state) + + intel_crtc->dspaddr_offset); + I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); + } else if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + intel_crtc->dspaddr_offset); @@ -3146,48 +3151,6 @@ static void i9xx_disable_primary_plane(struct drm_plane *primary, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void ironlake_update_primary_plane(struct drm_plane *primary, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) -{ - struct drm_device *dev = primary->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_framebuffer *fb = plane_state->base.fb; - int plane = intel_crtc->plane; - u32 linear_offset; - u32 dspcntr = plane_state->ctl; - i915_reg_t reg = DSPCNTR(plane); - int x = plane_state->main.x; - int y = plane_state->main.y; - unsigned long irqflags; - - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - - intel_crtc->dspaddr_offset = plane_state->main.offset; - - intel_crtc->adjusted_x = x; - intel_crtc->adjusted_y = y; - - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - - I915_WRITE_FW(reg, dspcntr); - - I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]); - I915_WRITE_FW(DSPSURF(plane), - intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); - } else { - I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); - I915_WRITE_FW(DSPLINOFF(plane), linear_offset); - } - POSTING_READ_FW(reg); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); -} - static u32 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) { @@ -13642,12 +13605,6 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skylake_update_primary_plane; primary->disable_plane = skylake_disable_primary_plane; - } else if (HAS_PCH_SPLIT(dev_priv)) { - intel_primary_formats = i965_primary_formats; - num_formats = ARRAY_SIZE(i965_primary_formats); - - primary->update_plane = ironlake_update_primary_plane; - primary->disable_plane = i9xx_disable_primary_plane; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); -- cgit v1.2.3 From f9407ae1533111cd43161734582932b13397ab7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Mar 2017 21:27:12 +0200 Subject: drm/i915: Use i9xx_check_plane_surface() for sprite planes as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the pre-SKL sprite planes compute the x/y/tile offsets in a similar way. There are a couple of minor differences but the primary planes have those as well. Thus i9xx_check_plane_surface() already does what we need, so let's use it. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170323192712.30682-7-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_sprite.c | 71 +++++++++++++----------------------- 3 files changed, 27 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 73aeb2405a16..6dc75552c753 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3026,7 +3026,7 @@ static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, return dspcntr; } -static int i9xx_check_plane_surface(struct intel_plane_state *plane_state) +int i9xx_check_plane_surface(struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a35442eadd36..15e3eb2824e3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1453,6 +1453,7 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, unsigned int rotation); int skl_check_plane_surface(struct intel_plane_state *plane_state); +int i9xx_check_plane_surface(struct intel_plane_state *plane_state); /* intel_csr.c */ void intel_csr_ucode_init(struct drm_i915_private *); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 6b76012ded1a..f7d431427115 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -419,35 +419,21 @@ vlv_update_plane(struct drm_plane *dplane, enum pipe pipe = intel_plane->pipe; enum plane_id plane_id = intel_plane->id; u32 sprctl = plane_state->ctl; - u32 sprsurf_offset, linear_offset; - unsigned int rotation = plane_state->base.rotation; + u32 sprsurf_offset = plane_state->main.offset; + u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; - uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; - uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; + uint32_t x = plane_state->main.x; + uint32_t y = plane_state->main.y; unsigned long irqflags; /* Sizes are 0 based */ - src_w--; - src_h--; crtc_w--; crtc_h--; - intel_add_fb_offsets(&x, &y, plane_state, 0); - sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - - if (rotation & DRM_ROTATE_180) { - x += src_w; - y += src_h; - } else if (rotation & DRM_REFLECT_X) { - x += src_w; - } - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -566,15 +552,15 @@ ivb_update_plane(struct drm_plane *plane, struct drm_framebuffer *fb = plane_state->base.fb; enum pipe pipe = intel_plane->pipe; u32 sprctl = plane_state->ctl, sprscale = 0; - u32 sprsurf_offset, linear_offset; - unsigned int rotation = plane_state->base.rotation; + u32 sprsurf_offset = plane_state->main.offset; + u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t x = plane_state->main.x; + uint32_t y = plane_state->main.y; uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; @@ -588,16 +574,6 @@ ivb_update_plane(struct drm_plane *plane, if (crtc_w != src_w || crtc_h != src_h) sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h; - intel_add_fb_offsets(&x, &y, plane_state, 0); - sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - - /* HSW+ does this automagically in hardware */ - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) && - rotation & DRM_ROTATE_180) { - x += src_w; - y += src_h; - } - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -716,16 +692,16 @@ ilk_update_plane(struct drm_plane *plane, struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *fb = plane_state->base.fb; int pipe = intel_plane->pipe; - u32 dvscntr = plane_state->ctl, dvsscale; - u32 dvssurf_offset, linear_offset; - unsigned int rotation = plane_state->base.rotation; + u32 dvscntr = plane_state->ctl, dvsscale = 0; + u32 dvssurf_offset = plane_state->main.offset; + u32 linear_offset; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t x = plane_state->main.x; + uint32_t y = plane_state->main.y; uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; unsigned long irqflags; @@ -736,18 +712,9 @@ ilk_update_plane(struct drm_plane *plane, crtc_w--; crtc_h--; - dvsscale = 0; if (crtc_w != src_w || crtc_h != src_h) dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h; - intel_add_fb_offsets(&x, &y, plane_state, 0); - dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - - if (rotation & DRM_ROTATE_180) { - x += src_w; - y += src_h; - } - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -981,10 +948,22 @@ intel_check_sprite_plane(struct drm_plane *plane, state->ctl = skl_plane_ctl(crtc_state, state); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + ret = i9xx_check_plane_surface(state); + if (ret) + return ret; + state->ctl = vlv_sprite_ctl(crtc_state, state); } else if (INTEL_GEN(dev_priv) >= 7) { + ret = i9xx_check_plane_surface(state); + if (ret) + return ret; + state->ctl = ivb_sprite_ctl(crtc_state, state); } else { + ret = i9xx_check_plane_surface(state); + if (ret) + return ret; + state->ctl = ilk_sprite_ctl(crtc_state, state); } -- cgit v1.2.3 From 450362d3fe866b14304f309b5fffba0c33fbfbc3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Mar 2017 14:00:07 +0100 Subject: drm/i915/execlists: Wrap tail pointer after reset tweaking If the request->wa_tail is 0 (because it landed exactly on the end of the ringbuffer), when we reconstruct request->tail following a reset we fill in an illegal value (-8 or 0x001ffff8). As a result, RING_HEAD is never able to catch up with RING_TAIL and the GPU spins endlessly. If the ring contains a couple of breadcrumbs, even our hangcheck is unable to catch the busy-looping as the ACTHD and seqno continually advance. v2: Move the wrap into a common intel_ring_wrap(). Fixes: a3aabe86a340 ("drm/i915/execlists: Reinitialise context image after GPU hang") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170327130009.4678-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b75df70e8e0e..32fb8ad3fd36 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1278,7 +1278,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_BUG_ON(request->ctx != port[0].request->ctx); /* Reset WaIdleLiteRestore:bdw,skl as well */ - request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); + request->tail = + intel_ring_wrap(request->ring, + request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 166aa1ae65cf..17ac44980d84 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -515,12 +515,18 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) } static inline u32 -intel_ring_offset(struct drm_i915_gem_request *req, void *addr) +intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + +static inline u32 +intel_ring_offset(const struct drm_i915_gem_request *req, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ u32 offset = addr - req->ring->vaddr; GEM_BUG_ON(offset > req->ring->size); - return offset & (req->ring->size - 1); + return intel_ring_wrap(req->ring, offset); } void intel_ring_update_space(struct intel_ring *ring); -- cgit v1.2.3 From a91fdf1293044535a13fb9a434101f363dbe7e3c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Mar 2017 14:00:08 +0100 Subject: drm/i915: Assert that the request->tail fits within the ring In addition to being qword-aligned, the RING_TAIL offset must be within the ring! Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170327130009.4678-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 4 ++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 32fb8ad3fd36..39cb84a5d4e4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -327,6 +327,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) u32 *reg_state = ce->lrc_reg_state; GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8)); + GEM_BUG_ON(rq->tail >= rq->ring->size); reg_state[CTX_RING_TAIL+1] = rq->tail; /* True 32b PPGTT with dynamic page allocation: update PDP @@ -1282,6 +1283,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_wrap(request->ring, request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); + GEM_BUG_ON(request->tail >= request->ring->size); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) @@ -1493,6 +1495,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); + GEM_BUG_ON(request->tail >= request->ring->size); gen8_emit_wa_tail(request, cs); } @@ -1521,6 +1524,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); + GEM_BUG_ON(request->tail >= request->ring->size); gen8_emit_wa_tail(request, cs); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4729ac7ac122..47921dcbedb3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -775,6 +775,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); + GEM_BUG_ON(request->tail >= request->ring->size); I915_WRITE_TAIL(request->engine, request->tail); } @@ -787,6 +788,7 @@ static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) req->tail = intel_ring_offset(req, cs); GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); + GEM_BUG_ON(req->tail >= req->ring->size); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -826,6 +828,7 @@ static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, req->tail = intel_ring_offset(req, cs); GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); + GEM_BUG_ON(req->tail >= req->ring->size); } static const int gen8_render_emit_breadcrumb_sz = 8; -- cgit v1.2.3 From ed1501d45136da125a89b1a5728a783564e6491f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Mar 2017 14:14:12 +0100 Subject: drm/i915: Refactor tests for validity of RING_TAIL Whilst I like having the assertions clearly visible in the code, they are quite repetitious! As we find new limits we want to incorporate into the set of assertions, it make sense to refactor them to a common routine. v2: Add a guc holdout. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170327131412.20293-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 12 ++++-------- drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +++------ drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++++++++ 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 991e76e10f82..6193ad7edcf4 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -481,7 +481,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, /* The GuC firmware wants the tail index in QWords, not bytes */ tail = rq->tail; - GEM_BUG_ON(tail & 7); + assert_ring_tail_valid(rq->ring, rq->tail); tail >>= 3; GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 39cb84a5d4e4..b0c3a029b592 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; - GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8)); - GEM_BUG_ON(rq->tail >= rq->ring->size); + assert_ring_tail_valid(rq->ring, rq->tail); reg_state[CTX_RING_TAIL+1] = rq->tail; /* True 32b PPGTT with dynamic page allocation: update PDP @@ -1282,8 +1281,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, request->tail = intel_ring_wrap(request->ring, request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); - GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); - GEM_BUG_ON(request->tail >= request->ring->size); + assert_ring_tail_valid(request->ring, request->tail); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) @@ -1494,8 +1492,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); - GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); - GEM_BUG_ON(request->tail >= request->ring->size); + assert_ring_tail_valid(request->ring, request->tail); gen8_emit_wa_tail(request, cs); } @@ -1523,8 +1520,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); - GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); - GEM_BUG_ON(request->tail >= request->ring->size); + assert_ring_tail_valid(request->ring, request->tail); gen8_emit_wa_tail(request, cs); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 47921dcbedb3..66a2b8b83972 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -774,8 +774,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); - GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); - GEM_BUG_ON(request->tail >= request->ring->size); + assert_ring_tail_valid(request->ring, request->tail); I915_WRITE_TAIL(request->engine, request->tail); } @@ -787,8 +786,7 @@ static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) *cs++ = MI_USER_INTERRUPT; req->tail = intel_ring_offset(req, cs); - GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); - GEM_BUG_ON(req->tail >= req->ring->size); + assert_ring_tail_valid(req->ring, req->tail); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -827,8 +825,7 @@ static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, *cs++ = MI_NOOP; req->tail = intel_ring_offset(req, cs); - GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); - GEM_BUG_ON(req->tail >= req->ring->size); + assert_ring_tail_valid(req->ring, req->tail); } static const int gen8_render_emit_breadcrumb_sz = 8; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 17ac44980d84..a82a0807f64d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -529,6 +529,17 @@ intel_ring_offset(const struct drm_i915_gem_request *req, void *addr) return intel_ring_wrap(req->ring, offset); } +static inline void +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) +{ + /* We could combine these into a single tail operation, but keeping + * them as seperate tests will help identify the cause should one + * ever fire. + */ + GEM_BUG_ON(!IS_ALIGNED(tail, 8)); + GEM_BUG_ON(tail >= ring->size); +} + void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); -- cgit v1.2.3 From 598b6b5ada3f8a3ecd7bd821f7df87ca8ea0f053 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Mar 2017 13:47:35 +0000 Subject: drm/i915: Mark manually wedged engines as guilty Use the incoming value from debugfs/i915_wedged to select which engines to marked as guilty in order to force us to reset those requests (required to quickly bypass simulated hangs). Testcase: igt/gem_exec_capture Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170325134735.30581-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3ebdbc798f33..316bc47a8eea 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4126,7 +4126,9 @@ i915_wedged_get(void *data, u64 *val) static int i915_wedged_set(void *data, u64 val) { - struct drm_i915_private *dev_priv = data; + struct drm_i915_private *i915 = data; + struct intel_engine_cs *engine; + unsigned int tmp; /* * There is no safeguard against this debugfs entry colliding @@ -4136,13 +4138,17 @@ i915_wedged_set(void *data, u64 val) * while it is writing to 'i915_wedged' */ - if (i915_reset_backoff(&dev_priv->gpu_error)) + if (i915_reset_backoff(&i915->gpu_error)) return -EAGAIN; - i915_handle_error(dev_priv, val, - "Manually setting wedged to %llu", val); + for_each_engine_masked(engine, i915, val, tmp) { + engine->hangcheck.seqno = intel_engine_get_seqno(engine); + engine->hangcheck.stalled = true; + } + + i915_handle_error(i915, val, "Manually setting wedged to %llu", val); - wait_on_bit(&dev_priv->gpu_error.flags, + wait_on_bit(&i915->gpu_error.flags, I915_RESET_HANDOFF, TASK_UNINTERRUPTIBLE); -- cgit v1.2.3 From 44a126ba5d13db90945115324851466685b03733 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 22 Mar 2017 15:58:45 -0300 Subject: drm/i915: kill intel_ddi_pll_select() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All it does is pick the encoder and call intel_get_shared_dpll(). We can just do this in the caller. One less indirection level during code reading. As another plus, now the two callers of intel_get_shared_dpll() are {ironlake,haswell}_crtc_compute_clock(). Reported-by: Ville Syrjälä Signed-off-by: Paulo Zanoni Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1490209125-20046-2-git-send-email-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 25 ++----------------------- drivers/gpu/drm/i915/intel_display.c | 8 +++++++- drivers/gpu/drm/i915/intel_drv.h | 4 ++-- 3 files changed, 11 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b30898c2a076..3fbe498a04c2 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -837,7 +837,8 @@ intel_ddi_get_crtc_encoder(struct intel_crtc *crtc) return ret; } -static struct intel_encoder * +/* Finds the only possible encoder associated with the given CRTC. */ +struct intel_encoder * intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); @@ -1127,28 +1128,6 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, bxt_ddi_clock_get(encoder, pipe_config); } -/* - * Tries to find a *shared* PLL for the CRTC and store it in - * intel_crtc->ddi_pll_sel. - * - * For private DPLLs, compute_config() should do the selection for us. This - * function should be folded into compute_config() eventually. - */ -bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) -{ - struct intel_encoder *encoder = - intel_ddi_get_crtc_new_encoder(crtc_state); - struct intel_shared_dpll *pll; - - pll = intel_get_shared_dpll(intel_crtc, crtc_state, encoder); - if (!pll) - DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", - pipe_name(intel_crtc->pipe)); - - return pll != NULL; -} - void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6dc75552c753..e6f6406ef183 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8835,8 +8835,14 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state) { if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) { - if (!intel_ddi_pll_select(crtc, crtc_state)) + struct intel_encoder *encoder = + intel_ddi_get_crtc_new_encoder(crtc_state); + + if (!intel_get_shared_dpll(crtc, crtc_state, encoder)) { + DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", + pipe_name(crtc->pipe)); return -EINVAL; + } } crtc->lowfreq_avail = false; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 15e3eb2824e3..464cd5eeb598 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1239,8 +1239,8 @@ void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder); void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); -bool intel_ddi_pll_select(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state); +struct intel_encoder * +intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); -- cgit v1.2.3 From 15953637886d88d31cb54d461f1f3dcaa8146673 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 13 Mar 2017 16:54:03 +0530 Subject: drm/i915: enable scrambling Geminilake platform sports a native HDMI 2.0 controller, and is capable of driving pixel-clocks upto 594Mhz. HDMI 2.0 spec mendates scrambling for these higher clocks, for reduced RF footprint. This patch checks if the monitor supports scrambling, and if required, enables it during the modeset. V2: Addressed review comments from Ville: - Do not track scrambling status in DRM layer, track somewhere in driver like in intel_crtc_state. - Don't talk to monitor at such a low layer, set monitor scrambling in intel_enable_ddi() before enabling the port. V3: Addressed review comments from Jani - In comments, function names, use "sink" instead of "monitor", so that the implementation could be close to the language of HDMI spec. V4: Addressed review comment from Maarten - scrambling -> hdmi_scrambling - high_tmds_clock_ratio -> hdmi_high_tmds_clock_ratio V5: Addressed review comments from Ville and Ander - Do not modifiy the crtc_state after compute_config. Move all scrambling and tmds_clock_ratio calcutations to compute_config. - While setting scrambling for source/sink, do not check the conditions again, just go by the crtc_state flags. This will simplyfy the condition checks. V6: Addressed review comments from Ville - Do not add IS_GLK check in disable/enable function, instead add it in compute_config, while setting state flags. - Remove unnecessary paranthesis. - Simplyfy handle_sink_scrambling function as suggested. - Add readout code for scrambling status in get_ddi_config and add a check for the same in pipe_config_compare. V7: Addressed review comments from Ander/Ville - No separate function for source scrambling, make it inline - Align the last line of the macro TRANS_DDI_HDMI_SCRAMBLING_MASK - Do not add platform check while setting source scrambling - Use pipe_config instead of crtc->config to set sink scrambling - To readout scrambling status, Compare with SCRAMBLING_MASK not any of its bits - Remove platform check in intel_pipe_config_compare while checking scrambling status V8: Fixed mege conflict, Addressed review comments from Ander - Remove the desciption/comment about scrambling fom the caller, move it to the function - Move the IS_GLK check into scrambling function - Fix alignment V9: Fixed review comments from Ville, Ander - Pass the scrambling state variables as bool input to the sink_scrambling function and let the disable call be unconditional. - Fix alignments in function calls and debug messages. - Add kernel doc for function intel_hdmi_handle_sink_scrambling V10: Rebase Signed-off-by: Shashank Sharma Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1489404244-16608-6-git-send-email-shashank.sharma@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 7 ++++ drivers/gpu/drm/i915/intel_ddi.c | 23 +++++++++++++ drivers/gpu/drm/i915/intel_display.c | 3 ++ drivers/gpu/drm/i915/intel_drv.h | 10 ++++++ drivers/gpu/drm/i915/intel_hdmi.c | 63 ++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 04c8f69fcc62..11b12f412492 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7829,7 +7829,14 @@ enum { #define TRANS_DDI_EDP_INPUT_B_ONOFF (5<<12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6<<12) #define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1<<8) +#define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1<<7) +#define TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ (1<<6) #define TRANS_DDI_BFI_ENABLE (1<<4) +#define TRANS_DDI_HIGH_TMDS_CHAR_RATE (1<<4) +#define TRANS_DDI_HDMI_SCRAMBLING (1<<0) +#define TRANS_DDI_HDMI_SCRAMBLING_MASK (TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE \ + | TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ \ + | TRANS_DDI_HDMI_SCRAMBLING) /* DisplayPort Transport Control */ #define _DP_TP_CTL_A 0x64040 diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 3fbe498a04c2..83abab9379fb 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1244,6 +1244,11 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; + + if (crtc_state->hdmi_scrambling) + temp |= TRANS_DDI_HDMI_SCRAMBLING_MASK; + if (crtc_state->hdmi_high_tmds_clock_ratio) + temp |= TRANS_DDI_HIGH_TMDS_CHAR_RATE; } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; temp |= (crtc_state->fdi_lanes - 1) << 1; @@ -1816,6 +1821,12 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, if (type == INTEL_OUTPUT_HDMI) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio; + bool scrambling = pipe_config->hdmi_scrambling; + + intel_hdmi_handle_sink_scrambling(intel_encoder, + conn_state->connector, + clock_ratio, scrambling); /* In HDMI/DVI mode, the port width, and swing/emphasis values * are ignored so nothing special needs to be done besides @@ -1849,6 +1860,12 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder, if (old_crtc_state->has_audio) intel_audio_codec_disable(intel_encoder); + if (type == INTEL_OUTPUT_HDMI) { + intel_hdmi_handle_sink_scrambling(intel_encoder, + old_conn_state->connector, + false, false); + } + if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); @@ -1975,6 +1992,12 @@ void intel_ddi_get_config(struct intel_encoder *encoder, if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config)) pipe_config->has_infoframe = true; + + if ((temp & TRANS_DDI_HDMI_SCRAMBLING_MASK) == + TRANS_DDI_HDMI_SCRAMBLING_MASK) + pipe_config->hdmi_scrambling = true; + if (temp & TRANS_DDI_HIGH_TMDS_CHAR_RATE) + pipe_config->hdmi_high_tmds_clock_ratio = true; /* fall through */ case TRANS_DDI_MODE_SELECT_DVI: pipe_config->lane_count = 4; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e6f6406ef183..20fb8dd13184 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11710,6 +11710,9 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) PIPE_CONF_CHECK_I(limited_color_range); + + PIPE_CONF_CHECK_I(hdmi_scrambling); + PIPE_CONF_CHECK_I(hdmi_high_tmds_clock_ratio); PIPE_CONF_CHECK_I(has_infoframe); PIPE_CONF_CHECK_I(has_audio); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 464cd5eeb598..e24641b559e2 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -732,6 +732,12 @@ struct intel_crtc_state { /* bitmask of visible planes (enum plane_id) */ u8 active_planes; + + /* HDMI scrambling status */ + bool hdmi_scrambling; + + /* HDMI High TMDS char rate ratio */ + bool hdmi_high_tmds_clock_ratio; }; struct intel_crtc { @@ -1623,6 +1629,10 @@ struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); +void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder, + struct drm_connector *connector, + bool high_tmds_clock_ratio, + bool scrambling); void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 3eec74ca5116..e0bdc1979841 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "intel_drv.h" #include #include @@ -1334,6 +1335,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct drm_scdc *scdc = &conn_state->connector->display_info.hdmi.scdc; int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock; int clock_12bpc = clock_8bpc * 3 / 2; int desired_bpp; @@ -1403,6 +1405,16 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = 4; + if (scdc->scrambling.supported && IS_GEMINILAKE(dev_priv)) { + if (scdc->scrambling.low_rates) + pipe_config->hdmi_scrambling = true; + + if (pipe_config->port_clock > 340000) { + pipe_config->hdmi_scrambling = true; + pipe_config->hdmi_high_tmds_clock_ratio = true; + } + } + return true; } @@ -1812,6 +1824,57 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE; } +/* + * intel_hdmi_handle_sink_scrambling: handle sink scrambling/clock ratio setup + * @encoder: intel_encoder + * @connector: drm_connector + * @high_tmds_clock_ratio = bool to indicate if the function needs to set + * or reset the high tmds clock ratio for scrambling + * @scrambling: bool to Indicate if the function needs to set or reset + * sink scrambling + * + * This function handles scrambling on HDMI 2.0 capable sinks. + * If required clock rate is > 340 Mhz && scrambling is supported by sink + * it enables scrambling. This should be called before enabling the HDMI + * 2.0 port, as the sink can choose to disable the scrambling if it doesn't + * detect a scrambled clock within 100 ms. + */ +void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, + struct drm_connector *connector, + bool high_tmds_clock_ratio, + bool scrambling) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct drm_i915_private *dev_priv = connector->dev->dev_private; + struct drm_scrambling *sink_scrambling = + &connector->display_info.hdmi.scdc.scrambling; + struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv, + intel_hdmi->ddc_bus); + bool ret; + + if (!sink_scrambling->supported) + return; + + DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n", + encoder->base.name, connector->name); + + /* Set TMDS bit clock ratio to 1/40 or 1/10 */ + ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio); + if (!ret) { + DRM_ERROR("Set TMDS ratio failed\n"); + return; + } + + /* Enable/disable sink scrambling */ + ret = drm_scdc_set_scrambling(adptr, scrambling); + if (!ret) { + DRM_ERROR("Set sink scrambling failed\n"); + return; + } + + DRM_DEBUG_KMS("sink scrambling handled\n"); +} + static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, enum port port) { -- cgit v1.2.3 From 14292b7ff86f0ee63e9413d470f57456127ee6c2 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 13 Mar 2017 16:54:04 +0530 Subject: drm/i915: allow HDMI 2.0 clock rates Geminilake has a native HDMI 2.0 controller, which is capable of driving clocks upto 594Mhz. This patch updates the max tmds clock limit for the same. V2: rebase V3: rebase V4: added r-b from Ander V5: rebase V6: rebase V7: rebase V8: rebase V9: rebase V10: rebase Cc: Ander Conselvan De Oliveira Signed-off-by: Shashank Sharma Reviewed-by: Ander Conselvan De Oliveira Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1489404244-16608-7-git-send-email-shashank.sharma@intel.com --- drivers/gpu/drm/i915/intel_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e0bdc1979841..1d623b5e09d6 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1209,6 +1209,8 @@ static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv) { if (IS_G4X(dev_priv)) return 165000; + else if (IS_GEMINILAKE(dev_priv)) + return 594000; else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8) return 300000; else -- cgit v1.2.3 From 22f880ca8246c6c80c4f48731c6a7d5d15042f56 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 27 Mar 2017 21:34:59 +0100 Subject: drm/i915/perf: destroy stream on sample_flags mismatch If we were to ever encounter a sample_flags mismatch we need to ensure we destroy the stream when we bail. Fixes: d79651522e89 ("drm/i915: Enable i915 perf stream for Haswell OA unit") Signed-off-by: Matthew Auld Cc: Robert Bragg Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170327203459.18398-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8c121187ff39..20d036a9714d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1705,7 +1705,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, */ if (WARN_ON(stream->sample_flags != props->sample_flags)) { ret = -ENODEV; - goto err_alloc; + goto err_flags; } list_add(&stream->link, &dev_priv->perf.streams); @@ -1728,6 +1728,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, err_open: list_del(&stream->link); +err_flags: if (stream->ops->destroy) stream->ops->destroy(stream); err_alloc: -- cgit v1.2.3 From c1aecc537627cfb569a16b4badc2a5ec600c989a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Mar 2017 12:49:26 +0200 Subject: drm/i915: update the firmware download URL The old URL works but gives 301 Moved Permanently. Update. Reviewed-by: Michal Wajdeczko Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1489574966-27200-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 36832257cc9b..1575bde0cf90 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -49,7 +49,7 @@ MODULE_FIRMWARE(I915_CSR_SKL); MODULE_FIRMWARE(I915_CSR_BXT); #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) -#define FIRMWARE_URL "https://01.org/linuxgraphics/intel-linux-graphics-firmwares" +#define FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" -- cgit v1.2.3 From 0a309f9e3dfaa4f5db0bf1b0cab54571744b491a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 27 Mar 2017 21:32:36 +0100 Subject: drm/i915/perf: remove user triggerable warn Don't throw a warning if we are given an invalid property id. While here let's also bring back Robert' original idea of catching unhandled enumeration values at compile time. Fixes: eec688e1420d ("drm/i915: Add i915 perf infrastructure") Signed-off-by: Matthew Auld Cc: Robert Bragg Reviewed-by: Robert Bragg Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170327203236.18276-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_perf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 20d036a9714d..060b171480d5 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1794,6 +1794,11 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, if (ret) return ret; + if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { + DRM_DEBUG("Unknown i915 perf property ID\n"); + return -EINVAL; + } + switch ((enum drm_i915_perf_property_id)id) { case DRM_I915_PERF_PROP_CTX_HANDLE: props->single_context = 1; @@ -1863,9 +1868,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; - default: + case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); - DRM_DEBUG("Unknown i915 perf property ID\n"); return -EINVAL; } -- cgit v1.2.3 From f5073824efc856c9a8f56706f03ad4e07b779a36 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 28 Mar 2017 12:38:55 +0300 Subject: drm/i915: WARN if the core runtime PM get helpers fail We don't expect the core runtime PM get helpers to return any error, so add a WARN for this. Also print the return value for all the callsites to help debugging. v2: - Don't call pm_runtime_get_sync() as part of initing locals. (Chris) Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1490693935-12638-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 012bc358a33a..f8a375f8dde6 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2840,8 +2840,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct device *kdev = &pdev->dev; + int ret; - pm_runtime_get_sync(kdev); + ret = pm_runtime_get_sync(kdev); + WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); atomic_inc(&dev_priv->pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); @@ -2871,7 +2873,8 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) * function, since the power state is undefined. This applies * atm to the late/early system suspend/resume handlers. */ - WARN_ON_ONCE(ret < 0); + WARN_ONCE(ret < 0, + "pm_runtime_get_if_in_use() failed: %d\n", ret); if (ret <= 0) return false; } @@ -2955,8 +2958,11 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv) * platforms without RPM support. */ if (!HAS_RUNTIME_PM(dev_priv)) { + int ret; + pm_runtime_dont_use_autosuspend(kdev); - pm_runtime_get_sync(kdev); + ret = pm_runtime_get_sync(kdev); + WARN(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); } else { pm_runtime_use_autosuspend(kdev); } -- cgit v1.2.3 From 090e5fe3f0115ff0bcb299bb90cfd8cb82f5cbf8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Mar 2017 14:14:07 +0100 Subject: drm/i915: Take rpm wakelock around debugfs/i915_gpu_info Capturing GPU state requires the device to be awake in order to read registers. Normally, this is taken along the error handler, but for the direct debugfs access, we cannot make assumptions about the current device state and so either need to wake it up, or abort. Fixes: 5a4c6f1b1b2d ("drm/i915: The return of i915_gpu_info to debugfs") Testcase: igt/pm_rpm/debugfs-read Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170328131407.14863-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 316bc47a8eea..d4904245472f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1012,9 +1012,12 @@ static int gpu_state_release(struct inode *inode, struct file *file) static int i915_gpu_info_open(struct inode *inode, struct file *file) { + struct drm_i915_private *i915 = inode->i_private; struct i915_gpu_state *gpu; - gpu = i915_capture_gpu_state(inode->i_private); + intel_runtime_pm_get(i915); + gpu = i915_capture_gpu_state(i915); + intel_runtime_pm_put(i915); if (!gpu) return -ENOMEM; -- cgit v1.2.3 From 9a86cda07af2c63649932f0a4fc757701ef54c42 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 27 Mar 2017 14:33:25 +0300 Subject: drm/i915/dp: reduce link M/N parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several major vendor USB-C->HDMI converters, in particular the DA200, fail to recover a 5.4 GHz 1 lane signal if the link N is greater than 0x80000. The link M and N depend on the pixel clock and link clock ratio. With current code link N exceeds 0x80000 only when link clock >= 540000 kHz. Except for the eDP intermediate link clocks, at least the four least significant bits are always zero. Just one bit shift right would be enough to bring even the DP 1.4 810000 kHz link clock under 0x80000 link N. The pixel clock for modes that require a link clock >= 540000 kHz would also have several least significant bits zero. Unless the user provides a mode with an odd pixel clock value, we can reduce the numbers to reach the goal, with no loss in precision. The DP spec even mentions sources making choices that "allow for static and relatively small Mvid and Nvid values", thus reducing the link M/N regardless of the sink in question seems justified. Everything here is based on the work and information gathered by Clint Taylor . This is just an iteration to reduce the parameters regardless of lane count, link rate, or sink. Reference: http://patchwork.freedesktop.org/patch/msgid/1490225256-11667-1-git-send-email-clinton.a.taylor@intel.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93578 Tested-by: Mads Tested-by: PJ Tested-by: François Guerraz Tested-by: Lev Popov Tested-by: Igor Krivenko Tested-by: Clint Taylor Reviewed-by: Clint Taylor Reviewed-by: Dhinakaran Pandiyan Acked-by: Daniel Vetter Cc: Clint Taylor Cc: Anusha Srivatsa Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1490614405-23337-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 20fb8dd13184..654b8a0c28ee 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6290,6 +6290,17 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) static void compute_m_n(unsigned int m, unsigned int n, uint32_t *ret_m, uint32_t *ret_n) { + /* + * Reduce M/N as much as possible without loss in precision. Several DP + * dongles in particular seem to be fussy about too large *link* M/N + * values. The passed in values are more likely to have the least + * significant bits zero than M after rounding below, so do this first. + */ + while ((m & 1) == 0 && (n & 1) == 0) { + m >>= 1; + n >>= 1; + } + *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); *ret_m = div_u64((uint64_t) m * *ret_n, n); intel_reduce_m_n_ratio(ret_m, ret_n); -- cgit v1.2.3 From 88a16b64c3f48d1c7c2b12f1632ba2181eb502de Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Fri, 17 Mar 2017 09:38:57 +0800 Subject: drm/i915/gvt: emulate SKL_FUSE_STATUS and LCPLL_CTL for virtual monitor detection Initialize the correct vreg for virtual monitor. Set PG0/1/2 distribution and fuse download done in SKL_FUSE_STATUS. Set PLL_ENABLE and PLL_LOCK in LCPLL_CTL. Guest may need to check these registers for display monitor detection on Skylake platforms. Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 5419ae6ec633..79939f93464d 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -172,9 +172,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv)) { vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); + vgpu_vreg(vgpu, SKL_FUSE_STATUS) |= + SKL_FUSE_DOWNLOAD_STATUS | + SKL_FUSE_PG0_DIST_STATUS | + SKL_FUSE_PG1_DIST_STATUS | + SKL_FUSE_PG2_DIST_STATUS; + vgpu_vreg(vgpu, LCPLL1_CTL) |= + LCPLL_PLL_ENABLE | + LCPLL_PLL_LOCK; + vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; + + } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; -- cgit v1.2.3 From e2e02cbb5beb5f7bfdfd73558794e3949868267c Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Mon, 20 Mar 2017 23:54:39 +0800 Subject: drm/i915/gvt: make dpcd_fix_data supports DP1.2 GVT-g will emulate a fixed DPCD data to VM for DP/eDP panel. Update this data to latest DP1.2 with the maximum lane bandwidth of 5.4G/s to support 4K resolution in VM. V3: modify patch comment V2: add inline comment to describe the dpcd_fix_data. Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 79939f93464d..f21dd2328b73 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -161,8 +161,9 @@ static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = { #define DPCD_HEADER_SIZE 0xb +/* let the virtual display supports DP1.2 */ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { - 0x11, 0x0a, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + 0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static void emulate_monitor_status_change(struct intel_vgpu *vgpu) -- cgit v1.2.3 From 7a7a65617b84912287ec4c6ed7b85f9418c7304b Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 16 Mar 2017 18:06:39 +0800 Subject: drm/i915/gvt: Add mdev device attribute group This adds initial attribute group for mdev to hold vGPU related for each mdev device, currently just vGPU id is shown. v2: rename group name as "intel_vgpu" Signed-off-by: Zhenyu Wang Reviewed-by: Kevin Tian --- drivers/gpu/drm/i915/gvt/kvmgt.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1ea3eb270de8..c7e7c9377cef 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1146,8 +1146,40 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, return 0; } +static ssize_t +vgpu_id_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct mdev_device *mdev = mdev_from_dev(dev); + + if (mdev) { + struct intel_vgpu *vgpu = (struct intel_vgpu *) + mdev_get_drvdata(mdev); + return sprintf(buf, "%d\n", vgpu->id); + } + return sprintf(buf, "\n"); +} + +static DEVICE_ATTR_RO(vgpu_id); + +static struct attribute *intel_vgpu_attrs[] = { + &dev_attr_vgpu_id.attr, + NULL +}; + +static const struct attribute_group intel_vgpu_group = { + .name = "intel_vgpu", + .attrs = intel_vgpu_attrs, +}; + +static const struct attribute_group *intel_vgpu_groups[] = { + &intel_vgpu_group, + NULL, +}; + static const struct mdev_parent_ops intel_vgpu_ops = { .supported_type_groups = intel_vgpu_type_groups, + .mdev_attr_groups = intel_vgpu_groups, .create = intel_vgpu_create, .remove = intel_vgpu_remove, -- cgit v1.2.3 From 18af19dbe1a521921b148b8f82d03e585f0bed41 Mon Sep 17 00:00:00 2001 From: Xu Han Date: Wed, 29 Mar 2017 10:13:56 +0800 Subject: drm/i915/gvt: Add KBL platform definition. Add KBL platform definition. Signed-off-by: Xu Han Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index a3a027025cd0..7edd66f38ef9 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -44,20 +44,21 @@ struct intel_vgpu; #define D_HSW (1 << 2) #define D_BDW (1 << 3) #define D_SKL (1 << 4) +#define D_KBL (1 << 5) -#define D_GEN9PLUS (D_SKL) -#define D_GEN8PLUS (D_BDW | D_SKL) -#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL) -#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL) +#define D_GEN9PLUS (D_SKL | D_KBL) +#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL) +#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL | D_KBL) +#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) -#define D_SKL_PLUS (D_SKL) -#define D_BDW_PLUS (D_BDW | D_SKL) -#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL) -#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL) +#define D_SKL_PLUS (D_SKL | D_KBL) +#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL) +#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL | D_KBL) +#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) #define D_PRE_BDW (D_SNB | D_IVB | D_HSW) #define D_PRE_SKL (D_SNB | D_IVB | D_HSW | D_BDW) -#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL) +#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL) struct intel_gvt_mmio_info { u32 offset; -- cgit v1.2.3 From 5cf5fe8f729b64ce4f37edc8fa01f8cab5c74389 Mon Sep 17 00:00:00 2001 From: Xu Han Date: Wed, 29 Mar 2017 10:13:57 +0800 Subject: drm/i915/gvt: Update MMIO handle policy to compatible KBL platform. Update MMIO handle policy to KBL platform. Signed-off-by: Xu Han Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 409 +++++++++++++++++++----------------- 1 file changed, 211 insertions(+), 198 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index eaff45d417e8..18b690312d17 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2584,219 +2584,232 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); - MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write); - - MMIO_D(HSW_PWR_WELL_BIOS, D_SKL); - MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write); + MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + dp_aux_ch_ctl_mmio_write); + MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + dp_aux_ch_ctl_mmio_write); + MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + dp_aux_ch_ctl_mmio_write); + + MMIO_D(HSW_PWR_WELL_BIOS, D_SKL_PLUS); + MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL_PLUS, NULL, + skl_power_well_ctl_write); + MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL_PLUS, NULL, mailbox_write); MMIO_D(0xa210, D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL, NULL, skl_misc_ctl_write); - MMIO_DH(0x42080, D_SKL, NULL, skl_misc_ctl_write); - MMIO_D(0x45504, D_SKL); - MMIO_D(0x45520, D_SKL); - MMIO_D(0x46000, D_SKL); - MMIO_DH(0x46010, D_SKL, NULL, skl_lcpll_write); - MMIO_DH(0x46014, D_SKL, NULL, skl_lcpll_write); - MMIO_D(0x6C040, D_SKL); - MMIO_D(0x6C048, D_SKL); - MMIO_D(0x6C050, D_SKL); - MMIO_D(0x6C044, D_SKL); - MMIO_D(0x6C04C, D_SKL); - MMIO_D(0x6C054, D_SKL); - MMIO_D(0x6c058, D_SKL); - MMIO_D(0x6c05c, D_SKL); - MMIO_DH(0X6c060, D_SKL, dpll_status_read, NULL); - - MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL, NULL, pf_write); - - MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL, NULL, pf_write); - - MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL, NULL, pf_write); - MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL, NULL, pf_write); - - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL); - - MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL, NULL, NULL); - MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL, NULL, NULL); - MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL, NULL, NULL); - - MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - - MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - - MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - - MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL); - - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL, NULL, NULL); - - MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL, NULL, NULL); - MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL, NULL, NULL); - MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL); - - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL); - MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL, NULL, NULL); - - MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL, NULL, NULL); - - MMIO_D(0x70380, D_SKL); - MMIO_D(0x71380, D_SKL); - MMIO_D(0x72380, D_SKL); - MMIO_D(0x7039c, D_SKL); - - MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL, NULL, NULL); - MMIO_D(0x8f074, D_SKL); - MMIO_D(0x8f004, D_SKL); - MMIO_D(0x8f034, D_SKL); - - MMIO_D(0xb11c, D_SKL); - - MMIO_D(0x51000, D_SKL); - MMIO_D(0x6c00c, D_SKL); - - MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); - MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); - - MMIO_D(0xd08, D_SKL); - MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_D(0x45504, D_SKL_PLUS); + MMIO_D(0x45520, D_SKL_PLUS); + MMIO_D(0x46000, D_SKL_PLUS); + MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_D(0x6C040, D_SKL | D_KBL); + MMIO_D(0x6C048, D_SKL | D_KBL); + MMIO_D(0x6C050, D_SKL | D_KBL); + MMIO_D(0x6C044, D_SKL | D_KBL); + MMIO_D(0x6C04C, D_SKL | D_KBL); + MMIO_D(0x6C054, D_SKL | D_KBL); + MMIO_D(0x6c058, D_SKL | D_KBL); + MMIO_D(0x6c05c, D_SKL | D_KBL); + MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL); + + MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); + + MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); + + MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write); + MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write); + + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL_PLUS, NULL, NULL); + MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL_PLUS, NULL, NULL); + MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL_PLUS, NULL, NULL); + + MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + + MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + + MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + + MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL_PLUS, NULL, NULL); + MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL_PLUS, NULL, NULL); + MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + + MMIO_D(0x70380, D_SKL_PLUS); + MMIO_D(0x71380, D_SKL_PLUS); + MMIO_D(0x72380, D_SKL_PLUS); + MMIO_D(0x7039c, D_SKL_PLUS); + + MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL_PLUS, NULL, NULL); + MMIO_D(0x8f074, D_SKL | D_KBL); + MMIO_D(0x8f004, D_SKL | D_KBL); + MMIO_D(0x8f034, D_SKL | D_KBL); + + MMIO_D(0xb11c, D_SKL | D_KBL); + + MMIO_D(0x51000, D_SKL | D_KBL); + MMIO_D(0x6c00c, D_SKL_PLUS); + + MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + + MMIO_D(0xd08, D_SKL_PLUS); + MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write); - MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write); + MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4dec, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); + MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); - MMIO_D(0x45008, D_SKL); + MMIO_D(0x45008, D_SKL | D_KBL); - MMIO_D(0x46430, D_SKL); + MMIO_D(0x46430, D_SKL | D_KBL); - MMIO_D(0x46520, D_SKL); + MMIO_D(0x46520, D_SKL | D_KBL); - MMIO_D(0xc403c, D_SKL); - MMIO_D(0xb004, D_SKL); + MMIO_D(0xc403c, D_SKL | D_KBL); + MMIO_D(0xb004, D_SKL_PLUS); MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write); - MMIO_D(0x65900, D_SKL); - MMIO_D(0x1082c0, D_SKL); - MMIO_D(0x4068, D_SKL); - MMIO_D(0x67054, D_SKL); - MMIO_D(0x6e560, D_SKL); - MMIO_D(0x6e554, D_SKL); - MMIO_D(0x2b20, D_SKL); - MMIO_D(0x65f00, D_SKL); - MMIO_D(0x65f08, D_SKL); - MMIO_D(0x320f0, D_SKL); - - MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x70034, D_SKL); - MMIO_D(0x71034, D_SKL); - MMIO_D(0x72034, D_SKL); - - MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL); - MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL); - MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL); - MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL); - MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL); - MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); - - MMIO_D(0x44500, D_SKL); + MMIO_D(0x65900, D_SKL_PLUS); + MMIO_D(0x1082c0, D_SKL | D_KBL); + MMIO_D(0x4068, D_SKL | D_KBL); + MMIO_D(0x67054, D_SKL | D_KBL); + MMIO_D(0x6e560, D_SKL | D_KBL); + MMIO_D(0x6e554, D_SKL | D_KBL); + MMIO_D(0x2b20, D_SKL | D_KBL); + MMIO_D(0x65f00, D_SKL | D_KBL); + MMIO_D(0x65f08, D_SKL | D_KBL); + MMIO_D(0x320f0, D_SKL | D_KBL); + + MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_REG_VECS_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_D(0x70034, D_SKL_PLUS); + MMIO_D(0x71034, D_SKL_PLUS); + MMIO_D(0x72034, D_SKL_PLUS); + + MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS); + MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS); + MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS); + MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS); + MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS); + MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS); + + MMIO_D(0x44500, D_SKL_PLUS); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS, + MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_D(0x4ab8, D_KBL); + MMIO_D(0x940c, D_SKL_PLUS); + MMIO_D(0x2248, D_SKL_PLUS | D_KBL); + MMIO_D(0x4ab0, D_SKL | D_KBL); + MMIO_D(0x20d4, D_SKL | D_KBL); + return 0; } -- cgit v1.2.3 From 6f696d13558e7c3ea6835a4215629ffc16979bf6 Mon Sep 17 00:00:00 2001 From: Xu Han Date: Wed, 29 Mar 2017 10:13:58 +0800 Subject: drm/i915/gvt: Update save/restore list to compatible KBL platform. Add some KBL specially registers to save/restore list. Signed-off-by: Xu Han Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 95ee091ce085..f0b3de352aae 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -126,6 +126,18 @@ static struct render_mmio gen9_render_mmio_list[] = { {VCS2, _MMIO(0x1c028), 0xffff, false}, {VECS, _MMIO(0x1a028), 0xffff, false}, + + {RCS, _MMIO(0x7304), 0xffff, true}, + {RCS, _MMIO(0x2248), 0x0, false}, + {RCS, _MMIO(0x940c), 0x0, false}, + {RCS, _MMIO(0x4ab8), 0x0, false}, + + {RCS, _MMIO(0x4ab0), 0x0, false}, + {RCS, _MMIO(0x20d4), 0x0, false}, + + {RCS, _MMIO(0xb004), 0x0, false}, + {RCS, _MMIO(0x20a0), 0x0, false}, + {RCS, _MMIO(0x20e4), 0xffff, false}, }; static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; -- cgit v1.2.3 From e3476c0021c31b64070c40f02bfb7faab55591b4 Mon Sep 17 00:00:00 2001 From: Xu Han Date: Wed, 29 Mar 2017 10:13:59 +0800 Subject: drm/i915/gvt: Add KBL dispatch logic in each function. Extend function dispatch logic to support KBL platform. Signed-off-by: Xu Han Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 17 +++++++++++------ drivers/gpu/drm/i915/gvt/display.c | 8 ++++---- drivers/gpu/drm/i915/gvt/gtt.c | 3 ++- drivers/gpu/drm/i915/gvt/gvt.c | 3 ++- drivers/gpu/drm/i915/gvt/handlers.c | 19 +++++++++++++------ drivers/gpu/drm/i915/gvt/interrupt.c | 5 +++-- drivers/gpu/drm/i915/gvt/render.c | 11 ++++++----- drivers/gpu/drm/i915/gvt/scheduler.c | 3 ++- 8 files changed, 43 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 6f972afbdbc3..c239fa8cbd0c 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1215,7 +1215,7 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s, if (!info->async_flip) return 0; - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0); tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; @@ -1243,7 +1243,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip( set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10), @@ -1267,7 +1267,7 @@ static int decode_mi_display_flip(struct parser_exec_state *s, if (IS_BROADWELL(dev_priv)) return gen8_decode_mi_display_flip(s, info); - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) return skl_decode_mi_display_flip(s, info); return -ENODEV; @@ -1278,7 +1278,9 @@ static int check_mi_display_flip(struct parser_exec_state *s, { struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) + || IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv)) return gen8_check_mi_display_flip(s, info); return -ENODEV; } @@ -1289,7 +1291,9 @@ static int update_plane_mmio_from_mi_display_flip( { struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; - if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) + || IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv)) return gen8_update_plane_mmio_from_mi_display_flip(s, info); return -ENODEV; } @@ -1569,7 +1573,8 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) { struct intel_gvt *gvt = s->vgpu->gvt; - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv)) { /* BDW decides privilege based on address space */ if (cmd_val(s, 0) & (1 << 8)) return 0; diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index f21dd2328b73..4cf2b29fbaa1 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -173,7 +173,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); vgpu_vreg(vgpu, SKL_FUSE_STATUS) |= @@ -203,7 +203,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; } - if (IS_SKYLAKE(dev_priv) && + if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; } @@ -365,7 +365,7 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) clean_virtual_dp_monitor(vgpu, PORT_D); else clean_virtual_dp_monitor(vgpu, PORT_B); @@ -387,7 +387,7 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) intel_vgpu_init_i2c_edid(vgpu); - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, resolution); else diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index da7312715824..69d3d8ddecc2 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2220,7 +2220,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) gvt_dbg_core("init gtt\n"); - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv)) { gvt->gtt.pte_ops = &gen8_gtt_pte_ops; gvt->gtt.gma_ops = &gen8_gtt_gma_ops; gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table; diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 3b9d59e457ba..b84b7ca3f66f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -106,7 +106,8 @@ static void init_device_info(struct intel_gvt *gvt) struct intel_gvt_device_info *info = &gvt->device_info; struct pci_dev *pdev = gvt->dev_priv->drm.pdev; - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv)) { info->max_support_vgpus = 8; info->cfg_space_size = 256; info->mmio_size = 2 * 1024 * 1024; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 18b690312d17..0b41d19ae09a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -68,6 +68,8 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt) return D_BDW; else if (IS_SKYLAKE(gvt->dev_priv)) return D_SKL; + else if (IS_KABYLAKE(gvt->dev_priv)) + return D_KBL; return 0; } @@ -234,7 +236,8 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, old = vgpu_vreg(vgpu, offset); new = CALC_MODE_MASK_REG(old, *(u32 *)p_data); - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + if (IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { switch (offset) { case FORCEWAKE_RENDER_GEN9_REG: ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG; @@ -823,8 +826,9 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, write_vreg(vgpu, offset, p_data, bytes); data = vgpu_vreg(vgpu, offset); - if (IS_SKYLAKE(vgpu->gvt->dev_priv) && - offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { + if ((IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) + && offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) { /* SKL DPB/C/D aux ctl register changed */ return 0; } else if (IS_BROADWELL(vgpu->gvt->dev_priv) && @@ -1303,7 +1307,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, switch (cmd) { case GEN9_PCODE_READ_MEM_LATENCY: - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + if (IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { /** * "Read memory latency" command on gen9. * Below memory latency values are read @@ -1316,7 +1321,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, } break; case SKL_PCODE_CDCLK_CONTROL: - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) + if (IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; case GEN6_PCODE_READ_RC6VIDS: @@ -2886,7 +2892,8 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) ret = init_broadwell_mmio_info(gvt); if (ret) goto err; - } else if (IS_SKYLAKE(dev_priv)) { + } else if (IS_SKYLAKE(dev_priv) + || IS_KABYLAKE(dev_priv)) { ret = init_broadwell_mmio_info(gvt); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c index 92bb247e3478..9d6812f0957f 100644 --- a/drivers/gpu/drm/i915/gvt/interrupt.c +++ b/drivers/gpu/drm/i915/gvt/interrupt.c @@ -580,7 +580,7 @@ static void gen8_init_irq( SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C); - } else if (IS_SKYLAKE(gvt->dev_priv)) { + } else if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT); SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT); @@ -690,7 +690,8 @@ int intel_gvt_init_irq(struct intel_gvt *gvt) gvt_dbg_core("init irq framework\n"); - if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) { + if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv)) { irq->ops = &gen8_irq_ops; irq->irq_map = gen8_irq_map; } else { diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index f0b3de352aae..e24e57afc45e 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -171,7 +171,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) */ fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && IS_SKYLAKE(dev_priv)) + if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) fw |= FORCEWAKE_RENDER; intel_uncore_forcewake_get(dev_priv, fw); @@ -204,7 +204,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id) if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if (!IS_SKYLAKE(dev_priv)) + if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) return; offset.reg = regs[ring_id]; @@ -242,7 +242,7 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) return; - if (!IS_SKYLAKE(dev_priv)) + if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) return; offset.reg = regs[ring_id]; @@ -277,7 +277,8 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id) u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + if (IS_SKYLAKE(vgpu->gvt->dev_priv) + || IS_KABYLAKE(vgpu->gvt->dev_priv)) { mmio = gen9_render_mmio_list; array_size = ARRAY_SIZE(gen9_render_mmio_list); load_mocs(vgpu, ring_id); @@ -324,7 +325,7 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id) u32 v; int i, array_size; - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { mmio = gen9_render_mmio_list; array_size = ARRAY_SIZE(gen9_render_mmio_list); restore_mocs(vgpu, ring_id); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index ad8876bd15b3..375038252761 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -448,7 +448,8 @@ static int workload_thread(void *priv) struct intel_vgpu_workload *workload = NULL; struct intel_vgpu *vgpu = NULL; int ret; - bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); + bool need_force_wake = IS_SKYLAKE(gvt->dev_priv) + || IS_KABYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); kfree(p); -- cgit v1.2.3 From 96cd733c3e7d6a5dc3cf6dc43e97f39963680351 Mon Sep 17 00:00:00 2001 From: Xu Han Date: Wed, 29 Mar 2017 10:14:00 +0800 Subject: drm/i915/gvt: Turn on KBL platform support. Turn on KBL WS platform support in gvt-g. More platforms would be enabled, after validate. Signed-off-by: Xu Han Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/intel_gvt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 8c04eca84351..e1ab6432a914 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -45,6 +45,8 @@ static bool is_supported_device(struct drm_i915_private *dev_priv) return true; if (IS_SKYLAKE(dev_priv)) return true; + if (IS_KABYLAKE(dev_priv) && INTEL_DEVID(dev_priv) == 0x591D) + return true; return false; } -- cgit v1.2.3 From c10c12558c8bb375798b7643c762dbcc93db081a Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 17 Mar 2017 03:08:51 -0400 Subject: drm/i915/gvt: remove workload from intel_shadow_wa_ctx structure intel_shadow_wa_ctx is a field of intel_vgpu_workload. container_of() can be used to refine the relation-ship between intel_shadow_wa_ctx and intel_vgpu_workload. This patch removes the useless dereference. v2. add "drm/i915/gvt" prefix. (Zhenyu) Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 25 +++++++++++++++++-------- drivers/gpu/drm/i915/gvt/execlist.c | 9 +++++---- drivers/gpu/drm/i915/gvt/scheduler.h | 1 - 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index c239fa8cbd0c..94f2e701e4d4 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2609,6 +2609,9 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) unsigned long gma_head, gma_tail, gma_bottom, ring_size, ring_tail; struct parser_exec_state s; int ret = 0; + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); /* ring base is page aligned */ if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE))) @@ -2623,14 +2626,14 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) s.buf_type = RING_BUFFER_INSTRUCTION; s.buf_addr_type = GTT_BUFFER; - s.vgpu = wa_ctx->workload->vgpu; - s.ring_id = wa_ctx->workload->ring_id; + s.vgpu = workload->vgpu; + s.ring_id = workload->ring_id; s.ring_start = wa_ctx->indirect_ctx.guest_gma; s.ring_size = ring_size; s.ring_head = gma_head; s.ring_tail = gma_tail; s.rb_va = wa_ctx->indirect_ctx.shadow_va; - s.workload = wa_ctx->workload; + s.workload = workload; ret = ip_gma_set(&s, gma_head); if (ret) @@ -2713,12 +2716,15 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) { int ctx_size = wa_ctx->indirect_ctx.size; unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma; - struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_gem_object *obj; int ret = 0; void *map; - obj = i915_gem_object_create(wa_ctx->workload->vgpu->gvt->dev_priv, + obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv, roundup(ctx_size + CACHELINE_BYTES, PAGE_SIZE)); if (IS_ERR(obj)) @@ -2738,8 +2744,8 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) goto unmap_src; } - ret = copy_gma_to_hva(wa_ctx->workload->vgpu, - wa_ctx->workload->vgpu->gtt.ggtt_mm, + ret = copy_gma_to_hva(workload->vgpu, + workload->vgpu->gtt.ggtt_mm, guest_gma, guest_gma + ctx_size, map); if (ret < 0) { @@ -2777,7 +2783,10 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { int ret; - struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + struct intel_vgpu *vgpu = workload->vgpu; if (wa_ctx->indirect_ctx.size == 0) return 0; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f1f426a97aa9..ce4276a7cf9c 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -394,9 +394,11 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - int ring_id = wa_ctx->workload->ring_id; - struct i915_gem_context *shadow_ctx = - wa_ctx->workload->vgpu->shadow_ctx; + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + int ring_id = workload->ring_id; + struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; @@ -680,7 +682,6 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, CACHELINE_BYTES; workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; - workload->wa_ctx.workload = workload; WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1)); } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 2833dfa8c9ae..2cd725c0573e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -67,7 +67,6 @@ struct shadow_per_ctx { }; struct intel_shadow_wa_ctx { - struct intel_vgpu_workload *workload; struct shadow_indirect_ctx indirect_ctx; struct shadow_per_ctx per_ctx; -- cgit v1.2.3 From f0d661534fc90da1774e8af7e7faf5043375606f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 28 Mar 2017 08:45:12 +0000 Subject: drm/i915: Move WARN_ON/MISSING_CASE macros to i915_utils.h We can't sometimes use these macros in other headers due to include and definition order. As i915_utils.h already contains other helper macros move these macros there. v2: checkpatch cleanup for WARN() macro. Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170328084513.174200-1-michal.wajdeczko@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 18 ------------------ drivers/gpu/drm/i915/i915_utils.h | 18 ++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 86f097db8ef6..0cbadac02a53 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -82,24 +82,6 @@ #define DRIVER_DATE "20170320" #define DRIVER_TIMESTAMP 1489994464 -#undef WARN_ON -/* Many gcc seem to no see through this and fall over :( */ -#if 0 -#define WARN_ON(x) ({ \ - bool __i915_warn_cond = (x); \ - if (__builtin_constant_p(__i915_warn_cond)) \ - BUILD_BUG_ON(__i915_warn_cond); \ - WARN(__i915_warn_cond, "WARN_ON(" #x ")"); }) -#else -#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")") -#endif - -#undef WARN_ON_ONCE -#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")") - -#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ - (long) (x), __func__); - /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions * which may not necessarily be a user visible problem. This will either diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 94a3a3299910..c5455d36b617 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,6 +25,24 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H +#undef WARN_ON +/* Many gcc seem to no see through this and fall over :( */ +#if 0 +#define WARN_ON(x) ({ \ + bool __i915_warn_cond = (x); \ + if (__builtin_constant_p(__i915_warn_cond)) \ + BUILD_BUG_ON(__i915_warn_cond); \ + WARN(__i915_warn_cond, "WARN_ON(" #x ")"); }) +#else +#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")") +#endif + +#undef WARN_ON_ONCE +#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")") + +#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ + (long)(x), __func__) + #if GCC_VERSION >= 70000 #define add_overflows(A, B) \ __builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b7ae42a67494..570bd603f401 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8088,7 +8088,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) case GEN6_PCODE_TIMEOUT: return -ETIMEDOUT; default: - MISSING_CASE(flags) + MISSING_CASE(flags); return 0; } } -- cgit v1.2.3 From a79a524e9260d4ffaff88348615e70fb3d393692 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Mar 2017 21:21:43 +0100 Subject: drm/i915: Avoid lock dropping between rescheduling Unlocking is dangerous. In this case we combine an early update to the out-of-queue request, because we know that it will be inserted into the correct FIFO priority-ordered slot when it becomes ready in the future. However, given sufficient enthusiasm, it may become ready as we are continuing to reschedule, and so may gazump the FIFO if we have since dropped its spinlock. The result is that it may be executed too early, before its dependencies. v2: Move all work into the second phase over the topological sort. This removes the shortcut on the out-of-rbtree request to ensure that we only adjust its priority after adjusting all of its dependencies. Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities") Testcase: igt/gem_exec_whisper Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170327202143.7972-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_lrc.c | 53 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b0c3a029b592..12cf645793ed 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -658,15 +658,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) static struct intel_engine_cs * pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine; + struct intel_engine_cs *engine = + container_of(pt, struct drm_i915_gem_request, priotree)->engine; + + GEM_BUG_ON(!locked); - engine = container_of(pt, - struct drm_i915_gem_request, - priotree)->engine; if (engine != locked) { - if (locked) - spin_unlock_irq(&locked->timeline->lock); - spin_lock_irq(&engine->timeline->lock); + spin_unlock(&locked->timeline->lock); + spin_lock(&engine->timeline->lock); } return engine; @@ -674,7 +673,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) static void execlists_schedule(struct drm_i915_gem_request *request, int prio) { - struct intel_engine_cs *engine = NULL; + struct intel_engine_cs *engine; struct i915_dependency *dep, *p; struct i915_dependency stack; LIST_HEAD(dfs); @@ -708,26 +707,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) list_for_each_entry_safe(dep, p, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; - list_for_each_entry(p, &pt->signalers_list, signal_link) + /* Within an engine, there can be no cycle, but we may + * refer to the same dependency chain multiple times + * (redundant dependencies are not eliminated) and across + * engines. + */ + list_for_each_entry(p, &pt->signalers_list, signal_link) { + GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); + } list_safe_reset_next(dep, p, dfs_link); - if (!RB_EMPTY_NODE(&pt->node)) - continue; - - engine = pt_lock_engine(pt, engine); - - /* If it is not already in the rbtree, we can update the - * priority inplace and skip over it (and its dependencies) - * if it is referenced *again* as we descend the dfs. - */ - if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) { - pt->priority = prio; - list_del_init(&dep->dfs_link); - } } + engine = request->engine; + spin_lock_irq(&engine->timeline->lock); + /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; @@ -739,16 +735,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) if (prio <= pt->priority) continue; - GEM_BUG_ON(RB_EMPTY_NODE(&pt->node)); - pt->priority = prio; - rb_erase(&pt->node, &engine->execlist_queue); - if (insert_request(pt, &engine->execlist_queue)) - engine->execlist_first = &pt->node; + if (!RB_EMPTY_NODE(&pt->node)) { + rb_erase(&pt->node, &engine->execlist_queue); + if (insert_request(pt, &engine->execlist_queue)) + engine->execlist_first = &pt->node; + } } - if (engine) - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline->lock); /* XXX Do we need to preempt to make room for us and our deps? */ } -- cgit v1.2.3 From 18afa288920475a9f95d70920b0c54b43b5256a7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 29 Mar 2017 10:47:46 +0100 Subject: Revert "drm/i915: Skip execlists_dequeue() early if the list is empty" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6c943de6686f ("drm/i915: Skip execlists_dequeue() early if the list is empty"). The validity of using READ_ONCE there depends upon having a mb to coordinate the assignment of engine->execlist_first inside submit_request() and checking prior to taking the spinlock in execlists_dequeue(). We wrote "the update to TASKLET_SCHED incurs a memory barrier making this cross-cpu checking safe", but failed to notice that this mb was *conditional* on the execlists being ready, i.e. there wasn't the required mb when it was most necessary! We could install an unconditional memory barrier to fixup the READ_ONCE(): diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7dd732cb9f57..1ed164b16d44 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -616,6 +616,7 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) if (insert_request(&request->priotree, &engine->execlist_queue)) { engine->execlist_first = &request->priotree.node; + smp_wmb(); if (execlists_elsp_ready(engine)) But we have opted to remove the race as it should be rarely effective, and saves us having to explain the necessary memory barriers which we quite clearly failed at. Reported-and-tested-by: Tvrtko Ursulin Fixes: 6c943de6686f ("drm/i915: Skip execlists_dequeue() early if the list is empty") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170329100052.29505-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 12 ------------ drivers/gpu/drm/i915/intel_lrc.c | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 6193ad7edcf4..1642fff9cf13 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -662,18 +662,6 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; - /* After execlist_first is updated, the tasklet will be rescheduled. - * - * If we are currently running (inside the tasklet) and a third - * party queues a request and so updates engine->execlist_first under - * the spinlock (which we have elided), it will atomically set the - * TASKLET_SCHED flag causing the us to be re-executed and pick up - * the change in state (the update to TASKLET_SCHED incurs a memory - * barrier making this cross-cpu checking safe). - */ - if (!READ_ONCE(engine->execlist_first)) - return false; - spin_lock_irq(&engine->timeline->lock); rb = engine->execlist_first; while (rb) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 12cf645793ed..c8f7c631fc1f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -402,18 +402,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; - /* After execlist_first is updated, the tasklet will be rescheduled. - * - * If we are currently running (inside the tasklet) and a third - * party queues a request and so updates engine->execlist_first under - * the spinlock (which we have elided), it will atomically set the - * TASKLET_SCHED flag causing the us to be re-executed and pick up - * the change in state (the update to TASKLET_SCHED incurs a memory - * barrier making this cross-cpu checking safe). - */ - if (!READ_ONCE(engine->execlist_first)) - return; - last = port->request; if (last) /* WaIdleLiteRestore:bdw,skl -- cgit v1.2.3 From 2f075565e32b7dc92559f9bc294ac94a487999fc Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 24 Mar 2017 14:29:48 -0700 Subject: drm/i915: Use LINEAR modifier instead of NONE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They're the same, so use the one which makes more sense. Signed-off-by: Ben Widawsky Link: http://patchwork.freedesktop.org/patch/msgid/20170324212950.2206-1-ben@bwidawsk.net Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_display.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 654b8a0c28ee..fe6bd745c130 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1997,7 +1997,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane) unsigned int cpp = fb->format->cpp[plane]; switch (fb->modifier) { - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: return cpp; case I915_FORMAT_MOD_X_TILED: if (IS_GEN2(dev_priv)) @@ -2033,7 +2033,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane) static unsigned int intel_tile_height(const struct drm_framebuffer *fb, int plane) { - if (fb->modifier == DRM_FORMAT_MOD_NONE) + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) return 1; else return intel_tile_size(to_i915(fb->dev)) / @@ -2107,7 +2107,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, return 4096; switch (fb->modifier) { - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: return intel_linear_alignment(dev_priv); case I915_FORMAT_MOD_X_TILED: if (INTEL_GEN(dev_priv) >= 9) @@ -2290,7 +2290,7 @@ static u32 intel_adjust_tile_offset(int *x, int *y, WARN_ON(new_offset > old_offset); - if (fb->modifier != DRM_FORMAT_MOD_NONE) { + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { unsigned int tile_size, tile_width, tile_height; unsigned int pitch_tiles; @@ -2345,7 +2345,7 @@ static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv, if (alignment) alignment--; - if (fb_modifier != DRM_FORMAT_MOD_NONE) { + if (fb_modifier != DRM_FORMAT_MOD_LINEAR) { unsigned int tile_size, tile_width, tile_height; unsigned int tile_rows, tiles, pitch_tiles; @@ -2471,7 +2471,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, DRM_ROTATE_0, tile_size); offset /= tile_size; - if (fb->modifier != DRM_FORMAT_MOD_NONE) { + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { unsigned int tile_width, tile_height; unsigned int pitch_tiles; struct drm_rect r; @@ -2803,7 +2803,7 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, int cpp = fb->format->cpp[plane]; switch (fb->modifier) { - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: switch (cpp) { case 8: @@ -3154,7 +3154,7 @@ static void i9xx_disable_primary_plane(struct drm_plane *primary, static u32 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) { - if (fb->modifier == DRM_FORMAT_MOD_NONE) + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) return 64; else return intel_tile_width_bytes(fb, plane); @@ -3253,7 +3253,7 @@ static u32 skl_plane_ctl_format(uint32_t pixel_format) static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) { switch (fb_modifier) { - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: break; case I915_FORMAT_MOD_X_TILED: return PLANE_CTL_TILED_X; @@ -8390,7 +8390,7 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, tiling = val & PLANE_CTL_TILED_MASK; switch (tiling) { case PLANE_CTL_TILED_LINEAR: - fb->modifier = DRM_FORMAT_MOD_NONE; + fb->modifier = DRM_FORMAT_MOD_LINEAR; break; case PLANE_CTL_TILED_X: plane_config->tiling = I915_TILING_X; @@ -10366,7 +10366,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, ctl = I915_READ(PLANE_CTL(pipe, 0)); ctl &= ~PLANE_CTL_TILED_MASK; switch (fb->modifier) { - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: break; case I915_FORMAT_MOD_X_TILED: ctl |= PLANE_CTL_TILED_X; @@ -13729,7 +13729,7 @@ intel_check_cursor_plane(struct drm_plane *plane, return -ENOMEM; } - if (fb->modifier != DRM_FORMAT_MOD_NONE) { + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { DRM_DEBUG_KMS("cursor cannot be tiled\n"); return -EINVAL; } @@ -14390,7 +14390,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->modifier[0]); goto err; } - case DRM_FORMAT_MOD_NONE: + case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: break; default: @@ -14413,7 +14413,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->pixel_format); if (mode_cmd->pitches[0] > pitch_limit) { DRM_DEBUG_KMS("%s pitch (%u) must be at most %d\n", - mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ? + mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], pitch_limit); goto err; -- cgit v1.2.3 From bf39ec335eb8cc51b4e1c9303ef92b380d204bb1 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 27 Mar 2017 17:41:02 +0800 Subject: drm/i915/gvt: adjust mem size for low resolution type From commit d1a513be1f0a ("drm/i915/gvt: add resolution definition for vGPU type"), small type has been restricted to small resolution, so not require larger high GM size any more. Change to smaller 384M for more VM creation with vGPU enabled which still perform reasonable workload. Fixes: d1a513be1f0a ("drm/i915/gvt: add resolution definition for vGPU type") Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/vgpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 41cfa5ccae84..d8d128625331 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -72,7 +72,7 @@ static struct { char *name; } vgpu_types[] = { /* Fixed vGPU type table */ - { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, GVT_EDID_1024_768, "8" }, { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, -- cgit v1.2.3 From 865f03d42ed0c90c9faf3301775176834ba13eba Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 24 Mar 2017 01:56:54 -0400 Subject: drm/i915/gvt: remove the redundant info NULL check The variable info is never NULL, which is checked by the caller. This patch removes the redundant info NULL check logic. Fixes: 695fbc08d80f ("drm/i915/gvt: replace the gvt_err with gvt_vgpu_err") Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index c7e7c9377cef..65bcf6096e5e 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1371,13 +1371,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev) static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) { - struct intel_vgpu *vgpu = info->vgpu; - - if (!info) { - gvt_vgpu_err("kvmgt_guest_info invalid\n"); - return false; - } - kvm_page_track_unregister_notifier(info->kvm, &info->track_node); kvmgt_protect_table_destroy(info); gvt_cache_destroy(info->vgpu); -- cgit v1.2.3 From 91d0101ad30bd1bd7f7f805f4fa314c6b70bb602 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Thu, 30 Mar 2017 00:36:34 +0800 Subject: drm/i915/gvt: use hrtimer replace delayed_work in scheduler Currently the scheduler is triggered by delayed_work, which doesn't provide precision at microsecond level. Move to hrtimer instead for more accurate control. Signed-off-by: Ping Gao Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 5 ++++ drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/sched_policy.c | 49 ++++++++++++++++++++++----------- drivers/gpu/drm/i915/gvt/sched_policy.h | 2 ++ 4 files changed, 41 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index b84b7ca3f66f..894735c77f63 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -144,6 +144,11 @@ static int gvt_service_thread(void *data) intel_gvt_emulate_vblank(gvt); mutex_unlock(&gvt->lock); } + + if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, + (void *)&gvt->service_request)) { + intel_gvt_schedule(gvt); + } } return 0; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 6dfc48b63b71..7455214b242c 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -249,6 +249,7 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915) enum { INTEL_GVT_REQUEST_EMULATE_VBLANK = 0, + INTEL_GVT_REQUEST_SCHED = 1, }; static inline void intel_gvt_request_service(struct intel_gvt *gvt, diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 34b9acdf3479..c8ade8fc511d 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -96,17 +96,16 @@ struct tbs_vgpu_data { struct tbs_sched_data { struct intel_gvt *gvt; - struct delayed_work work; + struct hrtimer timer; unsigned long period; struct list_head runq_head; }; -#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1)) +/* in nanosecond */ +#define GVT_DEFAULT_TIME_SLICE 1000000 -static void tbs_sched_func(struct work_struct *work) +static void tbs_sched_func(struct tbs_sched_data *sched_data) { - struct tbs_sched_data *sched_data = container_of(work, - struct tbs_sched_data, work.work); struct tbs_vgpu_data *vgpu_data; struct intel_gvt *gvt = sched_data->gvt; @@ -115,8 +114,6 @@ static void tbs_sched_func(struct work_struct *work) struct intel_vgpu *vgpu = NULL; struct list_head *pos, *head; - mutex_lock(&gvt->lock); - /* no vgpu or has already had a target */ if (list_empty(&sched_data->runq_head) || scheduler->next_vgpu) goto out; @@ -151,17 +148,30 @@ out: scheduler->next_vgpu->id); try_to_schedule_next_vgpu(gvt); } +} - /* - * still have vgpu on runq - * or last schedule haven't finished due to running workload - */ - if (!list_empty(&sched_data->runq_head) || scheduler->next_vgpu) - schedule_delayed_work(&sched_data->work, sched_data->period); +void intel_gvt_schedule(struct intel_gvt *gvt) +{ + struct tbs_sched_data *sched_data = gvt->scheduler.sched_data; + mutex_lock(&gvt->lock); + tbs_sched_func(sched_data); mutex_unlock(&gvt->lock); } +static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data) +{ + struct tbs_sched_data *data; + + data = container_of(timer_data, struct tbs_sched_data, timer); + + intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED); + + hrtimer_add_expires_ns(&data->timer, data->period); + + return HRTIMER_RESTART; +} + static int tbs_sched_init(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = @@ -174,11 +184,13 @@ static int tbs_sched_init(struct intel_gvt *gvt) return -ENOMEM; INIT_LIST_HEAD(&data->runq_head); - INIT_DELAYED_WORK(&data->work, tbs_sched_func); + hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + data->timer.function = tbs_timer_fn; data->period = GVT_DEFAULT_TIME_SLICE; data->gvt = gvt; scheduler->sched_data = data; + return 0; } @@ -188,7 +200,8 @@ static void tbs_sched_clean(struct intel_gvt *gvt) &gvt->scheduler; struct tbs_sched_data *data = scheduler->sched_data; - cancel_delayed_work(&data->work); + hrtimer_cancel(&data->timer); + kfree(data); scheduler->sched_data = NULL; } @@ -205,6 +218,7 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) INIT_LIST_HEAD(&data->list); vgpu->sched_data = data; + return 0; } @@ -223,7 +237,10 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) return; list_add_tail(&vgpu_data->list, &sched_data->runq_head); - schedule_delayed_work(&sched_data->work, 0); + + if (!hrtimer_active(&sched_data->timer)) + hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(), + sched_data->period), HRTIMER_MODE_ABS); } static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h index bb8b9097e41a..ba00a5f7455f 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.h +++ b/drivers/gpu/drm/i915/gvt/sched_policy.h @@ -43,6 +43,8 @@ struct intel_gvt_sched_policy_ops { void (*stop_schedule)(struct intel_vgpu *vgpu); }; +void intel_gvt_schedule(struct intel_gvt *gvt); + int intel_gvt_init_sched_policy(struct intel_gvt *gvt); void intel_gvt_clean_sched_policy(struct intel_gvt *gvt); -- cgit v1.2.3 From f6504cce54b26e4318697a854a50cf1a1cb3c066 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Thu, 30 Mar 2017 00:36:35 +0800 Subject: drm/i915/gvt: add some statistic routine for scheduler Add some statistic routine to collect the time when vGPU is scheduled in/out and the time of the last ctx submission. Signed-off-by: Ping Gao Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 5 +++ drivers/gpu/drm/i915/gvt/handlers.c | 1 + drivers/gpu/drm/i915/gvt/sched_policy.c | 67 +++++++++++++++++++++------------ 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 7455214b242c..fe7f5ae8884a 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -138,6 +138,10 @@ struct intel_vgpu_display { struct intel_vgpu_sbi sbi; }; +struct vgpu_sched_ctl { + int weight; +}; + struct intel_vgpu { struct intel_gvt *gvt; int id; @@ -160,6 +164,7 @@ struct intel_vgpu { struct list_head workload_q_head[I915_NUM_ENGINES]; struct kmem_cache *workloads; atomic_t running_workload_num; + ktime_t last_ctx_submit_time; DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); struct i915_gem_context *shadow_ctx; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 0b41d19ae09a..68600a3c46e5 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1416,6 +1416,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data; if (execlist->elsp_dwords.index == 3) { + vgpu->last_ctx_submit_time = ktime_get(); ret = intel_vgpu_submit_execlist(vgpu, ring_id); if(ret) gvt_vgpu_err("fail submit workload on ring %d\n", diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index c8ade8fc511d..ff19abc6f77e 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -47,11 +47,33 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu) return false; } +struct vgpu_sched_data { + struct list_head list; + struct intel_vgpu *vgpu; + + ktime_t sched_in_time; + ktime_t sched_out_time; + ktime_t sched_time; + ktime_t left_ts; + ktime_t allocated_ts; + + struct vgpu_sched_ctl sched_ctl; +}; + +struct gvt_sched_data { + struct intel_gvt *gvt; + struct hrtimer timer; + unsigned long period; + struct list_head runq_head; +}; + static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; enum intel_engine_id i; struct intel_engine_cs *engine; + struct vgpu_sched_data *vgpu_data; + ktime_t cur_time; /* no target to schedule */ if (!scheduler->next_vgpu) @@ -77,6 +99,14 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) gvt_dbg_sched("switch to next vgpu %d\n", scheduler->next_vgpu->id); + cur_time = ktime_get(); + if (scheduler->current_vgpu) { + vgpu_data = scheduler->current_vgpu->sched_data; + vgpu_data->sched_out_time = cur_time; + } + vgpu_data = scheduler->next_vgpu->sched_data; + vgpu_data->sched_in_time = cur_time; + /* switch current vgpu */ scheduler->current_vgpu = scheduler->next_vgpu; scheduler->next_vgpu = NULL; @@ -88,25 +118,12 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) wake_up(&scheduler->waitq[i]); } -struct tbs_vgpu_data { - struct list_head list; - struct intel_vgpu *vgpu; - /* put some per-vgpu sched stats here */ -}; - -struct tbs_sched_data { - struct intel_gvt *gvt; - struct hrtimer timer; - unsigned long period; - struct list_head runq_head; -}; - /* in nanosecond */ #define GVT_DEFAULT_TIME_SLICE 1000000 -static void tbs_sched_func(struct tbs_sched_data *sched_data) +static void tbs_sched_func(struct gvt_sched_data *sched_data) { - struct tbs_vgpu_data *vgpu_data; + struct vgpu_sched_data *vgpu_data; struct intel_gvt *gvt = sched_data->gvt; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; @@ -130,7 +147,7 @@ static void tbs_sched_func(struct tbs_sched_data *sched_data) if (pos == &sched_data->runq_head) continue; - vgpu_data = container_of(pos, struct tbs_vgpu_data, list); + vgpu_data = container_of(pos, struct vgpu_sched_data, list); if (!vgpu_has_pending_workload(vgpu_data->vgpu)) continue; @@ -152,7 +169,7 @@ out: void intel_gvt_schedule(struct intel_gvt *gvt) { - struct tbs_sched_data *sched_data = gvt->scheduler.sched_data; + struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; mutex_lock(&gvt->lock); tbs_sched_func(sched_data); @@ -161,9 +178,9 @@ void intel_gvt_schedule(struct intel_gvt *gvt) static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data) { - struct tbs_sched_data *data; + struct gvt_sched_data *data; - data = container_of(timer_data, struct tbs_sched_data, timer); + data = container_of(timer_data, struct gvt_sched_data, timer); intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED); @@ -177,7 +194,7 @@ static int tbs_sched_init(struct intel_gvt *gvt) struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - struct tbs_sched_data *data; + struct gvt_sched_data *data; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) @@ -198,7 +215,7 @@ static void tbs_sched_clean(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - struct tbs_sched_data *data = scheduler->sched_data; + struct gvt_sched_data *data = scheduler->sched_data; hrtimer_cancel(&data->timer); @@ -208,7 +225,7 @@ static void tbs_sched_clean(struct intel_gvt *gvt) static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) { - struct tbs_vgpu_data *data; + struct vgpu_sched_data *data; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) @@ -230,8 +247,8 @@ static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu) static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) { - struct tbs_sched_data *sched_data = vgpu->gvt->scheduler.sched_data; - struct tbs_vgpu_data *vgpu_data = vgpu->sched_data; + struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data; + struct vgpu_sched_data *vgpu_data = vgpu->sched_data; if (!list_empty(&vgpu_data->list)) return; @@ -245,7 +262,7 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) { - struct tbs_vgpu_data *vgpu_data = vgpu->sched_data; + struct vgpu_sched_data *vgpu_data = vgpu->sched_data; list_del_init(&vgpu_data->list); } -- cgit v1.2.3 From 32356920dae17ef3b04fe02a113418c983fc66ce Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Thu, 30 Mar 2017 00:36:36 +0800 Subject: drm/i915/gvt: factor out the scheduler Factor out the scheduler to a more clear structure, the basic logic is to find out next vGPU first and then schedule it. vGPUs were ordered in a LRU list, scheduler scan from the LRU list head and choose the first vGPU who has pending workload. Signed-off-by: Ping Gao Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 66 ++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index ff19abc6f77e..15e0c3b53d93 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -48,7 +48,7 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu) } struct vgpu_sched_data { - struct list_head list; + struct list_head lru_list; struct intel_vgpu *vgpu; ktime_t sched_in_time; @@ -64,7 +64,7 @@ struct gvt_sched_data { struct intel_gvt *gvt; struct hrtimer timer; unsigned long period; - struct list_head runq_head; + struct list_head lru_runq_head; }; static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) @@ -118,36 +118,17 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) wake_up(&scheduler->waitq[i]); } -/* in nanosecond */ -#define GVT_DEFAULT_TIME_SLICE 1000000 - -static void tbs_sched_func(struct gvt_sched_data *sched_data) +static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data) { struct vgpu_sched_data *vgpu_data; - - struct intel_gvt *gvt = sched_data->gvt; - struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - struct intel_vgpu *vgpu = NULL; - struct list_head *pos, *head; - - /* no vgpu or has already had a target */ - if (list_empty(&sched_data->runq_head) || scheduler->next_vgpu) - goto out; - - if (scheduler->current_vgpu) { - vgpu_data = scheduler->current_vgpu->sched_data; - head = &vgpu_data->list; - } else { - head = &sched_data->runq_head; - } + struct list_head *head = &sched_data->lru_runq_head; + struct list_head *pos; /* search a vgpu with pending workload */ list_for_each(pos, head) { - if (pos == &sched_data->runq_head) - continue; - vgpu_data = container_of(pos, struct vgpu_sched_data, list); + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); if (!vgpu_has_pending_workload(vgpu_data->vgpu)) continue; @@ -155,8 +136,33 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data) break; } + return vgpu; +} + +/* in nanosecond */ +#define GVT_DEFAULT_TIME_SLICE 1000000 + +static void tbs_sched_func(struct gvt_sched_data *sched_data) +{ + struct intel_gvt *gvt = sched_data->gvt; + struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; + struct vgpu_sched_data *vgpu_data; + struct intel_vgpu *vgpu = NULL; + + /* no active vgpu or has already had a target */ + if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu) + goto out; + + vgpu = find_busy_vgpu(sched_data); if (vgpu) { scheduler->next_vgpu = vgpu; + + /* Move the last used vGPU to the tail of lru_list */ + vgpu_data = vgpu->sched_data; + list_del_init(&vgpu_data->lru_list); + list_add_tail(&vgpu_data->lru_list, + &sched_data->lru_runq_head); + gvt_dbg_sched("pick next vgpu %d\n", vgpu->id); } out: @@ -200,7 +206,7 @@ static int tbs_sched_init(struct intel_gvt *gvt) if (!data) return -ENOMEM; - INIT_LIST_HEAD(&data->runq_head); + INIT_LIST_HEAD(&data->lru_runq_head); hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); data->timer.function = tbs_timer_fn; data->period = GVT_DEFAULT_TIME_SLICE; @@ -232,7 +238,7 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) return -ENOMEM; data->vgpu = vgpu; - INIT_LIST_HEAD(&data->list); + INIT_LIST_HEAD(&data->lru_list); vgpu->sched_data = data; @@ -250,10 +256,10 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data; struct vgpu_sched_data *vgpu_data = vgpu->sched_data; - if (!list_empty(&vgpu_data->list)) + if (!list_empty(&vgpu_data->lru_list)) return; - list_add_tail(&vgpu_data->list, &sched_data->runq_head); + list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head); if (!hrtimer_active(&sched_data->timer)) hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(), @@ -264,7 +270,7 @@ static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) { struct vgpu_sched_data *vgpu_data = vgpu->sched_data; - list_del_init(&vgpu_data->list); + list_del_init(&vgpu_data->lru_list); } static struct intel_gvt_sched_policy_ops tbs_schedule_ops = { -- cgit v1.2.3 From bc90d097ae144fab1f789f8523b621de7125c6a8 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Thu, 30 Mar 2017 00:36:37 +0800 Subject: drm/i915/gvt: define weight according to vGPU type The weight defines proportional control of physical GPU resource shared between vGPUs. So far the weight is tied to a specific vGPU type, i.e when creating multiple vGPUs with different types, they will inherit different weights. e.g. The weight of type GVTg_V5_2 is 8, the weight of type GVTg_V5_4 is 4, so vGPU of type GVTg_V5_2 has double vGPU resource of vGPU type GVTg_V5_4. TODO: allow user control the weight setting in the future. Signed-off-by: Ping Gao Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 4 ++++ drivers/gpu/drm/i915/gvt/kvmgt.c | 6 ++++-- drivers/gpu/drm/i915/gvt/sched_policy.c | 1 + drivers/gpu/drm/i915/gvt/vgpu.c | 29 ++++++++++++++++++++++++----- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index fe7f5ae8884a..0631f64e06db 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -151,6 +151,7 @@ struct intel_vgpu { bool failsafe; bool resetting; void *sched_data; + struct vgpu_sched_ctl sched_ctl; struct intel_vgpu_fence fence; struct intel_vgpu_gm gm; @@ -220,6 +221,7 @@ struct intel_vgpu_type { unsigned int low_gm_size; unsigned int high_gm_size; unsigned int fence; + unsigned int weight; enum intel_vgpu_edid resolution; }; @@ -328,6 +330,8 @@ struct intel_vgpu_creation_params { __u64 resolution; __s32 primary; __u64 vgpu_id; + + __u32 weight; }; int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 65bcf6096e5e..5f55d89a0959 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -295,10 +295,12 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, return 0; return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\nresolution: %s\n", + "fence: %d\nresolution: %s\n" + "weight: %d\n", BYTES_TO_MB(type->low_gm_size), BYTES_TO_MB(type->high_gm_size), - type->fence, vgpu_edid_str(type->resolution)); + type->fence, vgpu_edid_str(type->resolution), + type->weight); } static MDEV_TYPE_ATTR_RO(available_instances); diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 15e0c3b53d93..5aa7a2539e4d 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -237,6 +237,7 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) if (!data) return -ENOMEM; + data->sched_ctl.weight = vgpu->sched_ctl.weight; data->vgpu = vgpu; INIT_LIST_HEAD(&data->lru_list); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index d8d128625331..36c107e2058a 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -64,18 +64,28 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); } +#define VGPU_MAX_WEIGHT 16 +#define VGPU_WEIGHT(vgpu_num) \ + (VGPU_MAX_WEIGHT / (vgpu_num)) + static struct { unsigned int low_mm; unsigned int high_mm; unsigned int fence; + + /* A vGPU with a weight of 8 will get twice as much GPU as a vGPU + * with a weight of 4 on a contended host, different vGPU type has + * different weight set. Legal weights range from 1 to 16. + */ + unsigned int weight; enum intel_vgpu_edid edid; char *name; } vgpu_types[] = { /* Fixed vGPU type table */ - { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, GVT_EDID_1024_768, "8" }, - { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, - { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, - { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, + { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, VGPU_WEIGHT(8), GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, VGPU_WEIGHT(4), GVT_EDID_1920_1200, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, VGPU_WEIGHT(2), GVT_EDID_1920_1200, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, VGPU_WEIGHT(1), GVT_EDID_1920_1200, "1" }, }; /** @@ -120,6 +130,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) gvt->types[i].low_gm_size = vgpu_types[i].low_mm; gvt->types[i].high_gm_size = vgpu_types[i].high_mm; gvt->types[i].fence = vgpu_types[i].fence; + + if (vgpu_types[i].weight < 1 || + vgpu_types[i].weight > VGPU_MAX_WEIGHT) + return -EINVAL; + + gvt->types[i].weight = vgpu_types[i].weight; gvt->types[i].resolution = vgpu_types[i].edid; gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, high_avail / vgpu_types[i].high_mm); @@ -131,11 +147,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) sprintf(gvt->types[i].name, "GVTg_V5_%s", vgpu_types[i].name); - gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n", + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u weight %u res %s\n", i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence, + gvt->types[i].weight, vgpu_edid_str(gvt->types[i].resolution)); } @@ -239,6 +256,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->id = ret; vgpu->handle = param->handle; vgpu->gvt = gvt; + vgpu->sched_ctl.weight = param->weight; bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES); intel_vgpu_init_cfg_space(vgpu, param->primary); @@ -325,6 +343,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, param.low_gm_sz = type->low_gm_size; param.high_gm_sz = type->high_gm_size; param.fence_sz = type->fence; + param.weight = type->weight; param.resolution = type->resolution; /* XXX current param based on MB */ -- cgit v1.2.3 From 39d467c2b71ccb4a55af830c117fafe0525caa12 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Thu, 30 Mar 2017 10:03:24 +0800 Subject: drm/i915/gvt: add basic function for weight control This method tries to guarantee precision in second level, with the adjustment conducted in every 100ms. At the end of each vGPU switch calculate the sched time and subtract it from the time slice allocated; the allocated time slice for every 100ms together with remaining timeslice, will be used to decide how much timeslice allocated to this vGPU in the next 100ms slice, with the end goal to guarantee weight ratio in second level. Signed-off-by: Ping Gao Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 59 +++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 5aa7a2539e4d..dd5b38c3e4ed 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -67,6 +67,60 @@ struct gvt_sched_data { struct list_head lru_runq_head; }; +static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu) +{ + ktime_t delta_ts; + struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data; + + delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time; + + vgpu_data->sched_time += delta_ts; + vgpu_data->left_ts -= delta_ts; +} + +#define GVT_TS_BALANCE_PERIOD_MS 100 +#define GVT_TS_BALANCE_STAGE_NUM 10 + +static void gvt_balance_timeslice(struct gvt_sched_data *sched_data) +{ + struct vgpu_sched_data *vgpu_data; + struct list_head *pos; + static uint64_t stage_check; + int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM; + + /* The timeslice accumulation reset at stage 0, which is + * allocated again without adding previous debt. + */ + if (stage == 0) { + int total_weight = 0; + ktime_t fair_timeslice; + + list_for_each(pos, &sched_data->lru_runq_head) { + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); + total_weight += vgpu_data->sched_ctl.weight; + } + + list_for_each(pos, &sched_data->lru_runq_head) { + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); + fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) * + vgpu_data->sched_ctl.weight / + total_weight; + + vgpu_data->allocated_ts = fair_timeslice; + vgpu_data->left_ts = vgpu_data->allocated_ts; + } + } else { + list_for_each(pos, &sched_data->lru_runq_head) { + vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list); + + /* timeslice for next 100ms should add the left/debt + * slice of previous stages. + */ + vgpu_data->left_ts += vgpu_data->allocated_ts; + } + } +} + static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; @@ -103,6 +157,7 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) if (scheduler->current_vgpu) { vgpu_data = scheduler->current_vgpu->sched_data; vgpu_data->sched_out_time = cur_time; + vgpu_update_timeslice(scheduler->current_vgpu); } vgpu_data = scheduler->next_vgpu->sched_data; vgpu_data->sched_in_time = cur_time; @@ -148,6 +203,10 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data) struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct vgpu_sched_data *vgpu_data; struct intel_vgpu *vgpu = NULL; + static uint64_t timer_check; + + if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) + gvt_balance_timeslice(sched_data); /* no active vgpu or has already had a target */ if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu) -- cgit v1.2.3 From afe04fbe6c522e73d5cc61a6660cce0e78630786 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Thu, 30 Mar 2017 00:36:39 +0800 Subject: drm/i915/gvt: create an idle vGPU vGPU resource is allocated by scheduler. To account for non-allocated free cycles, we create an idle vGPU as the placeholder similar to idle task concept, which is useful to handle some corner cases in scheduling policy. Signed-off-by: Ping Gao Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 11 ++++++++ drivers/gpu/drm/i915/gvt/gvt.h | 3 +++ drivers/gpu/drm/i915/gvt/vgpu.c | 56 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 894735c77f63..0f3a98865a58 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -202,6 +202,8 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) idr_destroy(&gvt->vgpu_idr); + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); + kfree(dev_priv->gvt); dev_priv->gvt = NULL; } @@ -220,6 +222,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) int intel_gvt_init_device(struct drm_i915_private *dev_priv) { struct intel_gvt *gvt; + struct intel_vgpu *vgpu; int ret; /* @@ -292,6 +295,14 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) goto out_clean_types; } + vgpu = intel_gvt_create_idle_vgpu(gvt); + if (IS_ERR(vgpu)) { + ret = PTR_ERR(vgpu); + gvt_err("failed to create idle vgpu\n"); + goto out_clean_types; + } + gvt->idle_vgpu = vgpu; + gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; return 0; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 0631f64e06db..806da96b6a92 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -243,6 +243,7 @@ struct intel_gvt { DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); struct intel_vgpu_type *types; unsigned int num_types; + struct intel_vgpu *idle_vgpu; struct task_struct *service_thread; wait_queue_head_t service_thread_wq; @@ -386,6 +387,8 @@ static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu, int intel_gvt_init_vgpu_types(struct intel_gvt *gvt); void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt); +struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt); +void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu); struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_type *type); void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 36c107e2058a..6ba02525e905 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -233,6 +233,59 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) mutex_unlock(&gvt->lock); } +#define IDLE_VGPU_IDR 0 + +/** + * intel_gvt_create_idle_vgpu - create an idle virtual GPU + * @gvt: GVT device + * + * This function is called when user wants to create an idle virtual GPU. + * + * Returns: + * pointer to intel_vgpu, error pointer if failed. + */ +struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) +{ + struct intel_vgpu *vgpu; + enum intel_engine_id i; + int ret; + + vgpu = vzalloc(sizeof(*vgpu)); + if (!vgpu) + return ERR_PTR(-ENOMEM); + + vgpu->id = IDLE_VGPU_IDR; + vgpu->gvt = gvt; + + for (i = 0; i < I915_NUM_ENGINES; i++) + INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + + ret = intel_vgpu_init_sched_policy(vgpu); + if (ret) + goto out_free_vgpu; + + vgpu->active = false; + + return vgpu; + +out_free_vgpu: + vfree(vgpu); + return ERR_PTR(ret); +} + +/** + * intel_gvt_destroy_vgpu - destroy an idle virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to destroy an idle virtual GPU. + * + */ +void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu) +{ + intel_vgpu_clean_sched_policy(vgpu); + vfree(vgpu); +} + static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_creation_params *param) { @@ -249,7 +302,8 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, mutex_lock(&gvt->lock); - ret = idr_alloc(&gvt->vgpu_idr, vgpu, 1, GVT_MAX_VGPU, GFP_KERNEL); + ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU, + GFP_KERNEL); if (ret < 0) goto out_free_vgpu; -- cgit v1.2.3 From b35f34d1da4e77637869c8041a355da810f69fb6 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Thu, 30 Mar 2017 00:36:40 +0800 Subject: drm/i915/gvt: control the scheduler by timeslice usage The timeslice usage will determine vGPU whether has chance to schedule or not at every vGPU switch checkpoint. Signed-off-by: Ping Gao Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index dd5b38c3e4ed..f84959170674 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -187,8 +187,11 @@ static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data) if (!vgpu_has_pending_workload(vgpu_data->vgpu)) continue; - vgpu = vgpu_data->vgpu; - break; + /* Return the vGPU only if it has time slice left */ + if (vgpu_data->left_ts > 0) { + vgpu = vgpu_data->vgpu; + break; + } } return vgpu; @@ -223,6 +226,8 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data) &sched_data->lru_runq_head); gvt_dbg_sched("pick next vgpu %d\n", vgpu->id); + } else { + scheduler->next_vgpu = gvt->idle_vgpu; } out: if (scheduler->next_vgpu) { -- cgit v1.2.3 From 0e53f472c3f6d72dd50b87456cce82343fbc73c6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 29 Mar 2017 13:32:55 +0300 Subject: drm/i915/opregion: bail out early for systems with no opregion VBT Reduce indent. No functional changes. Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1490783578-6065-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_opregion.c | 64 +++++++++++++++++------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 441c01466384..76a39ee6d005 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -920,6 +920,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) char buf[sizeof(OPREGION_SIGNATURE)]; int err = 0; void *base; + const void *vbt = NULL; + u32 vbt_size = 0; BUILD_BUG_ON(sizeof(struct opregion_header) != 0x100); BUILD_BUG_ON(sizeof(struct opregion_acpi) != 0x100); @@ -972,45 +974,43 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (mboxes & MBOX_ASLE_EXT) DRM_DEBUG_DRIVER("ASLE extension supported\n"); - if (!dmi_check_system(intel_no_opregion_vbt)) { - const void *vbt = NULL; - u32 vbt_size = 0; - - if (opregion->header->opregion_ver >= 2 && opregion->asle && - opregion->asle->rvda && opregion->asle->rvds) { - opregion->rvda = memremap(opregion->asle->rvda, - opregion->asle->rvds, - MEMREMAP_WB); - vbt = opregion->rvda; - vbt_size = opregion->asle->rvds; - } + if (dmi_check_system(intel_no_opregion_vbt)) + goto out; + + if (opregion->header->opregion_ver >= 2 && opregion->asle && + opregion->asle->rvda && opregion->asle->rvds) { + opregion->rvda = memremap(opregion->asle->rvda, + opregion->asle->rvds, + MEMREMAP_WB); + vbt = opregion->rvda; + vbt_size = opregion->asle->rvds; + } + if (intel_bios_is_valid_vbt(vbt, vbt_size)) { + DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (RVDA)\n"); + opregion->vbt = vbt; + opregion->vbt_size = vbt_size; + } else { + vbt = base + OPREGION_VBT_OFFSET; + /* + * The VBT specification says that if the ASLE ext mailbox is + * not used its area is reserved, but on some CHT boards the VBT + * extends into the ASLE ext area. Allow this even though it is + * against the spec, so we do not end up rejecting the VBT on + * those boards (and end up not finding the LCD panel because of + * this). + */ + vbt_size = (mboxes & MBOX_ASLE_EXT) ? + OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; + vbt_size -= OPREGION_VBT_OFFSET; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { - DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (RVDA)\n"); + DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; opregion->vbt_size = vbt_size; - } else { - vbt = base + OPREGION_VBT_OFFSET; - /* - * The VBT specification says that if the ASLE ext - * mailbox is not used its area is reserved, but - * on some CHT boards the VBT extends into the - * ASLE ext area. Allow this even though it is - * against the spec, so we do not end up rejecting - * the VBT on those boards (and end up not finding the - * LCD panel because of this). - */ - vbt_size = (mboxes & MBOX_ASLE_EXT) ? - OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; - vbt_size -= OPREGION_VBT_OFFSET; - if (intel_bios_is_valid_vbt(vbt, vbt_size)) { - DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); - opregion->vbt = vbt; - opregion->vbt_size = vbt_size; - } } } +out: return 0; err_out: -- cgit v1.2.3 From ccbf6b6498a47a71ea3287c6b41e3028a70c6282 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 29 Mar 2017 13:32:56 +0300 Subject: drm/i915/opregion: try to validate RVDA VBT only if it's there Seems more sensible this way, and reduces indent for the more common case. Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1490783578-6065-2-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_opregion.c | 41 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 76a39ee6d005..2b64cb6691eb 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -920,8 +920,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) char buf[sizeof(OPREGION_SIGNATURE)]; int err = 0; void *base; - const void *vbt = NULL; - u32 vbt_size = 0; + const void *vbt; + u32 vbt_size; BUILD_BUG_ON(sizeof(struct opregion_header) != 0x100); BUILD_BUG_ON(sizeof(struct opregion_acpi) != 0x100); @@ -984,30 +984,29 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) MEMREMAP_WB); vbt = opregion->rvda; vbt_size = opregion->asle->rvds; + if (intel_bios_is_valid_vbt(vbt, vbt_size)) { + DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (RVDA)\n"); + opregion->vbt = vbt; + opregion->vbt_size = vbt_size; + goto out; + } } + vbt = base + OPREGION_VBT_OFFSET; + /* + * The VBT specification says that if the ASLE ext mailbox is not used + * its area is reserved, but on some CHT boards the VBT extends into the + * ASLE ext area. Allow this even though it is against the spec, so we + * do not end up rejecting the VBT on those boards (and end up not + * finding the LCD panel because of this). + */ + vbt_size = (mboxes & MBOX_ASLE_EXT) ? + OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; + vbt_size -= OPREGION_VBT_OFFSET; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { - DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (RVDA)\n"); + DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; opregion->vbt_size = vbt_size; - } else { - vbt = base + OPREGION_VBT_OFFSET; - /* - * The VBT specification says that if the ASLE ext mailbox is - * not used its area is reserved, but on some CHT boards the VBT - * extends into the ASLE ext area. Allow this even though it is - * against the spec, so we do not end up rejecting the VBT on - * those boards (and end up not finding the LCD panel because of - * this). - */ - vbt_size = (mboxes & MBOX_ASLE_EXT) ? - OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; - vbt_size -= OPREGION_VBT_OFFSET; - if (intel_bios_is_valid_vbt(vbt, vbt_size)) { - DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); - opregion->vbt = vbt; - opregion->vbt_size = vbt_size; - } } out: -- cgit v1.2.3 From 2e5fec5f621410f6b71403ff6794b115c79a9d6f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 29 Mar 2017 13:32:57 +0300 Subject: drm/i915/opregion: debug log about invalid ACPI OpRegion VBT Leave more breadcrumbs for debuggers. Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1490783578-6065-3-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_opregion.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 2b64cb6691eb..d44465190dc1 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -989,6 +989,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) opregion->vbt = vbt; opregion->vbt_size = vbt_size; goto out; + } else { + DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n"); } } @@ -1007,6 +1009,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; opregion->vbt_size = vbt_size; + } else { + DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (Mailbox #4)\n"); } out: -- cgit v1.2.3 From b8991403ea0f3e5b59e08e37a7ac0af8ef4264e3 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Tue, 28 Mar 2017 09:53:47 -0700 Subject: drm/i915/guc: Take enable_guc_loading check out of GEM core code The should happen as soon as possible, but always within the logic that depends on it (and not interrupting the top-level driver control flow). Cc: Chris Wilson Cc: Joonas Lahtinen Signed-off-by: Oscar Mateo Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1490720027-23234-1-git-send-email-oscar.mateo@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 3 +-- drivers/gpu/drm/i915/i915_gem.c | 10 ++++------ drivers/gpu/drm/i915/intel_uc.c | 6 ++++++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6d9944a00b7d..0b3811673199 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -549,8 +549,7 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { static void i915_gem_fini(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->drm.struct_mutex); - if (i915.enable_guc_loading) - intel_uc_fini_hw(dev_priv); + intel_uc_fini_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_context_fini(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 84ea249c6f4f..2709be922512 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4611,12 +4611,10 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); - if (i915.enable_guc_loading) { - /* We can't enable contexts until all firmware is loaded */ - ret = intel_uc_init_hw(dev_priv); - if (ret) - goto out; - } + /* We can't enable contexts until all firmware is loaded */ + ret = intel_uc_init_hw(dev_priv); + if (ret) + goto out; out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index c767dc351c63..19653224b683 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -254,6 +254,9 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) { int ret, attempts; + if (!i915.enable_guc_loading) + return 0; + gen9_reset_guc_interrupts(dev_priv); /* We need to notify the guc whenever we change the GGTT */ @@ -343,6 +346,9 @@ err_guc: void intel_uc_fini_hw(struct drm_i915_private *dev_priv) { + if (!i915.enable_guc_loading) + return; + if (i915.enable_guc_submission) { i915_guc_submission_disable(dev_priv); gen9_disable_guc_interrupts(dev_priv); -- cgit v1.2.3 From 228ec87ccd040b620c467cd61d594bfaa4f8a12e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 09:53:41 +0100 Subject: drm/i915: Ironlake do_idle_maps w/a may be called w/o struct_mutex Since commit 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking"), i915_gem_object_put_pages() and specifically the i915_gem_gtt_finish_pages() may be called from outside of the struct_mutex and so we can no longer pass I915_WAIT_LOCKED to i915_gem_wait_for_idle. Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking") Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170330085341.20311-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cee9c4fec52a..8bab4aea63e6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2364,7 +2364,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { + if (i915_gem_wait_for_idle(dev_priv, 0)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); -- cgit v1.2.3 From 05506b5be081b728353f1612b05c8ff689772832 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 12:16:14 +0100 Subject: drm/i915: Use a dummy timeline name for a signaled fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michał Winiarski pointed out that the debugging infrastructure (such as trace_dma_fence_release) likes to pretty print the timeline name, long after we have freed the timeline. Our timelines currently live as part of the GTT (due to the strict ordering we currently use through each) which belong to the context. We aim to free the context and release its hardware resources as soon as we able to (i.e. when the last fence/request using it has been signaled and retired). As the .get_timeline_name is purely a debug feature, rather than extending the lifetime of the context, or splitting it into many different release phases just to keep the name around, replace the timeline name with a constant after the fence has been signaled. This avoids the potential use-after-free. Reported-by: Krzysztof Olinski Fixes: 80b204bce8f2 ("drm/i915: Enable multiple timelines") Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Joonas Lahtinen Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170330111614.29757-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/i915_gem_request.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index dbfa9db2419d..b9d8f43b31e6 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -37,6 +37,17 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { + /* The timeline struct (as part of the ppgtt underneath a context) + * may be freed when the request is no longer in use by the GPU. + * We could extend the life of a context to beyond that of all + * fences, possibly keeping the hw resource around indefinitely, + * or we just give them a false name. Since + * dma_fence_ops.get_timeline_name is a debug feature, the occasional + * lie seems justifiable. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return "signaled"; + return to_request(fence)->timeline->common->name; } -- cgit v1.2.3 From 17ab792ab1c10019ecc6ed594b6a6aae4cb52f78 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 14:48:20 +0100 Subject: drm/i915: Drop verbose and archaic "ring" from our internal engine names We pretty print the name of an engine in several places, mostly for debug, but also in the GPU hang report. Using "ring" in the name is archaic (we call those engines now to differentiate them from the multiple rings of commands we execute on each engine), quite verbose and often tautological. We run out of room in our GPU hang report for instance if we have more than a couple of engines hung simultaneously. Bit the bullet and update the strings to reflect the common internal names. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170330134820.12273-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ef3c62000697..1fb97e8401de 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -36,45 +36,45 @@ static const struct engine_info { int (*init_execlists)(struct intel_engine_cs *engine); } intel_engines[] = { [RCS] = { - .name = "render ring", - .exec_id = I915_EXEC_RENDER, + .name = "rcs", .hw_id = RCS_HW, + .exec_id = I915_EXEC_RENDER, .mmio_base = RENDER_RING_BASE, .irq_shift = GEN8_RCS_IRQ_SHIFT, .init_execlists = logical_render_ring_init, .init_legacy = intel_init_render_ring_buffer, }, [BCS] = { - .name = "blitter ring", - .exec_id = I915_EXEC_BLT, + .name = "bcs", .hw_id = BCS_HW, + .exec_id = I915_EXEC_BLT, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_blt_ring_buffer, }, [VCS] = { - .name = "bsd ring", - .exec_id = I915_EXEC_BSD, + .name = "vcs", .hw_id = VCS_HW, + .exec_id = I915_EXEC_BSD, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_bsd_ring_buffer, }, [VCS2] = { - .name = "bsd2 ring", - .exec_id = I915_EXEC_BSD, + .name = "vcs2", .hw_id = VCS2_HW, + .exec_id = I915_EXEC_BSD, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_bsd2_ring_buffer, }, [VECS] = { - .name = "video enhancement ring", - .exec_id = I915_EXEC_VEBOX, + .name = "vecs", .hw_id = VECS_HW, + .exec_id = I915_EXEC_VEBOX, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, -- cgit v1.2.3 From 2c170af76cd77258ff41d7270a281e23c386c649 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 15:50:36 +0100 Subject: drm/i915: Do request retirement before marking engines as wedged As we declare an engine as wedged, we mark all of its active requests as in error. However, we don't want to mark successfully completed requests as in error, which requires us to retire those requests first. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170330145041.9005-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2709be922512..cb61cec26db5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2998,10 +2998,15 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); + /* Retire completed requests first so the list of inflight/incomplete + * requests is accurate and we don't try and mark successful requests + * as in error during __i915_gem_set_wedged_BKL(). + */ + i915_gem_retire_requests(dev_priv); + stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); i915_gem_context_lost(dev_priv); - i915_gem_retire_requests(dev_priv); mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); } -- cgit v1.2.3 From 8490ae207f1d1dd6765e89dd7c189c2c975ee3bc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 15:50:37 +0100 Subject: drm/i915: Suppress busy status for engines if wedged If the driver is wedged, HW state may be very inconsistent and report that it is still busy, even though we have stopped using it. This can lead to a double *ERROR* rather than a graceful cleanup after wedging. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170330145041.9005-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 4 +--- drivers/gpu/drm/i915/intel_engine_cs.c | 9 +++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cb61cec26db5..7165f314f830 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3112,9 +3112,7 @@ i915_gem_idle_work_handler(struct work_struct *work) * Wait for last execlists context complete, but bail out in case a * new request is submitted. */ - wait_for(READ_ONCE(dev_priv->gt.active_requests) || - intel_engines_are_idle(dev_priv), - 10); + wait_for(intel_engines_are_idle(dev_priv), 10); if (READ_ONCE(dev_priv->gt.active_requests)) return; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1fb97e8401de..854e8e0c836b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1111,6 +1111,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; + if (READ_ONCE(dev_priv->gt.active_requests)) + return false; + + /* If the driver is wedged, HW state may be very inconsistent and + * report that it is still busy, even though we have stopped using it. + */ + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return true; + for_each_engine(engine, dev_priv, id) { if (!intel_engine_is_idle(engine)) return false; -- cgit v1.2.3 From 31581b60d4c936fa4c8901c175bcbc5c8839029e Mon Sep 17 00:00:00 2001 From: Tamara Diaconita Date: Thu, 30 Mar 2017 14:55:10 +0300 Subject: drivers: gpu: drm: i915L intel_lpe_audio: Fix kerneldoc comments Add description for existing parameter 'pipe' to fix the build warning: ./drivers/gpu/drm/i915/intel_lpe_audio.c:342: warning: No description found for parameter 'pipe'. Signed-off-by: Tamara Diaconita Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170330115510.14054-1-diaconita.tamara@gmail.com --- drivers/gpu/drm/i915/intel_lpe_audio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 7a5b41b1c024..d8ca187ae001 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -331,6 +331,7 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) * audio driver and i915 * @dev_priv: the i915 drm device private data * @eld : ELD data + * @pipe: pipe id * @port: port id * @tmds_clk_speed: tmds clock frequency in Hz * -- cgit v1.2.3 From 4f1cd3eb16aa83c8a3981dba6815f45dab7e2d84 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 30 Mar 2017 11:21:11 +0000 Subject: drm/i915/uc: Move intel_uc_fw_status_repr() to intel_uc.h The file fits better. Also use "" for invalid case. v2: move directly to .h (Joonas) Signed-off-by: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_guc_loader.c | 16 ---------------- drivers/gpu/drm/i915/intel_uc.h | 20 +++++++++++++++++++- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 7d92321f8731..8a1a023e48b2 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -73,22 +73,6 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE); #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) MODULE_FIRMWARE(I915_KBL_GUC_UCODE); -/* User-friendly representation of an enum */ -const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) -{ - switch (status) { - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; - case INTEL_UC_FIRMWARE_NONE: - return "NONE"; - case INTEL_UC_FIRMWARE_PENDING: - return "PENDING"; - case INTEL_UC_FIRMWARE_SUCCESS: - return "SUCCESS"; - default: - return "UNKNOWN!"; - } -}; static u32 get_gttype(struct drm_i915_private *dev_priv) { diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 087192d9c1af..f524387a3e14 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -101,6 +101,25 @@ enum intel_uc_fw_status { INTEL_UC_FIRMWARE_SUCCESS }; +/* User-friendly representation of an enum */ +static inline +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; + case INTEL_UC_FIRMWARE_NONE: + return "NONE"; + case INTEL_UC_FIRMWARE_PENDING: + return "PENDING"; + case INTEL_UC_FIRMWARE_SUCCESS: + return "SUCCESS"; + default: + MISSING_CASE(status); + return ""; + } +} + enum intel_uc_fw_type { INTEL_UC_FW_TYPE_GUC, INTEL_UC_FW_TYPE_HUC @@ -207,7 +226,6 @@ static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 l /* intel_guc_loader.c */ int intel_guc_select_fw(struct intel_guc *guc); int intel_guc_init_hw(struct intel_guc *guc); -const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status); int intel_guc_suspend(struct drm_i915_private *dev_priv); int intel_guc_resume(struct drm_i915_private *dev_priv); u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); -- cgit v1.2.3 From 5e065f1f49a352376f1004cc107712ee9231fee7 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 30 Mar 2017 11:21:12 +0000 Subject: drm/i915/uc: Add intel_uc_fw_type_repr() Some of the DRM_NOTE messages are just using "uC" without specifying which uc they are related to. We can be more user friendly. v2: moved to the header (Joonas) Signed-off-by: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_uc.c | 6 ++++-- drivers/gpu/drm/i915/intel_uc.h | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 19653224b683..aaef4f51a66e 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -182,10 +182,12 @@ static void fetch_uc_fw(struct drm_i915_private *dev_priv, } if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { - DRM_NOTE("Skipping uC firmware version check\n"); + DRM_NOTE("Skipping %s firmware version check\n", + intel_uc_fw_type_repr(uc_fw->type)); } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("uC firmware version %d.%d, required %d.%d\n", + DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->major_ver_found, uc_fw->minor_ver_found, uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); err = -ENOEXEC; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index f524387a3e14..7139e31153ad 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -125,6 +125,20 @@ enum intel_uc_fw_type { INTEL_UC_FW_TYPE_HUC }; +/* User-friendly representation of an enum */ +static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) +{ + switch (type) { + case INTEL_UC_FW_TYPE_GUC: + return "GuC"; + case INTEL_UC_FW_TYPE_HUC: + return "HuC"; + default: + MISSING_CASE(type); + return ""; + } +} + /* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the GuC. -- cgit v1.2.3 From 00bbb72c8619a9196db8acabc720e1d5efd7ce18 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 30 Mar 2017 11:21:13 +0000 Subject: drm/i915/uc: Add intel_uc_fw_fini() Cleanups of uc firmware structs from GuC and Huc are the same for both. Move common code to the helper function to avoid duplication. Signed-off-by: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Joonas Lahtinen Reviewed-by: Arkadiusz Hiler Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_uc.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index aaef4f51a66e..2540bfbfccf7 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -26,6 +26,19 @@ #include "intel_uc.h" #include +/* Cleans up uC firmware by releasing the firmware GEM obj. + */ +static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj; + + obj = fetch_and_zero(&uc_fw->obj); + if (obj) + i915_gem_object_put(obj); + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; +} + /* Reset GuC providing us with fresh state for both GuC and HuC. */ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) @@ -235,21 +248,8 @@ void intel_uc_init_fw(struct drm_i915_private *dev_priv) void intel_uc_fini_fw(struct drm_i915_private *dev_priv) { - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; - struct drm_i915_gem_object *obj; - - obj = fetch_and_zero(&guc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; - - obj = fetch_and_zero(&huc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + __intel_uc_fw_fini(&dev_priv->guc.fw); + __intel_uc_fw_fini(&dev_priv->huc.fw); } int intel_uc_init_hw(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From 40908230e8da7a49f65bccd4d78f09e100bafff0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 30 Mar 2017 11:21:14 +0000 Subject: drm/i915/huc: Remove unused intel_huc_fini() This function is no longer used. Its functionality is covered by intel_uc_fini_fw(). Signed-off-by: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Joonas Lahtinen Reviewed-by: Arkadiusz Hiler Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_huc.c | 18 ------------------ drivers/gpu/drm/i915/intel_uc.h | 1 - 2 files changed, 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 7af900bcdc05..9ee819666a4c 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -250,24 +250,6 @@ fail: return err; } -/** - * intel_huc_fini() - clean up resources allocated for HuC - * @dev_priv: the drm_i915_private device - * - * Cleans up by releasing the huc firmware GEM obj. - */ -void intel_huc_fini(struct drm_i915_private *dev_priv) -{ - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; - struct drm_i915_gem_object *obj; - - obj = fetch_and_zero(&huc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; -} - /** * intel_guc_auth_huc() - authenticate ucode * @dev_priv: the drm_i915_device diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 7139e31153ad..d2dcde79669e 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -270,7 +270,6 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) /* intel_huc.c */ void intel_huc_select_fw(struct intel_huc *huc); -void intel_huc_fini(struct drm_i915_private *dev_priv); int intel_huc_init_hw(struct intel_huc *huc); void intel_guc_auth_huc(struct drm_i915_private *dev_priv); -- cgit v1.2.3 From b57f7f7d31c0ffef5faabd4bf68b8d2577da389a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 30 Mar 2017 11:21:15 +0000 Subject: drm/i915/uc: Move fw path check to fetch_uc_fw() There is no reason to separately check for valid fw path before we try to fetch it. Let the fetch function take care of this. Signed-off-by: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Joonas Lahtinen Reviewed-by: Arkadiusz Hiler Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170330112115.120240-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_uc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 2540bfbfccf7..c117424f1f50 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -112,6 +112,9 @@ static void fetch_uc_fw(struct drm_i915_private *dev_priv, size_t size; int err; + if (!uc_fw->path) + return; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", @@ -239,11 +242,8 @@ fail: void intel_uc_init_fw(struct drm_i915_private *dev_priv) { - if (dev_priv->huc.fw.path) - fetch_uc_fw(dev_priv, &dev_priv->huc.fw); - - if (dev_priv->guc.fw.path) - fetch_uc_fw(dev_priv, &dev_priv->guc.fw); + fetch_uc_fw(dev_priv, &dev_priv->huc.fw); + fetch_uc_fw(dev_priv, &dev_priv->guc.fw); } void intel_uc_fini_fw(struct drm_i915_private *dev_priv) -- cgit v1.2.3 From 72022a705e1da854653e56b67bef57b72f1392eb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 15:50:38 +0100 Subject: drm/i915: Move retire-requests into i915_gem_wait_for_idle() As we now distinguish everywhere that can call i915_gem_retire_requests() following a successful wait_for_idle, we can remove the duplication by moving that call into i915_gem_wait_for_idle() itself. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170330145041.9005-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +----- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_evict.c | 2 -- drivers/gpu/drm/i915/i915_gem_request.c | 3 --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 2 -- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 1 - 6 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d4904245472f..d689e511744e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4179,10 +4179,6 @@ fault_irq_set(struct drm_i915_private *i915, if (err) goto err_unlock; - /* Retire to kick idle work */ - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); - *irq = val; mutex_unlock(&i915->drm.struct_mutex); @@ -4286,7 +4282,7 @@ i915_drop_caches_set(void *data, u64 val) goto unlock; } - if (val & (DROP_RETIRE | DROP_ACTIVE)) + if (val & DROP_RETIRE) i915_gem_retire_requests(dev_priv); lockdep_set_current_reclaim_state(GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7165f314f830..70bc72634a91 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3285,6 +3285,9 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } + + i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); } else { ret = wait_for_timeline(&i915->gt.global_timeline, flags); if (ret) @@ -4426,9 +4429,6 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) if (ret) goto err_unlock; - i915_gem_retire_requests(dev_priv); - GEM_BUG_ON(dev_priv->gt.active_requests); - assert_kernel_context_is_current(dev_priv); i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 2da3a94fc9f3..51e365f70464 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -196,7 +196,6 @@ search_again: if (ret) return ret; - i915_gem_retire_requests(dev_priv); goto search_again; found: @@ -383,7 +382,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) if (ret) return ret; - i915_gem_retire_requests(dev_priv); WARN_ON(!list_empty(&vm->active_list)); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b9d8f43b31e6..f16900848650 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -203,9 +203,6 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) if (ret) return ret; - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests > 1); - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { struct intel_timeline *tl = &timeline->engine[id]; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 926b24c117d6..6b788ffda822 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -291,8 +291,6 @@ static int begin_live_test(struct live_test *t, return err; } - i915_gem_retire_requests(i915); - i915->gpu_error.missed_irq_rings = 0; t->reset_count = i915_reset_count(&i915->gpu_error); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 6ec7c731a267..aa31d6c0cdfb 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -235,7 +235,6 @@ static void hang_fini(struct hang *h) i915_gem_object_put(h->hws); i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); - i915_gem_retire_requests(h->i915); } static int igt_hang_sanitycheck(void *arg) -- cgit v1.2.3 From 25112b64b3d261c192369acb56727d44840a5d30 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 15:50:39 +0100 Subject: drm/i915: Wait for all engines to be idle as part of i915_gem_wait_for_idle() Make i915_gem_wait_for_idle() be a little heavier in order to try and guarantee that the GPU is indeed idle (by checking each engine individually is idle, i.e. all writes are complete and the rings stopped) after waiting for in-flight requests to be completed. v2: And return the final error. v3: Break the wait_for() out from under the WARN -- the macro expansion is hideous and unreadable in the warning message v4: If wait_for_engine() fails the result is catastrophic, mark the device as wedged and wait for the repair team. References: https://bugs.freedesktop.org/show_bug.cgi?id=98836 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170330145041.9005-4-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 70bc72634a91..bbc6f1c9f175 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3271,6 +3271,29 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) return 0; } +static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms) +{ + return wait_for(intel_engine_is_idle(engine), timeout_ms); +} + +static int wait_for_engines(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (GEM_WARN_ON(wait_for_engine(engine, 50))) { + i915_gem_set_wedged(i915); + return -EIO; + } + + GEM_BUG_ON(intel_engine_get_seqno(engine) != + intel_engine_last_submit(engine)); + } + + return 0; +} + int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { int ret; @@ -3288,13 +3311,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) i915_gem_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); + + ret = wait_for_engines(i915); } else { ret = wait_for_timeline(&i915->gt.global_timeline, flags); - if (ret) - return ret; } - return 0; + return ret; } /** Flushes the GTT write domain for the object if it's dirty. */ -- cgit v1.2.3 From 7a453fb82f869ea919aefb4943c7d24194c98698 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 15:50:40 +0100 Subject: drm/i915: Remove redudant wait for each engine to idle from seqno wrap Having added the wait upon each engine to idle into the central i915_gem_wait_for_idle(), we can remove the now redundant wait from reset_all_global_seqno(). This has the advantage of removing the late detection of an error (an engine still busy) which left the seqno reset only partially complete (though it should be safe enough!). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170330145041.9005-5-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f16900848650..1bfc3e664470 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -207,9 +207,6 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) for_each_engine(engine, i915, id) { struct intel_timeline *tl = &timeline->engine[id]; - if (wait_for(intel_engine_is_idle(engine), 50)) - return -EBUSY; - if (!i915_seqno_passed(seqno, tl->seqno)) { /* spin until threads are complete */ while (intel_breadcrumbs_busy(engine)) -- cgit v1.2.3 From ae351beb1f0e439fba1136c083167ddfa8d8fe1d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 15:50:41 +0100 Subject: drm/i915: Combine reset_all_global_seqno() loops into one We can merge the pair of loops over the engines and their timelines into a single loop, making it easier to read and more consistent with the commentary. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170330145041.9005-6-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 1bfc3e664470..6348353b91ec 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -191,7 +191,6 @@ i915_priotree_init(struct i915_priotree *pt) static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { - struct i915_gem_timeline *timeline = &i915->gt.global_timeline; struct intel_engine_cs *engine; enum intel_engine_id id; int ret; @@ -205,7 +204,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; + struct i915_gem_timeline *timeline; + struct intel_timeline *tl = engine->timeline; if (!i915_seqno_passed(seqno, tl->seqno)) { /* spin until threads are complete */ @@ -216,14 +216,10 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* Finally reset hw state */ tl->seqno = seqno; intel_engine_init_global_seqno(engine, seqno); - } - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; - memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); - } + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->engine[id].sync_seqno, 0, + sizeof(timeline->engine[id].sync_seqno)); } return 0; -- cgit v1.2.3 From d7c530b25946e323fc188cd06777ac94f2eddb95 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 30 Mar 2017 17:57:52 -0300 Subject: drm/i915: make a few DDI functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to export them since they're not being used outside the file. The next time I try to find the callers for these things I will know I won't need to look outside intel_ddi.c. Signed-off-by: Paulo Zanoni Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1490907472-10883-1-git-send-email-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 8 ++++---- drivers/gpu/drm/i915/intel_drv.h | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 83abab9379fb..0914ad96a71b 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -539,7 +539,7 @@ intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, * values in advance. This function programs the correct values for * DP/eDP/FDI use cases. */ -void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) +static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; @@ -806,7 +806,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc, DP_TP_CTL_ENABLE); } -void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) +static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_digital_port *intel_dig_port = @@ -1616,8 +1616,8 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) return DDI_BUF_TRANS_SELECT(level); } -void intel_ddi_clk_select(struct intel_encoder *encoder, - struct intel_shared_dpll *pll) +static void intel_ddi_clk_select(struct intel_encoder *encoder, + struct intel_shared_dpll *pll) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e24641b559e2..313fad059bfa 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1229,12 +1229,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv); void intel_crt_reset(struct drm_encoder *encoder); /* intel_ddi.c */ -void intel_ddi_clk_select(struct intel_encoder *encoder, - struct intel_shared_dpll *pll); void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state); -void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); void hsw_fdi_link_train(struct intel_crtc *crtc, const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); @@ -1255,7 +1252,6 @@ bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder); void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, -- cgit v1.2.3 From b9ab1f3f4454163c7850bd6cbaefcafa6955e1c1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 31 Mar 2017 10:26:52 +0000 Subject: drm/i915/uc: Drop use of MISSING_CASE on trivial enums We can rely on compiler to notify us if we miss any case. This approach may also reduce driver size (reported ~4K). Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Jani Nikula Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170331102652.177664-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uc.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index d2dcde79669e..4b7f73aeddac 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -114,10 +114,8 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) return "PENDING"; case INTEL_UC_FIRMWARE_SUCCESS: return "SUCCESS"; - default: - MISSING_CASE(status); - return ""; } + return ""; } enum intel_uc_fw_type { @@ -133,10 +131,8 @@ static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) return "GuC"; case INTEL_UC_FW_TYPE_HUC: return "HuC"; - default: - MISSING_CASE(type); - return ""; } + return "uC"; } /* -- cgit v1.2.3 From 5f09ad8be7bc2d09f2248ad102f0024e6bdb4613 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 31 Mar 2017 20:21:21 +0100 Subject: drm/i915: Clear gt.active_requests before checking idle status commit 8490ae207f1d ("drm/i915: Suppress busy status for engines if wedged") moved the check for inflight requests to the intel_engines_are_idle() check to protect the idle worker. However, the request selftests were also checking the engine idle status and erroring out if they did not become idle within a short period of time after the final wait. In order to accommodate the new check, call retire requests prior to the engine check so that we flush all the waits. Fixes: 8490ae207f1d ("drm/i915: Suppress busy status for engines if wedged") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170331192121.10024-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 6b788ffda822..98b7aac41eec 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -301,7 +301,9 @@ static int end_live_test(struct live_test *t) { struct drm_i915_private *i915 = t->i915; - if (wait_for(intel_engines_are_idle(i915), 1)) { + i915_gem_retire_requests(i915); + + if (wait_for(intel_engines_are_idle(i915), 10)) { pr_err("%s(%s): GPU not idle\n", t->func, t->name); return -EIO; } -- cgit v1.2.3 From ba515d3407dccfe3b4597f4afdaaf2ef1beb48e1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 3 Apr 2017 07:52:18 +0200 Subject: drm/i915: Update DRIVER_DATE to 20170403 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0cbadac02a53..c9b0949f6c1a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170320" -#define DRIVER_TIMESTAMP 1489994464 +#define DRIVER_DATE "20170403" +#define DRIVER_TIMESTAMP 1491198738 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions -- cgit v1.2.3