drm/i915/execlists: Reset RING registers upon resume

commit bafb2f7d4755bf1571bd5e9a03b97f3fc4fe69ae upstream. There is a disparity in the context image saved to disk and our own bookkeeping - that is we presume the RING_HEAD and RING_TAIL match our stored ce->ring->tail value. However, as we emit WA_TAIL_DWORDS into the ring but may not tell the GPU about them, the GPU may be lagging behind our bookkeeping. Upon hibernation we do not save stolen pages, presuming that their contents are volatile. This means that although we start writing into the ring at tail, the GPU starts executing from its HEAD and there may be some garbage in between and so the GPU promptly hangs upon resume. Testcase: igt/gem_exec_suspend/basic-S4 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96526 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20160921135108.29574-3-chris@chris-wilson.co.uk Cc: Eric Blau <eblau1@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Chris Wilson <chris@chris-wilson.co.uk> 2016-09-21 14:51:08 +0100
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-02-09 08:08:31 +0100
commit: f2a0409a08502d64fbe3990354dff5902b08d2fb (patch)
tree: 2658dff5334b26957424e8068bfb060bd0f1f473
parent: 72cd604cfd864b06c5a10d2bb139b19825e0fcbc (diff)
1 files changed, 31 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 67db1577ee49..4147e51cf893 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2152,30 +2152,42 @@ error_deref_obj:
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 {
-	struct i915_gem_context *ctx = dev_priv->kernel_context;
 	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+
+	/* Because we emit WA_TAIL_DWORDS there may be a disparity
+	 * between our bookkeeping in ce->ring->head and ce->ring->tail and
+	 * that stored in context. As we only write new commands from
+	 * ce->ring->tail onwards, everything before that is junk. If the GPU
+	 * starts reading from its RING_HEAD from the context, it may try to
+	 * execute that junk and die.
+	 *
+	 * So to avoid that we reset the context images upon resume. For
+	 * simplicity, we just zero everything out.
+	 */
+	list_for_each_entry(ctx, &dev_priv->context_list, link) {
+		for_each_engine(engine, dev_priv) {
+			struct intel_context *ce = &ctx->engine[engine->id];
+			u32 *reg;
 
-	for_each_engine(engine, dev_priv) {
-		struct intel_context *ce = &ctx->engine[engine->id];
-		void *vaddr;
-		uint32_t *reg_state;
-
-		if (!ce->state)
-			continue;
-
-		vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
-		if (WARN_ON(IS_ERR(vaddr)))
-			continue;
+			if (!ce->state)
+				continue;
 
-		reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+			reg = i915_gem_object_pin_map(ce->state->obj,
+						      I915_MAP_WB);
+			if (WARN_ON(IS_ERR(reg)))
+				continue;
 
-		reg_state[CTX_RING_HEAD+1] = 0;
-		reg_state[CTX_RING_TAIL+1] = 0;
+			reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg);
+			reg[CTX_RING_HEAD+1] = 0;
+			reg[CTX_RING_TAIL+1] = 0;
 
-		ce->state->obj->dirty = true;
-		i915_gem_object_unpin_map(ce->state->obj);
+			ce->state->obj->dirty = true;
+			i915_gem_object_unpin_map(ce->state->obj);
 
-		ce->ring->head = 0;
-		ce->ring->tail = 0;
+			ce->ring->head = ce->ring->tail = 0;
+			ce->ring->last_retired_head = -1;
+			intel_ring_update_space(ce->ring);
+		}
 	}
 }
author	Chris Wilson <chris@chris-wilson.co.uk>	2016-09-21 14:51:08 +0100
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2017-02-09 08:08:31 +0100
commit	f2a0409a08502d64fbe3990354dff5902b08d2fb (patch)
tree	2658dff5334b26957424e8068bfb060bd0f1f473
parent	72cd604cfd864b06c5a10d2bb139b19825e0fcbc (diff)