summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2014-01-27 13:52:34 +0000
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-02-13 13:50:23 -0800
commitda5f952cebc8fe41b430378b27c599f0dfdcf6ac (patch)
tree7308d8985df7b93e8c026c55de17e451f5a96a12
parent8b839b5d48a000069a60c9c80b13a96caa62f27e (diff)
drm/i915: Decouple GPU error reporting from ring initialisation
commit 372fbb8e3927fc76b0f842d8eb8a798a71d8960f upstream. Currently we report through our error state only the rings that have been initialised (as detected by ring->obj). This check is done after the GPU reset and ring re-initialisation, which means that the software state may not be the same as when we captured the hardware error and we may not print out any of the vital information for debugging the hang. This (and the implied object leak) is a regression from commit 3d57e5bd1284f44e325f3a52d966259ed42f9e05 Author: Ben Widawsky <ben@bwidawsk.net> Date: Mon Oct 14 10:01:36 2013 -0700 drm/i915: Do a fuller init after reset Note that we are already starting to get bug reports with incomplete error states from 3.13, which also hampers debugging userspace driver issues. v2: Prevent a NULL dereference on 830gm/845g after a GPU reset where the scratch obj may be NULL. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Ben Widawsky <ben@bwidawsk.net> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> References: https://bugs.freedesktop.org/show_bug.cgi?id=74094 Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> [danvet: Add a bit of fluff to make it clear we need this expedited in stable.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c22
2 files changed, 16 insertions, 7 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ab0f2c0a440c..881c9af0971d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -296,6 +296,7 @@ struct drm_i915_error_state {
u64 fence[I915_MAX_NUM_FENCES];
struct timeval time;
struct drm_i915_error_ring {
+ bool valid;
struct drm_i915_error_object {
int page_count;
u32 gtt_offset;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index dae364f0028c..354e3e32b30e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -221,6 +221,9 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
unsigned ring)
{
BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */
+ if (!error->ring[ring].valid)
+ return;
+
err_printf(m, "%s command stream:\n", ring_str(ring));
err_printf(m, " HEAD: 0x%08x\n", error->head[ring]);
err_printf(m, " TAIL: 0x%08x\n", error->tail[ring]);
@@ -272,7 +275,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
struct drm_device *dev = error_priv->dev;
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_error_state *error = error_priv->error;
- struct intel_ring_buffer *ring;
int i, j, page, offset, elt;
if (!error) {
@@ -306,7 +308,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
if (INTEL_INFO(dev)->gen == 7)
err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
- for_each_ring(ring, dev_priv, i)
+ for (i = 0; i < ARRAY_SIZE(error->ring); i++)
i915_ring_error_state(m, dev, error, i);
if (error->active_bo)
@@ -363,8 +365,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
}
}
- obj = error->ring[i].ctx;
- if (obj) {
+ if ((obj = error->ring[i].ctx)) {
err_printf(m, "%s --- HW Context = 0x%08x\n",
dev_priv->ring[i].name,
obj->gtt_offset);
@@ -644,7 +645,8 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
return NULL;
obj = ring->scratch.obj;
- if (acthd >= i915_gem_obj_ggtt_offset(obj) &&
+ if (obj != NULL &&
+ acthd >= i915_gem_obj_ggtt_offset(obj) &&
acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size)
return i915_error_object_create(dev_priv, obj);
}
@@ -747,11 +749,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
struct drm_i915_error_state *error)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- struct intel_ring_buffer *ring;
struct drm_i915_gem_request *request;
int i, count;
- for_each_ring(ring, dev_priv, i) {
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ struct intel_ring_buffer *ring = &dev_priv->ring[i];
+
+ if (ring->dev == NULL)
+ continue;
+
+ error->ring[i].valid = true;
+
i915_record_ring_state(dev, error, ring);
error->ring[i].batchbuffer =