summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c8
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.c12
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_shrinker.c12
-rw-r--r--drivers/gpu/drm/i915/i915_guc_submission.c3
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c6
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c7
-rw-r--r--drivers/gpu/drm/i915/intel_display.c14
-rw-r--r--drivers/gpu/drm/i915/intel_engine_cs.c5
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c29
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c53
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h34
12 files changed, 133 insertions, 52 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 026cb52ece0b..94b23fcbc989 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4603,11 +4603,17 @@ static void __i915_gem_free_work(struct work_struct *work)
* unbound now.
*/
+ spin_lock(&i915->mm.free_lock);
while ((freed = llist_del_all(&i915->mm.free_list))) {
+ spin_unlock(&i915->mm.free_lock);
+
__i915_gem_free_objects(i915, freed);
if (need_resched())
- break;
+ return;
+
+ spin_lock(&i915->mm.free_lock);
}
+ spin_unlock(&i915->mm.free_lock);
}
static void __i915_gem_free_object_rcu(struct rcu_head *head)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5bf96a258509..e304dcbc6042 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -106,14 +106,9 @@ static void lut_close(struct i915_gem_context *ctx)
radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
struct i915_vma *vma = rcu_dereference_raw(*slot);
- struct drm_i915_gem_object *obj = vma->obj;
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
-
- if (!i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
-
- __i915_gem_object_release_unless_active(obj);
+ __i915_gem_object_release_unless_active(vma->obj);
}
}
@@ -198,6 +193,11 @@ static void context_close(struct i915_gem_context *ctx)
{
i915_gem_context_set_closed(ctx);
+ /*
+ * The LUT uses the VMA as a backpointer to unref the object,
+ * so we need to clear the LUT before we close all the VMA (inside
+ * the ppgtt).
+ */
lut_close(ctx);
if (ctx->ppgtt)
i915_ppgtt_close(&ctx->ppgtt->base);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 527a2d2d6281..5eaa6893daaa 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1341,7 +1341,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
if (IS_ERR(pt))
goto unwind;
- if (count < GEN8_PTES)
+ if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
gen8_initialize_pt(vm, pt);
gen8_ppgtt_set_pde(vm, pd, pt, pde);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index eb31f8aa5c21..3770e3323fc8 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -162,6 +162,18 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
if (!shrinker_lock(dev_priv, &unlock))
return 0;
+ /*
+ * When shrinking the active list, also consider active contexts.
+ * Active contexts are pinned until they are retired, and so can
+ * not be simply unbound to retire and unpin their pages. To shrink
+ * the contexts, we must wait until the gpu is idle.
+ *
+ * We don't care about errors here; if we cannot wait upon the GPU,
+ * we will free as much as we can and hope to get a second chance.
+ */
+ if (flags & I915_SHRINK_ACTIVE)
+ i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
+
trace_i915_gem_shrink(dev_priv, target, flags);
i915_gem_retire_requests(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index a2e8114b739d..f84c267728fd 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -610,6 +610,7 @@ done:
execlists->first = rb;
if (submit) {
port_assign(port, last);
+ execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
i915_guc_submit(engine);
}
spin_unlock_irq(&engine->timeline->lock);
@@ -633,6 +634,8 @@ static void i915_guc_irq_handler(unsigned long data)
rq = port_request(&port[0]);
}
+ if (!rq)
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
if (!port_isset(last_port))
i915_guc_dequeue(engine);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b1296a55c1e4..f8205841868b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1388,8 +1388,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
bool tasklet = false;
if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
- __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- tasklet = true;
+ if (READ_ONCE(engine->execlists.active)) {
+ __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ tasklet = true;
+ }
}
if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fc77e8191eb5..fbfab2f33023 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -54,6 +54,13 @@ i915_vma_retire(struct i915_gem_active *active,
if (--obj->active_count)
return;
+ /* Prune the shared fence arrays iff completely idle (inc. external) */
+ if (reservation_object_trylock(obj->resv)) {
+ if (reservation_object_test_signaled_rcu(obj->resv, true))
+ reservation_object_add_excl_fence(obj->resv, NULL);
+ reservation_object_unlock(obj->resv);
+ }
+
/* Bump our place on the bound list to keep it roughly in LRU order
* so that we don't steal from recently used but inactive objects
* (unless we are forced to ofc!)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e2ac976844d8..f4a9a182868f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3676,6 +3676,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
intel_pps_unlock_regs_wa(dev_priv);
intel_modeset_init_hw(dev);
+ intel_init_clock_gating(dev_priv);
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display.hpd_irq_setup)
@@ -14350,8 +14351,6 @@ void intel_modeset_init_hw(struct drm_device *dev)
intel_update_cdclk(dev_priv);
dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw;
-
- intel_init_clock_gating(dev_priv);
}
/*
@@ -15063,6 +15062,15 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
struct intel_encoder *encoder;
int i;
+ if (IS_HASWELL(dev_priv)) {
+ /*
+ * WaRsPkgCStateDisplayPMReq:hsw
+ * System hang if this isn't done before disabling all planes!
+ */
+ I915_WRITE(CHICKEN_PAR1_1,
+ I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
+ }
+
intel_modeset_readout_hw_state(dev);
/* HW state is read out, now we need to sanitize this mess. */
@@ -15160,6 +15168,8 @@ void intel_modeset_gem_init(struct drm_device *dev)
intel_init_gt_powersave(dev_priv);
+ intel_init_clock_gating(dev_priv);
+
intel_setup_overlay(dev_priv);
}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index a47a9c6bea52..ab5bf4e2e28e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1548,8 +1548,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
return false;
- /* Both ports drained, no more ELSP submission? */
- if (port_request(&engine->execlists.port[0]))
+ /* Waiting to drain ELSP? */
+ if (READ_ONCE(engine->execlists.active))
return false;
/* ELSP is empty, but there are ready requests? */
@@ -1749,6 +1749,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m)
idx);
}
}
+ drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
rcu_read_unlock();
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7f45dd7dc3e5..d36e25607435 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -575,7 +575,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the state of the GPU is known (idle).
*/
inject_preempt_context(engine);
- execlists->preempt = true;
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT);
goto unlock;
} else {
/*
@@ -683,8 +684,10 @@ done:
unlock:
spin_unlock_irq(&engine->timeline->lock);
- if (submit)
+ if (submit) {
+ execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
execlists_submit_ports(engine);
+ }
}
static void
@@ -696,6 +699,7 @@ execlist_cancel_port_requests(struct intel_engine_execlists *execlists)
while (num_ports-- && port_isset(port)) {
struct drm_i915_gem_request *rq = port_request(port);
+ GEM_BUG_ON(!execlists->active);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
i915_gem_request_put(rq);
@@ -730,7 +734,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
INIT_LIST_HEAD(&rq->priotree.link);
- rq->priotree.priority = INT_MAX;
dma_fence_set_error(&rq->fence, -EIO);
__i915_gem_request_submit(rq);
@@ -861,15 +864,21 @@ static void intel_lrc_irq_handler(unsigned long data)
unwind_incomplete_requests(engine);
spin_unlock_irq(&engine->timeline->lock);
- GEM_BUG_ON(!execlists->preempt);
- execlists->preempt = false;
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT));
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT);
continue;
}
if (status & GEN8_CTX_STATUS_PREEMPTED &&
- execlists->preempt)
+ execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT))
continue;
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_USER));
+
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
@@ -881,7 +890,6 @@ static void intel_lrc_irq_handler(unsigned long data)
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
trace_i915_gem_request_out(rq);
- rq->priotree.priority = INT_MAX;
i915_gem_request_put(rq);
execlists_port_complete(execlists, port);
@@ -892,6 +900,9 @@ static void intel_lrc_irq_handler(unsigned long data)
/* After the final element, the hw should be idle */
GEM_BUG_ON(port_count(port) == 0 &&
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+ if (port_count(port) == 0)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_USER);
}
if (head != execlists->csb_head) {
@@ -901,7 +912,7 @@ static void intel_lrc_irq_handler(unsigned long data)
}
}
- if (!execlists->preempt)
+ if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
@@ -1460,7 +1471,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
execlists->csb_head = -1;
- execlists->preempt = false;
+ execlists->active = 0;
/* After a GPU reset, we may have requests to replay */
if (!i915_modparams.enable_guc_submission && execlists->first)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c42a65a93b3a..aa12a44e9a76 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3133,7 +3133,11 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev,
struct intel_crtc_state *newstate)
{
struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
- struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk;
+ struct intel_atomic_state *intel_state =
+ to_intel_atomic_state(newstate->base.state);
+ const struct intel_crtc_state *oldstate =
+ intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
+ const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
int level, max_level = ilk_wm_max_level(to_i915(dev));
/*
@@ -3142,6 +3146,9 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev,
* and after the vblank.
*/
*a = newstate->wm.ilk.optimal;
+ if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base))
+ return 0;
+
a->pipe_enabled |= b->pipe_enabled;
a->sprites_enabled |= b->sprites_enabled;
a->sprites_scaled |= b->sprites_scaled;
@@ -5755,12 +5762,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
mutex_unlock(&dev_priv->wm.wm_mutex);
}
+/*
+ * FIXME should probably kill this and improve
+ * the real watermark readout/sanitation instead
+ */
+static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
+{
+ I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
+ I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
+ I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
+
+ /*
+ * Don't touch WM1S_LP_EN here.
+ * Doing so could cause underruns.
+ */
+}
+
void ilk_wm_get_hw_state(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct ilk_wm_values *hw = &dev_priv->wm.hw;
struct drm_crtc *crtc;
+ ilk_init_lp_watermarks(dev_priv);
+
for_each_crtc(dev, crtc)
ilk_pipe_wm_get_hw_state(crtc);
@@ -8207,18 +8232,6 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
}
}
-static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
-{
- I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
- I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
- I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
-
- /*
- * Don't touch WM1S_LP_EN here.
- * Doing so could cause underruns.
- */
-}
-
static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
{
uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
@@ -8252,8 +8265,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
(I915_READ(DISP_ARB_CTL) |
DISP_FBC_WM_DIS));
- ilk_init_lp_watermarks(dev_priv);
-
/*
* Based on the document from hardware guys the following bits
* should be set unconditionally in order to enable FBC.
@@ -8366,8 +8377,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_GT_MODE,
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
- ilk_init_lp_watermarks(dev_priv);
-
I915_WRITE(CACHE_MODE_0,
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
@@ -8594,8 +8603,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
I915_GTT_PAGE_SIZE_2M);
enum pipe pipe;
- ilk_init_lp_watermarks(dev_priv);
-
/* WaSwitchSolVfFArbitrationPriority:bdw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -8646,8 +8653,6 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
{
- ilk_init_lp_watermarks(dev_priv);
-
/* L3 caching of data atomics doesn't work -- disable it. */
I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
I915_WRITE(HSW_ROW_CHICKEN3,
@@ -8691,10 +8696,6 @@ static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
/* WaSwitchSolVfFArbitrationPriority:hsw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
- /* WaRsPkgCStateDisplayPMReq:hsw */
- I915_WRITE(CHICKEN_PAR1_1,
- I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
-
lpt_init_clock_gating(dev_priv);
}
@@ -8702,8 +8703,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
{
uint32_t snpcr;
- ilk_init_lp_watermarks(dev_priv);
-
I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
/* WaDisableEarlyCull:ivb */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 17186f067408..6a42ed618a28 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -241,9 +241,17 @@ struct intel_engine_execlists {
} port[EXECLIST_MAX_PORTS];
/**
- * @preempt: are we currently handling a preempting context switch?
+ * @active: is the HW active? We consider the HW as active after
+ * submitting any context for execution and until we have seen the
+ * last context completion event. After that, we do not expect any
+ * more events until we submit, and so can park the HW.
+ *
+ * As we have a small number of different sources from which we feed
+ * the HW, we track the state of each inside a single bitfield.
*/
- bool preempt;
+ unsigned int active;
+#define EXECLISTS_ACTIVE_USER 0
+#define EXECLISTS_ACTIVE_PREEMPT 1
/**
* @port_mask: number of execlist ports - 1
@@ -525,6 +533,27 @@ struct intel_engine_cs {
u32 (*get_cmd_length_mask)(u32 cmd_header);
};
+static inline void
+execlists_set_active(struct intel_engine_execlists *execlists,
+ unsigned int bit)
+{
+ __set_bit(bit, (unsigned long *)&execlists->active);
+}
+
+static inline void
+execlists_clear_active(struct intel_engine_execlists *execlists,
+ unsigned int bit)
+{
+ __clear_bit(bit, (unsigned long *)&execlists->active);
+}
+
+static inline bool
+execlists_is_active(const struct intel_engine_execlists *execlists,
+ unsigned int bit)
+{
+ return test_bit(bit, (unsigned long *)&execlists->active);
+}
+
static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
@@ -538,6 +567,7 @@ execlists_port_complete(struct intel_engine_execlists * const execlists,
const unsigned int m = execlists->port_mask;
GEM_BUG_ON(port_index(port, execlists) != 0);
+ GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
memmove(port, port + 1, m * sizeof(struct execlist_port));
memset(port + m, 0, sizeof(struct execlist_port));