From 74f6e183913b5dc90a004cafa84159ddb61cd0f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Sep 2018 11:47:07 +0100 Subject: drm/i915: Convert to BITS_PER_TYPE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 9144d75e22ca ("include/linux/bitops.h: introduce BITS_PER_TYPE"), we made BITS_PER_TYPE available to all and now we can use the macro to replace some open-coded computation of sizeof(T) * BITS_PER_BYTE. Suggested-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180926104707.17410-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index db9688d14912..717f4321e987 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5959,7 +5959,7 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, * the bits. */ BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > - sizeof(atomic_t) * BITS_PER_BYTE); + BITS_PER_TYPE(atomic_t)); if (old) { WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits)); -- cgit v1.2.3 From 6edafc4eb3e4ae26b1b5dbc0cabfc82d96d6b9bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 18 Sep 2018 13:47:10 -0700 Subject: drm/i915: Unset reset pch handshake when PCH is not present in one place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now RESET_PCH_HANDSHAKE_ENABLE is enabled all the time inside intel_power_domains_init_hw() and, if the PCH is NOP, it is unset in i915_gem_init_hw(). 
So make skl_pch_reset_handshake() handle both cases and call it for the missing gens in intel_power_domains_init_hw(). Ivybridge has a different register and bits but with the same objective, so move it too. v2(Rodrigo): - handling IVYBRIDGE case inside intel_pch_reset_handshake() v4(Rodrigo and Ville): - moving the enable/disable decision to callers Cc: Rodrigo Vivi Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180918204714.27306-2-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 717f4321e987..627b1c8a7ea3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5301,18 +5301,6 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); - if (HAS_PCH_NOP(dev_priv)) { - if (IS_IVYBRIDGE(dev_priv)) { - u32 temp = I915_READ(GEN7_MSG_CTL); - temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); - I915_WRITE(GEN7_MSG_CTL, temp); - } else if (INTEL_GEN(dev_priv) >= 7) { - u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); - temp &= ~RESET_PCH_HANDSHAKE_ENABLE; - I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); - } - } - intel_gt_workarounds_apply(dev_priv); i915_gem_init_swizzling(dev_priv); -- cgit v1.2.3 From f8e57863f81f962a1837d6a17825752de5bc23f7 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 26 Sep 2018 09:03:53 +0100 Subject: drm/i915: Trim partial view sg lists Partial views are small but there can be many of them, and since the sg list space for them is allocated pessimistically, we can save some slab by trimming the unused tail entries. 
Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180926080353.20867-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 627b1c8a7ea3..28e943ee8b5e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2491,7 +2491,7 @@ unlock: mutex_unlock(&obj->mm.lock); } -static bool i915_sg_trim(struct sg_table *orig_st) +bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; struct scatterlist *sg, *new_sg; -- cgit v1.2.3 From e9eaf82d97a2b05460ff5ef6a3e07446f7d049fe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Oct 2018 15:47:55 +0100 Subject: drm/i915: Priority boost for waiting clients Latency is in the eye of the beholder. In the case where a client stops and waits for the gpu, give that request chain a small priority boost (not so that it overtakes higher priority clients, to preserve the external ordering) so that ideally the wait completes earlier. v2: Tvrtko recommends to keep the boost-from-user-stall as small as possible and to allow new client flows to be preferred for interactivity over stalls. 
Testcase: igt/gem_sync/switch-default Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Dmitry Rogozhkin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 28e943ee8b5e..7d45e71100bc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1748,6 +1748,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, */ err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | + I915_WAIT_PRIORITY | (write_domain ? I915_WAIT_ALL : 0), MAX_SCHEDULE_TIMEOUT, to_rps_client(file)); @@ -3751,7 +3752,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) start = ktime_get(); ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_PRIORITY | + I915_WAIT_ALL, to_wait_timeout(args->timeout_ns), to_rps_client(file)); -- cgit v1.2.3 From a5e856a5348f6cd50889d125c40bbeec7328e466 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 12 Oct 2018 15:02:28 +0100 Subject: drm/i915: Large page offsets for pread/pwrite Handle integer overflow when computing the sub-page length for shmem backed pread/pwrite. 
Reported-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181012140228.29783-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7d45e71100bc..93d09282710d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1127,11 +1127,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, offset = offset_in_page(args->offset); for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { struct page *page = i915_gem_object_get_page(obj, idx); - int length; - - length = remain; - if (offset + length > PAGE_SIZE) - length = PAGE_SIZE - offset; + unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); ret = shmem_pread(page, offset, length, user_data, page_to_phys(page) & obj_do_bit17_swizzling, @@ -1575,11 +1571,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = offset_in_page(args->offset); for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { struct page *page = i915_gem_object_get_page(obj, idx); - int length; - - length = remain; - if (offset + length > PAGE_SIZE) - length = PAGE_SIZE - offset; + unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); ret = shmem_pwrite(page, offset, length, user_data, page_to_phys(page) & obj_do_bit17_swizzling, -- cgit v1.2.3 From e6db7f4d7c5005258b862a5ed1732756fccb6bfa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 5 Nov 2018 17:06:40 +0000 Subject: drm/i915: Break long iterations for get/put shmemfs pages As we may have to iterate a few thousand elements to acquire and release the shmemfs backing storage for a GPU object, we need to break up the long loop with cond_resched() to retain a modicum of low latency for other processes. 
Testcase: igt/benchmarks/gem_syslatency Signed-off-by: Chris Wilson Cc: Kuo-Hsin Yang Cc: Matthew Auld Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20181105170640.26905-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 93d09282710d..347b3836c809 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2404,6 +2404,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, mark_page_accessed(page); put_page(page); + cond_resched(); } obj->mm.dirty = false; @@ -2574,6 +2575,7 @@ rebuild_st: gfp_t gfp = noreclaim; do { + cond_resched(); page = shmem_read_mapping_page_gfp(mapping, i, gfp); if (likely(!IS_ERR(page))) break; @@ -2584,7 +2586,6 @@ rebuild_st: } i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); - cond_resched(); /* * We've tried hard to allocate the memory by reaping -- cgit v1.2.3 From 64e3d12f769d60eaee6d2e53a9b7f0b3814f32ed Mon Sep 17 00:00:00 2001 From: Kuo-Hsin Yang Date: Tue, 6 Nov 2018 13:23:24 +0000 Subject: mm, drm/i915: mark pinned shmemfs pages as unevictable The i915 driver uses shmemfs to allocate backing storage for gem objects. These shmemfs pages can be pinned (increased ref count) by shmem_read_mapping_page_gfp(). When a lot of pages are pinned, vmscan wastes a lot of time scanning these pinned pages. In some extreme cases, all pages in the inactive anon lru are pinned and only the inactive anon lru is scanned due to inactive_ratio, so the system cannot swap and invokes the oom-killer. Mark these pinned pages as unevictable to speed up vmscan. Export pagevec API check_move_unevictable_pages(). This patch was inspired by Chris Wilson's change [1]. 
[1]: https://patchwork.kernel.org/patch/9768741/ Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Peter Zijlstra Cc: Andrew Morton Cc: Dave Hansen Signed-off-by: Kuo-Hsin Yang Acked-by: Michal Hocko # mm part Reviewed-by: Chris Wilson Acked-by: Dave Hansen Acked-by: Andrew Morton Link: https://patchwork.freedesktop.org/patch/msgid/20181106132324.17390-1-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 347b3836c809..5b80b0c14aed 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2382,11 +2382,23 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) invalidate_mapping_pages(mapping, 0, (loff_t)-1); } +/* + * Move pages to appropriate lru and release the pagevec, decrementing the + * ref count of those pages. 
+ */ +static void check_release_pagevec(struct pagevec *pvec) +{ + check_move_unevictable_pages(pvec); + __pagevec_release(pvec); + cond_resched(); +} + static void i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, struct sg_table *pages) { struct sgt_iter sgt_iter; + struct pagevec pvec; struct page *page; __i915_gem_object_release_shmem(obj, pages, true); @@ -2396,6 +2408,9 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_save_bit_17_swizzle(obj, pages); + mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping); + + pagevec_init(&pvec); for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) set_page_dirty(page); @@ -2403,9 +2418,11 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); - put_page(page); - cond_resched(); + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); obj->mm.dirty = false; sg_free_table(pages); @@ -2526,6 +2543,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); unsigned int sg_page_sizes; + struct pagevec pvec; gfp_t noreclaim; int ret; @@ -2561,6 +2579,7 @@ rebuild_st: * Fail silently without starting the shrinker */ mapping = obj->base.filp->f_mapping; + mapping_set_unevictable(mapping); noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); noreclaim |= __GFP_NORETRY | __GFP_NOWARN; @@ -2675,8 +2694,14 @@ rebuild_st: err_sg: sg_mark_end(sg); err_pages: - for_each_sgt_page(page, sgt_iter, st) - put_page(page); + mapping_clear_unevictable(mapping); + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) + check_release_pagevec(&pvec); 
sg_free_table(st); kfree(st); -- cgit v1.2.3 From 8811d616dfaa8c6e1905a20ce0543ec401275997 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 9 Nov 2018 09:03:11 +0000 Subject: drm/i915: Initialise the obj->rcu head Make the rcu_head known to the system, in particular for debugobjects. And having declared it for debugobjects, we need to tidy up afterwards. v2: mark the obj->rcu as being destroyed when we reuse its location for the freed list. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108691 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20181109090311.15321-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5b80b0c14aed..5f69b9aadae8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4739,6 +4739,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); + init_rcu_head(&obj->rcu); + obj->ops = ops; reservation_object_init(&obj->__builtin_resv); @@ -5005,6 +5007,13 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head) container_of(head, typeof(*obj), rcu); struct drm_i915_private *i915 = to_i915(obj->base.dev); + /* + * We reuse obj->rcu for the freed list, so we had better not treat + * it like a rcu_head from this point forwards. And we expect all + * objects to be freed via this path. 
+ */ + destroy_rcu_head(&obj->rcu); + /* * Since we require blocking on struct_mutex to unbind the freed * object from the GPU before releasing resources back to the -- cgit v1.2.3 From a1db9c54eb29afd9842a08b2cbc2bc07a8a602b9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Nov 2018 09:21:01 +0000 Subject: drm/i915: Track rcu_head for our idle worker While our little rcu worker might be able to be replaced now by the dedicated rcu_work, in the meantime we should mark up the rcu_head for correct debugobjects tracking. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20181108092101.27598-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5f69b9aadae8..7d9457915704 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3557,6 +3557,8 @@ static void __sleep_rcu(struct rcu_head *rcu) struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); struct drm_i915_private *i915 = s->i915; + destroy_rcu_head(&s->rcu); + if (same_epoch(i915, s->epoch)) { INIT_WORK(&s->work, __sleep_work); queue_work(i915->wq, &s->work); @@ -3673,6 +3675,7 @@ out_rearm: if (same_epoch(dev_priv, epoch)) { struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s) { + init_rcu_head(&s->rcu); s->i915 = dev_priv; s->epoch = epoch; call_rcu(&s->rcu, __sleep_rcu); -- cgit v1.2.3