From ed5982e6ce5f106abcbf071f80730db344a6da42 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 17 Jan 2013 22:23:36 +0100 Subject: drm/i915: Allow userspace to hint that the relocations were known Userspace is able to hint to the kernel that its command stream and auxiliary state buffers already hold the correct presumed addresses and so the relocation process may be skipped if the kernel does not need to move any buffers in preparation for the execbuffer. Thus for the common case where the allotment of buffers is static between batches, we can avoid the overhead of individually checking the relocation entries. Note that this requires userspace to supply the domain tracking and requests for workarounds itself that would otherwise be computed based upon the relocation entries. Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 632000.0/sec. i3-330m: 748000.0/sec to 830000.0/sec. (measured relative to a baseline with neither optimisations applied). Signed-off-by: Chris Wilson Reviewed-by: Imre Deak [danvet: Fixup merge conflict in userspace header due to different baseline trees.] Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c4d2e9c74002..2430b6ad6a85 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -308,6 +308,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 #define I915_PARAM_HAS_PINNED_BATCHES 24 +#define I915_PARAM_HAS_EXEC_NO_RELOC 25 typedef struct drm_i915_getparam { int param; @@ -628,7 +629,11 @@ struct drm_i915_gem_exec_object2 { __u64 offset; #define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) __u64 flags; + __u64 rsvd1; __u64 rsvd2; }; @@ -687,6 +692,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_IS_PINNED (1<<10) +/** Provide a hint to the kernel that the command stream and auxilliary + * state buffers already holds the correct presumed addresses and so the + * relocation process may be skipped if no buffers need to be moved in + * preparation for the execbuffer. + */ +#define I915_EXEC_NO_RELOC (1<<11) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK -- cgit v1.2.3 From eef90ccb8a4d50b219a95cc53878ebb007315b32 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Jan 2013 10:53:17 +0000 Subject: drm/i915: Use the reloc.handle as an index into the execbuffer array Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 623000.0/sec. i3-330m: 748000.0/sec to 789000.0/sec. (measured relative to a baseline with neither optimisations applied). Signed-off-by: Chris Wilson Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2430b6ad6a85..07d59419fe6b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_SECURE_BATCHES 23 #define I915_PARAM_HAS_PINNED_BATCHES 24 #define I915_PARAM_HAS_EXEC_NO_RELOC 25 +#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 typedef struct drm_i915_getparam { int param; @@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_NO_RELOC (1<<11) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) +/** Use the reloc.handle as an index into the exec object array rather + * than as the per-file handle. + */ +#define I915_EXEC_HANDLE_LUT (1<<12) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3 From b1edd6a6ecd436af33f210a5c75e0249466fd200 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 17 Jan 2013 16:31:30 +0200 Subject: drm/edid: Add drm_rgb_quant_range_selectable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_rgb_quant_range_selectable() will report whether the monitor claims to support for RGB quantization range selection. The information can be found in the CEA Video capability block. v2: s/quantzation/quantization/ in the comment Reviewed-by: Paulo Zanoni Signed-off-by: Ville Syrjälä Acked-by: David Airlie Signed-off-by: Daniel Vetter --- include/drm/drm_crtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 00d78b5161c0..30892dc9376e 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1033,6 +1033,7 @@ extern u8 *drm_find_cea_extension(struct edid *edid); extern u8 drm_match_cea_mode(struct drm_display_mode *to_match); extern bool drm_detect_hdmi_monitor(struct edid *edid); extern bool drm_detect_monitor_audio(struct edid *edid); +extern bool drm_rgb_quant_range_selectable(struct edid *edid); extern int drm_mode_page_flip_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, -- cgit v1.2.3 From a81cc00c11ab6816fbcb7dd99a60b50e71765d25 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 18 Jan 2013 12:30:31 -0800 Subject: drm/i915: Cut out the infamous ILK w/a from AGP layer And, move it to where the rest of the logic is. There is some slight functionality changes. There was extra paranoid checks in AGP code making sure we never do idle maps on gen2 parts. That was not duplicated as the simple PCI id check should do the right thing. v2: use IS_GEN5 && IS_MOBILE check instead. For now, this is the same as IS_IRONLAKE_M but is more future proof. The workaround docs hint that more than one platform may be effected, but we've never seen such a platform in the wild. (Rodrigo, Daniel) Reviewed-by: Rodrigo Vivi (v1) Cc: Dave Airlie Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- include/drm/intel-gtt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 3e3a166a2690..1747aa095f41 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -13,8 +13,6 @@ struct intel_gtt { unsigned int gtt_mappable_entries; /* Whether i915 needs to use the dmar apis or not. */ unsigned int needs_dmar : 1; - /* Whether we idle the gpu before mapping/unmapping */ - unsigned int do_idle_maps : 1; /* Share the scratch page dma with ppgtts. */ dma_addr_t scratch_page_dma; struct page *scratch_page; -- cgit v1.2.3 From 9c61a32d31a55c8c6e590d83ae5645e14fde09f2 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 18 Jan 2013 12:30:32 -0800 Subject: drm/i915: Remove scratch page from shared We already had a mapping in both (minus the phys_addr in AGP). Reviewed-by: Rodrigo Vivi Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- include/drm/intel-gtt.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 1747aa095f41..c787ee4c4c07 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -13,9 +13,6 @@ struct intel_gtt { unsigned int gtt_mappable_entries; /* Whether i915 needs to use the dmar apis or not. */ unsigned int needs_dmar : 1; - /* Share the scratch page dma with ppgtts. */ - dma_addr_t scratch_page_dma; - struct page *scratch_page; /* needed for ioremap in drm/i915 */ phys_addr_t gma_bus_addr; } *intel_gtt_get(void); -- cgit v1.2.3 From 8d2e630899165d413ae8a2adc36846ac0b71bada Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 18 Jan 2013 12:30:33 -0800 Subject: drm/i915: Needs_dmar, not The reasoning behind our code taking two paths depending upon whether or not we may have been configured for IOMMU isn't clear to me. It should always be safe to use the pci mapping functions as they are designed to abstract the decision we were handling in i915. Aside from simpler code, removing another member for the intel_gtt struct is a nice motivation. I ran this by Chris, and he wasn't concerned about the extra kzalloc, and memory references vs. page_to_phys calculation in the case without IOMMU. v2: Update commit message v3: Remove needs_dmar addition from Zhenyu upstream This reverts (and then other stuff) commit 20652097dadd9a7fb4d652f25466299974bc78f9 Author: Zhenyu Wang Date: Thu Dec 13 23:47:47 2012 +0800 drm/i915: Fix missed needs_dmar setting Reviewed-by: Rodrigo Vivi (v2) Cc: Zhenyu Wang Signed-off-by: Ben Widawsky [danvet: Squash in follow-up fix to remove the bogus hunk which deleted the dma_mask configuration for gen6+.] Signed-off-by: Daniel Vetter --- include/drm/intel-gtt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index c787ee4c4c07..984105cddc57 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -11,8 +11,6 @@ struct intel_gtt { /* Part of the gtt that is mappable by the cpu, for those chips where * this is not the full gtt. */ unsigned int gtt_mappable_entries; - /* Whether i915 needs to use the dmar apis or not. */ - unsigned int needs_dmar : 1; /* needed for ioremap in drm/i915 */ phys_addr_t gma_bus_addr; } *intel_gtt_get(void); -- cgit v1.2.3 From e5c653777986b40e2986d2c918847fddbcba3a34 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 18 Jan 2013 12:30:34 -0800 Subject: agp/intel: Add gma_bus_addr It is no longer used in the i915 code, so isolate it from the shared struct. This was originally part of: commit 0e275518f325418d559c05327775bff894b237f7 Author: Ben Widawsky Date: Mon Jan 14 13:35:33 2013 -0800 agp/intel: decouple more of the agp-i915 sharing Reviewed-by: Rodrigo Vivi Signed-off-by: Ben Widawsky That commit had some other hunks which can't be used due to issues Daniel found in previous commits. Signed-off-by: Ben Widawsky [danvet: drop squash notice from the commit since it's imo ok to keep this one separate.] Signed-off-by: Daniel Vetter --- include/drm/intel-gtt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 984105cddc57..769b6c79097e 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -11,8 +11,6 @@ struct intel_gtt { /* Part of the gtt that is mappable by the cpu, for those chips where * this is not the full gtt. */ unsigned int gtt_mappable_entries; - /* needed for ioremap in drm/i915 */ - phys_addr_t gma_bus_addr; } *intel_gtt_get(void); int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, -- cgit v1.2.3 From a54c0c279f3864171fe53c66e769d5a137c5c651 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 24 Jan 2013 14:45:00 -0800 Subject: drm/i915: remove intel_gtt structure With the probe call in our dispatch table, we can now cut away the last three remaining members in the intel_gtt shared struct and so remove it completely. v2: Rebased on top of Daniel's series Signed-off-by: Ben Widawsky Reviewed-by: Damien Lespiau [danvet: bikeshed commit message a bit.] Signed-off-by: Daniel Vetter --- include/drm/intel-gtt.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 769b6c79097e..cf105557fea9 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -3,15 +3,7 @@ #ifndef _DRM_INTEL_GTT_H #define _DRM_INTEL_GTT_H -struct intel_gtt { - /* Size of memory reserved for graphics by the BIOS */ - unsigned int stolen_size; - /* Total number of gtt entries. */ - unsigned int gtt_total_entries; - /* Part of the gtt that is mappable by the cpu, for those chips where - * this is not the full gtt. */ - unsigned int gtt_mappable_entries; -} *intel_gtt_get(void); +void intel_gtt_get(size_t *gtt_total, size_t *stolen_size); int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, struct agp_bridge_data *bridge); -- cgit v1.2.3