From ed5982e6ce5f106abcbf071f80730db344a6da42 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 17 Jan 2013 22:23:36 +0100 Subject: drm/i915: Allow userspace to hint that the relocations were known Userspace is able to hint to the kernel that its command stream and auxiliary state buffers already hold the correct presumed addresses and so the relocation process may be skipped if the kernel does not need to move any buffers in preparation for the execbuffer. Thus for the common case where the allotment of buffers is static between batches, we can avoid the overhead of individually checking the relocation entries. Note that this requires userspace to supply the domain tracking and requests for workarounds itself that would otherwise be computed based upon the relocation entries. Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 632000.0/sec. i3-330m: 748000.0/sec to 830000.0/sec. (measured relative to a baseline with neither optimisations applied). Signed-off-by: Chris Wilson Reviewed-by: Imre Deak [danvet: Fixup merge conflict in userspace header due to different baseline trees.] Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c4d2e9c74002..2430b6ad6a85 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -308,6 +308,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 #define I915_PARAM_HAS_PINNED_BATCHES 24 +#define I915_PARAM_HAS_EXEC_NO_RELOC 25 typedef struct drm_i915_getparam { int param; @@ -628,7 +629,11 @@ struct drm_i915_gem_exec_object2 { __u64 offset; #define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) __u64 flags; + __u64 rsvd1; __u64 rsvd2; }; @@ -687,6 +692,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_IS_PINNED (1<<10) +/** Provide a hint to the kernel that the command stream and auxilliary + * state buffers already holds the correct presumed addresses and so the + * relocation process may be skipped if no buffers need to be moved in + * preparation for the execbuffer. + */ +#define I915_EXEC_NO_RELOC (1<<11) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK -- cgit v1.2.3 From eef90ccb8a4d50b219a95cc53878ebb007315b32 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Jan 2013 10:53:17 +0000 Subject: drm/i915: Use the reloc.handle as an index into the execbuffer array Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 623000.0/sec. i3-330m: 748000.0/sec to 789000.0/sec. (measured relative to a baseline with neither optimisations applied). Signed-off-by: Chris Wilson Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2430b6ad6a85..07d59419fe6b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_SECURE_BATCHES 23 #define I915_PARAM_HAS_PINNED_BATCHES 24 #define I915_PARAM_HAS_EXEC_NO_RELOC 25 +#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 typedef struct drm_i915_getparam { int param; @@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_NO_RELOC (1<<11) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) +/** Use the reloc.handle as an index into the exec object array rather + * than as the per-file handle. + */ +#define I915_EXEC_HANDLE_LUT (1<<12) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3 From 8bb0daffb0b8e45188066255b4203446eae181f1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 11 Feb 2013 12:43:09 -0500 Subject: drm/omap: move out of staging Now that the omapdss interface has been reworked so that omapdrm can use dispc directly, we have been able to fix the remaining functional kms issues with omapdrm. And in the mean time the PM sequencing and many other of that open issues have been solved. So I think it makes sense to finally move omapdrm out of staging. Signed-off-by: Rob Clark --- include/uapi/drm/omap_drm.h | 123 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 include/uapi/drm/omap_drm.h (limited to 'include/uapi') diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h new file mode 100644 index 000000000000..1d0b1172664e --- /dev/null +++ b/include/uapi/drm/omap_drm.h @@ -0,0 +1,123 @@ +/* + * include/uapi/drm/omap_drm.h + * + * Copyright (C) 2011 Texas Instruments + * Author: Rob Clark + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef __OMAP_DRM_H__ +#define __OMAP_DRM_H__ + +#include + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ + +#define OMAP_PARAM_CHIPSET_ID 1 /* ie. 0x3430, 0x4430, etc */ + +struct drm_omap_param { + uint64_t param; /* in */ + uint64_t value; /* in (set_param), out (get_param) */ +}; + +#define OMAP_BO_SCANOUT 0x00000001 /* scanout capable (phys contiguous) */ +#define OMAP_BO_CACHE_MASK 0x00000006 /* cache type mask, see cache modes */ +#define OMAP_BO_TILED_MASK 0x00000f00 /* tiled mapping mask, see tiled modes */ + +/* cache modes */ +#define OMAP_BO_CACHED 0x00000000 /* default */ +#define OMAP_BO_WC 0x00000002 /* write-combine */ +#define OMAP_BO_UNCACHED 0x00000004 /* strongly-ordered (uncached) */ + +/* tiled modes */ +#define OMAP_BO_TILED_8 0x00000100 +#define OMAP_BO_TILED_16 0x00000200 +#define OMAP_BO_TILED_32 0x00000300 +#define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32) + +union omap_gem_size { + uint32_t bytes; /* (for non-tiled formats) */ + struct { + uint16_t width; + uint16_t height; + } tiled; /* (for tiled formats) */ +}; + +struct drm_omap_gem_new { + union omap_gem_size size; /* in */ + uint32_t flags; /* in */ + uint32_t handle; /* out */ + uint32_t __pad; +}; + +/* mask of operations: */ +enum omap_gem_op { + OMAP_GEM_READ = 0x01, + OMAP_GEM_WRITE = 0x02, +}; + +struct drm_omap_gem_cpu_prep { + uint32_t handle; /* buffer handle (in) */ + uint32_t op; /* mask of omap_gem_op (in) */ +}; + +struct drm_omap_gem_cpu_fini { + uint32_t handle; /* buffer handle (in) */ + uint32_t op; /* mask of omap_gem_op (in) */ + /* TODO maybe here we pass down info about what regions are touched + * by sw so we can be clever about cache ops? For now a placeholder, + * set to zero and we just do full buffer flush.. + */ + uint32_t nregions; + uint32_t __pad; +}; + +struct drm_omap_gem_info { + uint32_t handle; /* buffer handle (in) */ + uint32_t pad; + uint64_t offset; /* mmap offset (out) */ + /* note: in case of tiled buffers, the user virtual size can be + * different from the physical size (ie. how many pages are needed + * to back the object) which is returned in DRM_IOCTL_GEM_OPEN.. + * This size here is the one that should be used if you want to + * mmap() the buffer: + */ + uint32_t size; /* virtual size for mmap'ing (out) */ + uint32_t __pad; +}; + +#define DRM_OMAP_GET_PARAM 0x00 +#define DRM_OMAP_SET_PARAM 0x01 +/* placeholder for plugin-api +#define DRM_OMAP_GET_BASE 0x02 +*/ +#define DRM_OMAP_GEM_NEW 0x03 +#define DRM_OMAP_GEM_CPU_PREP 0x04 +#define DRM_OMAP_GEM_CPU_FINI 0x05 +#define DRM_OMAP_GEM_INFO 0x06 +#define DRM_OMAP_NUM_IOCTLS 0x07 + +#define DRM_IOCTL_OMAP_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GET_PARAM, struct drm_omap_param) +#define DRM_IOCTL_OMAP_SET_PARAM DRM_IOW (DRM_COMMAND_BASE + DRM_OMAP_SET_PARAM, struct drm_omap_param) +/* placeholder for plugin-api +#define DRM_IOCTL_OMAP_GET_BASE DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GET_BASE, struct drm_omap_get_base) +*/ +#define DRM_IOCTL_OMAP_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GEM_NEW, struct drm_omap_gem_new) +#define DRM_IOCTL_OMAP_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_OMAP_GEM_CPU_PREP, struct drm_omap_gem_cpu_prep) +#define DRM_IOCTL_OMAP_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_OMAP_GEM_CPU_FINI, struct drm_omap_gem_cpu_fini) +#define DRM_IOCTL_OMAP_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GEM_INFO, struct drm_omap_gem_info) + +#endif /* __OMAP_DRM_H__ */ -- cgit v1.2.3