From c649432e86ca677d8762c5764a2832509ca8d449 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 11 Jun 2021 14:22:21 +0100 Subject: drm/i915: Fix busy ioctl commentary Just tidy one instance of incorrect context parameter name and a stray sentence ending from before reporting was converted to be class based. Signed-off-by: Tvrtko Ursulin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210611132221.1055650-1-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c2c7759b7d2e..a1cb4aa035a9 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1348,12 +1348,11 @@ struct drm_i915_gem_busy { * reading from the object simultaneously. * * The value of each engine class is the same as specified in the - * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. - * reported as active itself. Some hardware may have parallel - * execution engines, e.g. multiple media engines, which are - * mapped to the same class identifier and so are not separately - * reported for busyness. + * Some hardware may have parallel execution engines, e.g. multiple + * media engines, which are mapped to the same class identifier and so + * are not separately reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether -- cgit v1.2.3 From 577729533cdc4e37a8c230e404a44ad7a3ff4eda Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Jun 2021 16:00:36 +0100 Subject: drm/i915: Document the Virtual Engine uAPI A little bit of documentation covering the topics of engine discovery, context engine maps and virtual engines. It is not very detailed but supposed to be a starting point of giving a brief high level overview of general principles and intended use cases. v2: * Have the text in uapi header and link from there. v4: * Link from driver-uapi.rst. Signed-off-by: Tvrtko Ursulin Cc: Daniel Vetter Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210618150036.2507653-1-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 188 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index a1cb4aa035a9..2f70c48567c0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1806,6 +1806,69 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map slot 0 + * .num_siblings = 2, + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + /* * i915_context_engines_load_balance: * @@ -1882,6 +1945,61 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ @@ -2375,6 +2493,76 @@ struct drm_i915_query_topology_info { __u8 data[]; }; +/** + * DOC: Engine Discovery uAPI + * + * Engine discovery uAPI is a way of enumerating physical engines present in a + * GPU associated with an open i915 DRM file descriptor. This supersedes the old + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like + * `I915_PARAM_HAS_BLT`. + * + * The need for this interface came starting with Icelake and newer GPUs, which + * started to establish a pattern of having multiple engines of a same class, + * where not all instances were always completely functionally equivalent. + * + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. + * + * Example for getting the list of engines: + * + * .. code-block:: C + * + * struct drm_i915_query_engine_info *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_ENGINE_INFO; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of engines. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * // + * // Alternatively a large buffer can be allocated straight away enabling + * // querying in one pass, in which case item.length should contain the + * // length of the provided buffer. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with info on all engines. + * item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each engine in the array + * for (i = 0; i < info->num_engines; i++) { + * struct drm_i915_engine_info einfo = info->engines[i]; + * u16 class = einfo.engine.class; + * u16 instance = einfo.engine.instance; + * .... + * } + * + * free(info); + * + * Each of the enumerated engines, apart from being defined by its class and + * instance (see `struct i915_engine_class_instance`), also can have flags and + * capabilities defined as documented in i915_drm.h. + * + * For instance video engines which support HEVC encoding will have the + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. + * + * Engine discovery only fully comes to its own when combined with the new way + * of addressing engines when submitting batch buffers using contexts with + * engine maps configured. + */ + /** * struct drm_i915_engine_info * -- cgit v1.2.3 From fe4751c3d513ff4f5422dbf55a966abafe39255e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:06 -0500 Subject: drm/i915: Drop I915_CONTEXT_PARAM_RINGSIZE This reverts commit 88be76cdafc7 ("drm/i915: Allow userspace to specify ringsize on construction"). This API was originally added for OpenCL but the compute-runtime PR has sat open for a year without action so we can still pull it out if we want. I argue we should drop it for three reasons: 1. If the compute-runtime PR has sat open for a year, this clearly isn't that important. 2. It's a very leaky API. Ring size is an implementation detail of the current execlist scheduler and really only makes sense there. It can't apply to the older ring-buffer scheduler on pre-execlist hardware because that's shared across all contexts and it won't apply to the GuC scheduler that's in the pipeline. 3. Having userspace set a ring size in bytes is a bad solution to the problem of having too small a ring. There is no way that userspace has the information to know how to properly set the ring size so it's just going to detect the feature and always set it to the maximum of 512K. This is what the compute-runtime PR does. The scheduler in i915, on the other hand, does have the information to make an informed choice. It could detect if the ring size is a problem and grow it itself. Or, if that's too hard, we could just increase the default size from 16K to 32K or even 64K instead of relying on userspace to do it. Let's drop this API for now and, if someone decides they really care about solving this problem, they can do it properly. Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-2-jason@jlekstrand.net --- include/uapi/drm/i915_drm.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2f70c48567c0..f229c0abcbb5 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1722,24 +1722,8 @@ struct drm_i915_gem_context_param { */ #define I915_CONTEXT_PARAM_PERSISTENCE 0xb -/* - * I915_CONTEXT_PARAM_RINGSIZE: - * - * Sets the size of the CS ringbuffer to use for logical ring contexts. This - * applies a limit of how many batches can be queued to HW before the caller - * is blocked due to lack of space for more commands. - * - * Only reliably possible to be set prior to first use, i.e. during - * construction. At any later point, the current execution must be flushed as - * the ring can only be changed while the context is idle. Note, the ringsize - * can be specified as a constructor property, see - * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. - * - * Only applies to the current set of engine and lost when those engines - * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). - * - * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. - * Default is 16 KiB. +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc /* Must be kept compact -- no holes and well documented */ -- cgit v1.2.3 From 6ff6d61dd2a943bd0c80bb77eb5630e8aa0cac15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:08 -0500 Subject: drm/i915: Drop I915_CONTEXT_PARAM_NO_ZEROMAP The idea behind this param is to support OpenCL drivers with relocations because OpenCL reserves 0x0 for NULL and, if we placed memory there, it would confuse CL kernels. It was originally sent out as part of a patch series including libdrm [1] and Beignet [2] support. However, the libdrm and Beignet patches never landed in their respective upstream projects so this API has never been used. It's never been used in Mesa or any other driver, either. Dropping this API allows us to delete a small bit of code. [1]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067030.html [2]: https://lists.freedesktop.org/archives/intel-gfx/2015-May/067031.html Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-4-jason@jlekstrand.net --- include/uapi/drm/i915_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f229c0abcbb5..79dcafaf476e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1638,6 +1638,10 @@ struct drm_i915_gem_context_param { __u32 size; __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 -- cgit v1.2.3 From 4a766ae40ec8330103a27922b5aa978fdf8bc005 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 8 Jul 2021 10:48:11 -0500 Subject: drm/i915: Drop the CONTEXT_CLONE API (v2) This API allows one context to grab bits out of another context upon creation. It can be used as a short-cut for setparam(getparam()) for things like I915_CONTEXT_PARAM_VM. However, it's never been used by any real userspace. It's used by a few IGT tests and that's it. Since it doesn't add any real value (most of the stuff you can CLONE you can copy in other ways), drop it. There is one thing that this API allows you to clone which you cannot clone via getparam/setparam: timelines. However, timelines are an implementation detail of i915 and not really something that needs to be exposed to userspace. Also, sharing timelines between contexts isn't obviously useful and supporting it has the potential to complicate i915 internally. It also doesn't add any functionality that the client can't get in other ways. If a client really wants a shared timeline, they can use a syncobj and set it as an in and out fence on every submit. v2 (Jason Ekstrand): - More detailed commit message Signed-off-by: Jason Ekstrand Reviewed-by: Daniel Vetter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210708154835.528166-7-jason@jlekstrand.net --- include/uapi/drm/i915_drm.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 79dcafaf476e..e334a8b14ef2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2006,20 +2006,10 @@ struct drm_i915_gem_context_create_ext_setparam { struct drm_i915_gem_context_param param; }; -struct drm_i915_gem_context_create_ext_clone { +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this extension number. + */ #define I915_CONTEXT_CREATE_EXT_CLONE 1 - struct i915_user_extension base; - __u32 clone_id; - __u32 flags; -#define I915_CONTEXT_CLONE_ENGINES (1u << 0) -#define I915_CONTEXT_CLONE_FLAGS (1u << 1) -#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) -#define I915_CONTEXT_CLONE_SSEU (1u << 3) -#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) -#define I915_CONTEXT_CLONE_VM (1u << 5) -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) - __u64 rsvd; -}; struct drm_i915_gem_context_destroy { __u32 ctx_id; -- cgit v1.2.3 From 289f5a72009b8f67334c9f911f7f5fe6e8a80049 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 5 Jul 2021 14:53:07 +0100 Subject: drm/i915/uapi: convert drm_i915_gem_caching to kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert all the drm_i915_gem_caching bits to proper kernel doc. Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210705135310.1502437-2-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 65 +++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 29 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e334a8b14ef2..9078ded93558 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1363,43 +1363,50 @@ struct drm_i915_gem_busy { }; /** - * I915_CACHING_NONE + * struct drm_i915_gem_caching - Set or get the caching for given object + * handle. * - * GPU access is not coherent with cpu caches. Default for machines without an - * LLC. + * Allow userspace to control the GTT caching bits for a given object when the + * object is later mapped through the ppGTT(or GGTT on older platforms lacking + * ppGTT support, or if the object is used for scanout). Note that this might + * require unbinding the object from the GTT first, if its current caching value + * doesn't match. */ -#define I915_CACHING_NONE 0 -/** - * I915_CACHING_CACHED - * - * GPU access is coherent with cpu caches and furthermore the data is cached in - * last-level caches shared between cpu cores and the gpu GT. Default on - * machines with HAS_LLC. - */ -#define I915_CACHING_CACHED 1 -/** - * I915_CACHING_DISPLAY - * - * Special GPU caching mode which is coherent with the scanout engines. - * Transparently falls back to I915_CACHING_NONE on platforms where no special - * cache mode (like write-through or gfdt flushing) is available. The kernel - * automatically sets this mode when using a buffer as a scanout target. - * Userspace can manually set this mode to avoid a costly stall and clflush in - * the hotpath of drawing the first frame. - */ -#define I915_CACHING_DISPLAY 2 - struct drm_i915_gem_caching { /** - * Handle of the buffer to set/get the caching level of. */ + * @handle: Handle of the buffer to set/get the caching level. + */ __u32 handle; /** - * Cacheing level to apply or return value + * @caching: The GTT caching level to apply or possible return value. * - * bits0-15 are for generic caching control (i.e. the above defined - * values). bits16-31 are reserved for platform-specific variations - * (e.g. l3$ caching on gen7). */ + * The supported @caching values: + * + * I915_CACHING_NONE: + * + * GPU access is not coherent with CPU caches. Default for machines + * without an LLC. This means manual flushing might be needed, if we + * want GPU access to be coherent. + * + * I915_CACHING_CACHED: + * + * GPU access is coherent with CPU caches and furthermore the data is + * cached in last-level caches shared between CPU cores and the GPU GT. + * + * I915_CACHING_DISPLAY: + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no + * special cache mode (like write-through or gfdt flushing) is + * available. The kernel automatically sets this mode when using a + * buffer as a scanout target. Userspace can manually set this mode to + * avoid a costly stall and clflush in the hotpath of drawing the first + * frame. + */ +#define I915_CACHING_NONE 0 +#define I915_CACHING_CACHED 1 +#define I915_CACHING_DISPLAY 2 __u32 caching; }; -- cgit v1.2.3 From 3aa8c57fe25a9247e25977f1c2302395cbbd8242 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 5 Jul 2021 14:53:09 +0100 Subject: drm/i915/uapi: convert drm_i915_gem_set_domain to kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert all the drm_i915_gem_set_domain bits to proper kernel doc. Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210705135310.1502437-4-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 9078ded93558..e54f9efaead0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -880,14 +880,38 @@ struct drm_i915_gem_mmap_offset { __u64 extensions; }; +/** + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in + * preparation for accessing the pages via some CPU domain. + * + * Specifying a new write or read domain will flush the object out of the + * previous domain(if required), before then updating the objects domain + * tracking with the new domain. + * + * Note this might involve waiting for the object first if it is still active on + * the GPU. + * + * Supported values for @read_domains and @write_domain: + * + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain + * - I915_GEM_DOMAIN_CPU: CPU cache domain + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain + * + * All other domains are rejected. + */ struct drm_i915_gem_set_domain { - /** Handle for the object */ + /** @handle: Handle for the object. */ __u32 handle; - /** New read domains */ + /** @read_domains: New read domains. */ __u32 read_domains; - /** New write domain */ + /** + * @write_domain: New write domain. + * + * Note that having something in the write domain implies it's in the + * read domain, and only that read domain. + */ __u32 write_domain; }; -- cgit v1.2.3 From e7737b67ab46ee0eeaa0ca1958f72d86f8d8ccf6 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jul 2021 11:15:33 +0100 Subject: drm/i915/uapi: reject caching ioctls for discrete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a noop on DG1, and in the future when need to support other devices which let us control the coherency, then it should be an immutable creation time property for the BO. This will likely be controlled through a new gem_create_ext extension. v2: add some kernel doc for the discrete changes, and document the implicit rules Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Reviewed-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210715101536.2606307-2-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e54f9efaead0..868c2ee7be60 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1395,6 +1395,35 @@ struct drm_i915_gem_busy { * ppGTT support, or if the object is used for scanout). Note that this might * require unbinding the object from the GTT first, if its current caching value * doesn't match. + * + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the at-allocation-time CPU + * caching attributes for the pages might be required(and is expensive) if we + * need to then CPU map the pages later with different caching attributes. This + * inconsistent caching behaviour, while supported on x86, is not universally + * supported on other architectures. So for simplicity we opt for setting + * everything at creation time, whilst also making it immutable, on discrete + * platforms. */ struct drm_i915_gem_caching { /** -- cgit v1.2.3 From aef7b67a79564f6cff488aff7f4b89438ca80b23 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jul 2021 11:15:34 +0100 Subject: drm/i915/uapi: convert drm_i915_gem_userptr to kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing kernel-doc. Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210715101536.2606307-3-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 868c2ee7be60..e20eeeca7a1c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2141,14 +2141,52 @@ struct drm_i915_reset_stats { __u32 pad; }; +/** + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. + * + * Userptr objects have several restrictions on what ioctls can be used with the + * object handle. + */ struct drm_i915_gem_userptr { + /** + * @user_ptr: The pointer to the allocated memory. + * + * Needs to be aligned to PAGE_SIZE. + */ __u64 user_ptr; + + /** + * @user_size: + * + * The size in bytes for the allocated memory. This will also become the + * object size. + * + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, + * or larger. + */ __u64 user_size; + + /** + * @flags: + * + * Supported flags: + * + * I915_USERPTR_READ_ONLY: + * + * Mark the object as readonly, this also means GPU access can only be + * readonly. This is only supported on HW which supports readonly access + * through the GTT. If the HW can't support readonly access, an error is + * returned. + * + * I915_USERPTR_UNSYNCHRONIZED: + * + * NOT USED. Setting this flag will result in an error. + */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** - * Returned handle for the object. + * @handle: Returned handle for the object. * * Object handles are nonzero. */ -- cgit v1.2.3 From 7961c5b60f23dff5d82a523f9aeb8ebf34cf9926 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 14 Jul 2021 14:28:33 +0200 Subject: drm/i915: Add TTM offset argument to mmap. The FIXED mapping is only used for ttm, and tells userspace that the mapping type is pre-defined. This disables the other type of mmap offsets when discrete memory is used, so fix the selftests as well. Document the struct as well, so it shows up in docbook. Cc: Jason Ekstrand Reviewed-by: Daniel Vetter Reviewed-by: Jason Ekstrand Signed-off-by: Maarten Lankhorst [mauld: Included minor fixes from the review comments] Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210714122833.766586-1-maarten.lankhorst@linux.intel.com --- include/uapi/drm/i915_drm.h | 47 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e20eeeca7a1c..0aea82657cdc 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -849,31 +849,56 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, + * and is used to retrieve the fake offset to mmap an object specified by &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. + */ struct drm_i915_gem_mmap_offset { - /** Handle for the object being mapped. */ + /** @handle: Handle for the object being mapped. */ __u32 handle; + /** @pad: Must be zero */ __u32 pad; /** - * Fake offset to use for subsequent mmap call + * @offset: The fake offset to use for subsequent mmap call * * This is a fixed-size type for 32/64 compatibility. */ __u64 offset; /** - * Flags for extended behaviour. + * @flags: Flags for extended behaviour. * - * It is mandatory that one of the MMAP_OFFSET types - * (GTT, WC, WB, UC, etc) should be included. + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: + * + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid + * type. On devices without local memory, this caching mode is invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will + * be used, depending on the object placement on creation. WB will be used + * when the object can only exist in system memory, WC otherwise. */ __u64 flags; -#define I915_MMAP_OFFSET_GTT 0 -#define I915_MMAP_OFFSET_WC 1 -#define I915_MMAP_OFFSET_WB 2 -#define I915_MMAP_OFFSET_UC 3 - /* - * Zero-terminated chain of extensions. +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. * * No current extensions defined; mbz. */ -- cgit v1.2.3 From 81340cf3bddded4fe23a55148152e6d5e2460351 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Jul 2021 11:15:36 +0100 Subject: drm/i915/uapi: reject set_domain for discrete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CPU domain should be static for discrete, and on DG1 we don't need any flushing since everything is already coherent, so really all this does is an object wait, for which we have an ioctl. Longer term the desired caching should be an immutable creation time property for the BO, which can be set with something like gem_create_ext. One other user is iris + userptr, which uses the set_domain to probe all the pages to check if the GUP succeeds, however we now have a PROBE flag for this purpose. v2: add some more kernel doc, also add the implicit rules with caching Suggested-by: Daniel Vetter Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Ramalingam C Acked-by: Kenneth Graunke Link: https://patchwork.freedesktop.org/patch/msgid/20210715101536.2606307-5-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 0aea82657cdc..975087553ea0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -923,6 +923,25 @@ struct drm_i915_gem_mmap_offset { * - I915_GEM_DOMAIN_GTT: Mappable aperture domain * * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. */ struct drm_i915_gem_set_domain { /** @handle: Handle for the object. */ -- cgit v1.2.3 From ee242ca704d386991d7ece0c46134e211d52412b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 17:23:47 -0700 Subject: drm/i915/guc: Implement GuC priority management Implement a simple static mapping algorithm of the i915 priority levels (int, -1k to 1k exposed to user) to the 4 GuC levels. Mapping is as follows: i915 level < 0 -> GuC low level (3) i915 level == 0 -> GuC normal level (2) i915 level < INT_MAX -> GuC high level (1) i915 level == INT_MAX -> GuC highest level (0) We believe this mapping should cover the UMD use cases (3 distinct user levels + 1 kernel level). In addition to static mapping, a simple counter system is attached to each context tracking the number of requests inflight on the context at each level. This is needed as the GuC levels are per context while in the i915 levels are per request. v2: (Daniele) - Add BUILD_BUG_ON to enforce ordering of priority levels - Add missing lockdep to guc_prio_fini - Check for return before setting context registered flag - Map DISPLAY priority or higher to highest guc prio - Update comment for guc_prio Signed-off-by: Matthew Brost Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-33-matthew.brost@intel.com --- include/uapi/drm/i915_drm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 975087553ea0..7f13d241417f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) #define I915_PARAM_HUC_STATUS 42 -- cgit v1.2.3 From b65a9489730a2494f7a2a33a6eb0a12b8f1dd193 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 23 Jul 2021 12:34:05 +0100 Subject: drm/i915/userptr: Probe existence of backing struct pages upon creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jason Ekstrand requested a more efficient method than userptr+set-domain to determine if the userptr object was backed by a complete set of pages upon creation. To be more efficient than simply populating the userptr using get_user_pages() (as done by the call to set-domain or execbuf), we can walk the tree of vm_area_struct and check for gaps or vma not backed by struct page (VM_PFNMAP). The question is how to handle VM_MIXEDMAP which may be either struct page or pfn backed... With discrete we are going to drop support for set_domain(), so offering a way to probe the pages, without having to resort to dummy batches has been requested. v2: - add new query param for the PROBE flag, so userspace can easily check if the kernel supports it(Jason). - use mmap_read_{lock, unlock}. - add some kernel-doc. v3: - In the docs also mention that PROBE doesn't guarantee that the pages will remain valid by the time they are actually used(Tvrtko). - Add a small comment for the hole finding logic(Jason). - Move the param next to all the other params which just return true. Testcase: igt/gem_userptr_blits/probe Signed-off-by: Chris Wilson Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Cc: Jordan Justen Cc: Kenneth Graunke Cc: Jason Ekstrand Cc: Daniel Vetter Cc: Ramalingam C Reviewed-by: Tvrtko Ursulin Acked-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Acked-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210723113405.427004-1-matthew.auld@intel.com --- include/uapi/drm/i915_drm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7f13d241417f..bde5860b3686 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -683,6 +683,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 +/* Query if the kernel supports the I915_USERPTR_PROBE flag. */ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -2231,12 +2234,29 @@ struct drm_i915_gem_userptr { * through the GTT. If the HW can't support readonly access, an error is * returned. * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * * I915_USERPTR_UNSYNCHRONIZED: * * NOT USED. Setting this flag will result in an error. */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** * @handle: Returned handle for the object. -- cgit v1.2.3