From 7f3f317a66cac307f6fbc1b5dd74902fb1b48860 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 May 2019 22:11:25 +0100 Subject: drm/i915: Restore control over ppgtt for context creation ABI Having hid the partially exposed new ABI from the PR, put it back again for completion of context recovery. A significant part of context recovery is the ability to reuse as much of the old context as is feasible (to avoid expensive reconstruction). The biggest chunk kept hidden at the moment is fine-control over the ctx->ppgtt (the GPU page tables and associated translation tables and kernel maps), so make control over the ctx->ppgtt explicit. This allows userspace to create and share virtual memory address spaces (within the limits of a single fd) between contexts they own, along with the ability to query the contexts for the vm state. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-1-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3a73f5316766..d6ad4a15b2b9 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -355,6 +355,8 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_I915_QUERY 0x39 +#define DRM_I915_GEM_VM_CREATE 0x3a +#define DRM_I915_GEM_VM_DESTROY 0x3b /* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) @@ -415,6 +417,8 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) #define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) +#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control) +#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1507,6 +1511,17 @@ struct drm_i915_gem_context_param { * On creation, all new contexts are marked as recoverable. */ #define I915_CONTEXT_PARAM_RECOVERABLE 0x8 + + /* + * The id of the associated virtual memory address space (ppGTT) of + * this context. Can be retrieved and passed to another context + * (on the same fd) for both to use the same ppGTT and so share + * address layouts, and avoid reloading the page tables on context + * switches between themselves. + * + * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. + */ +#define I915_CONTEXT_PARAM_VM 0x9 /* Must be kept compact -- no holes and well documented */ __u64 value; -- cgit v1.2.3 From 976b55f0e1db5cb8fccb0a42f68ea77ae42604a6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 May 2019 22:11:26 +0100 Subject: drm/i915: Allow a context to define its set of engines Over the last few years, we have debated how to extend the user API to support an increase in the number of engines, that may be sparse and even be heterogeneous within a class (not all video decoders created equal). We settled on using (class, instance) tuples to identify a specific engine, with an API for the user to construct a map of engines to capabilities. Into this picture, we then add a challenge of virtual engines; one user engine that maps behind the scenes to any number of physical engines. To keep it general, we want the user to have full control over that mapping. To that end, we allow the user to constrain a context to define the set of engines that it can access, order fully controlled by the user via (class, instance). With such precise control in context setup, we can continue to use the existing execbuf uABI of specifying a single index; only now it doesn't automagically map onto the engines, it uses the user defined engine map from the context. v2: Fixup freeing of local on success of get_engines() v3: Allow empty engines[] v4: s/nengine/num_engines/ v5: Replace 64 limit on num_engines with a note that execbuf is currently limited to only using the first 64 engines. v6: Actually use the engines_mutex to guard the ctx->engines. Testcase: igt/gem_ctx_engines Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-2-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d6ad4a15b2b9..8e1bb22926e4 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -136,6 +136,7 @@ enum drm_i915_gem_engine_class { struct i915_engine_class_instance { __u16 engine_class; /* see enum drm_i915_gem_engine_class */ __u16 engine_instance; +#define I915_ENGINE_CLASS_INVALID_NONE -1 }; /** @@ -1522,6 +1523,26 @@ struct drm_i915_gem_context_param { * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. */ #define I915_CONTEXT_PARAM_VM 0x9 + +/* + * I915_CONTEXT_PARAM_ENGINES: + * + * Bind this context to operate on this subset of available engines. Henceforth, + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0] + * and upwards. Slots 0...N are filled in using the specified (class, instance). + * Use + * engine_class: I915_ENGINE_CLASS_INVALID, + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE + * to specify a gap in the array that can be filled in later, e.g. by a + * virtual engine used for load balancing. + * + * Setting the number of engines bound to the context to 0, by passing a zero + * sized argument, will revert back to default settings. + * + * See struct i915_context_param_engines. + */ +#define I915_CONTEXT_PARAM_ENGINES 0xa /* Must be kept compact -- no holes and well documented */ __u64 value; @@ -1585,6 +1606,16 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +struct i915_context_param_engines { + __u64 extensions; /* linked chain of extension blocks, 0 terminates */ + struct i915_engine_class_instance engines[0]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ + __u64 extensions; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + struct drm_i915_gem_context_create_ext_setparam { #define I915_CONTEXT_CREATE_EXT_SETPARAM 0 struct i915_user_extension base; -- cgit v1.2.3 From e620f7b3a26389dfce2663ad4e64c2271ad1a815 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 May 2019 22:11:27 +0100 Subject: drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Allow the user to specify a local engine index (as opposed to class:index) that they can use to refer to a preset engine inside the ctx->engine[] array defined by an earlier I915_CONTEXT_PARAM_ENGINES. This will be useful for setting SSEU parameters on virtual engines that are local to the context and do not have a valid global class:instance lookup. Note that due to the ambiguity in using class:instance with ctx->engines[], if a user supplied engine map is active the user must specify the engine to alter by its index into the ctx->engines[]. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-3-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 8e1bb22926e4..82bd488ed0d1 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1576,9 +1576,10 @@ struct drm_i915_gem_context_param_sseu { struct i915_engine_class_instance engine; /* - * Unused for now. Must be cleared to zero. + * Unknown flags must be cleared to zero. */ __u32 flags; +#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0) /* * Mask of slices to enable for the context. Valid values are a subset -- cgit v1.2.3 From 8319f44c0525708c26ac7724da897cff3dbb0f84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 May 2019 22:11:28 +0100 Subject: drm/i915: Re-expose SINGLE_TIMELINE flags for context creation The SINGLE_TIMELINE flag can be used to create a context such that all engine instances within that context share a common timeline. This can be useful for mixing operations between real and virtual engines, or when using a composite context for a single client API context. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-4-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 82bd488ed0d1..957ba8e60e02 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1469,8 +1469,9 @@ struct drm_i915_gem_context_create_ext { __u32 ctx_id; /* output: id of new context*/ __u32 flags; #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) +#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1) #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ - (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1)) + (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1)) __u64 extensions; }; -- cgit v1.2.3 From b81dde719439c8f09bb61e742ed95bfc4b33946b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 May 2019 22:11:29 +0100 Subject: drm/i915: Allow userspace to clone contexts on creation A usecase arose out of handling context recovery in mesa, whereby they wish to recreate a context with fresh logical state but preserving all other details of the original. Currently, they create a new context and iterate over which bits they want to copy across, but it would much more convenient if they were able to just pass in a target context to clone during creation. This essentially extends the setparam during creation to pull the details from a target context instead of the user supplied parameters. The ideal here is that we don't expose control over anything more than can be obtained via CONTEXT_PARAM. That is userspace retains explicit control over all features, and this api is just convenience. For example, you could replace struct context_param p = { .param = CONTEXT_PARAM_VM }; param.ctx_id = old_id; gem_context_get_param(&p.param); new_id = gem_context_create(); param.ctx_id = new_id; gem_context_set_param(&p.param); gem_vm_destroy(param.value); /* drop the ref to VM_ID handle */ with struct create_ext_param p = { { .name = CONTEXT_CREATE_CLONE }, .clone_id = old_id, .flags = CLONE_FLAGS_VM } new_id = gem_context_create_ext(&p); and not have to worry about stray namespace pollution etc. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-5-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 957ba8e60e02..62396d575e28 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1624,6 +1624,21 @@ struct drm_i915_gem_context_create_ext_setparam { struct drm_i915_gem_context_param param; }; +struct drm_i915_gem_context_create_ext_clone { +#define I915_CONTEXT_CREATE_EXT_CLONE 1 + struct i915_user_extension base; + __u32 clone_id; + __u32 flags; +#define I915_CONTEXT_CLONE_ENGINES (1u << 0) +#define I915_CONTEXT_CLONE_FLAGS (1u << 1) +#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) +#define I915_CONTEXT_CLONE_SSEU (1u << 3) +#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) +#define I915_CONTEXT_CLONE_VM (1u << 5) +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) + __u64 rsvd; +}; + struct drm_i915_gem_context_destroy { __u32 ctx_id; __u32 pad; -- cgit v1.2.3 From 6d06779e86724322d79eb53b26989edd9db188f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 May 2019 22:11:30 +0100 Subject: drm/i915: Load balancing across a virtual engine Having allowed the user to define a set of engines that they will want to only use, we go one step further and allow them to bind those engines into a single virtual instance. Submitting a batch to the virtual engine will then forward it to any one of the set in a manner as best to distribute load. The virtual engine has a single timeline across all engines (it operates as a single queue), so it is not able to concurrently run batches across multiple engines by itself; that is left up to the user to submit multiple concurrent batches to multiple queues. Multiple users will be load balanced across the system. The mechanism used for load balancing in this patch is a late greedy balancer. When a request is ready for execution, it is added to each engine's queue, and when an engine is ready for its next request it claims it from the virtual engine. The first engine to do so, wins, i.e. the request is executed at the earliest opportunity (idle moment) in the system. As not all HW is created equal, the user is still able to skip the virtual engine and execute the batch on a specific engine, all within the same queue. It will then be executed in order on the correct engine, with execution on other virtual engines being moved away due to the load detection. A couple of areas for potential improvement left! - The virtual engine always take priority over equal-priority tasks. Mostly broken up by applying FQ_CODEL rules for prioritising new clients, and hopefully the virtual and real engines are not then congested (i.e. all work is via virtual engines, or all work is to the real engine). - We require the breadcrumb irq around every virtual engine request. For normal engines, we eliminate the need for the slow round trip via interrupt by using the submit fence and queueing in order. For virtual engines, we have to allow any job to transfer to a new ring, and cannot coalesce the submissions, so require the completion fence instead, forcing the persistent use of interrupts. - We only drip feed single requests through each virtual engine and onto the physical engines, even if there was enough work to fill all ELSP, leaving small stalls with an idle CS event at the end of every request. Could we be greedy and fill both slots? Being lazy is virtuous for load distribution on less-than-full workloads though. Other areas of improvement are more general, such as reducing lock contention, reducing dispatch overhead, looking at direct submission rather than bouncing around tasklets etc. sseu: Lift the restriction to allow sseu to be reconfigured on virtual engines composed of RENDER_CLASS (rcs). v2: macroize check_user_mbz() v3: Cancel virtual engines on wedging v4: Commence commenting v5: Replace 64b sibling_mask with a list of class:instance v6: Drop the one-element array in the uabi v7: Assert it is an virtual engine in to_virtual_engine() v8: Skip over holes in [class][inst] so we can selftest with (vcs0, vcs2) Link: https://github.com/intel/media-driver/pull/283 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-6-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 62396d575e28..f9770948161c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -137,6 +137,7 @@ struct i915_engine_class_instance { __u16 engine_class; /* see enum drm_i915_gem_engine_class */ __u16 engine_instance; #define I915_ENGINE_CLASS_INVALID_NONE -1 +#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2 }; /** @@ -1608,8 +1609,46 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/* + * i915_context_engines_load_balance: + * + * Enable load balancing across this set of engines. + * + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when + * used will proxy the execbuffer request onto one of the set of engines + * in such a way as to distribute the load evenly across the set. + * + * The set of engines must be compatible (e.g. the same HW class) as they + * will share the same logical GPU context and ring. + * + * To intermix rendering with the virtual engine and direct rendering onto + * the backing engines (bypassing the load balancing proxy), the context must + * be defined to use a single timeline for all engines. + */ +struct i915_context_engines_load_balance { + struct i915_user_extension base; + + __u16 engine_index; + __u16 num_siblings; + __u32 flags; /* all undefined flags must be zero */ + + __u64 mbz64; /* reserved for future use; must be zero */ + + struct i915_engine_class_instance engines[0]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \ + struct i915_user_extension base; \ + __u16 engine_index; \ + __u16 num_siblings; \ + __u32 flags; \ + __u64 mbz64; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ struct i915_engine_class_instance engines[0]; } __attribute__((packed)); -- cgit v1.2.3 From ee1136908e9b28173f9794be25465a13b2bb9b18 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 May 2019 22:11:33 +0100 Subject: drm/i915/execlists: Virtual engine bonding Some users require that when a master batch is executed on one particular engine, a companion batch is run simultaneously on a specific slave engine. For this purpose, we introduce virtual engine bonding, allowing maps of master:slaves to be constructed to constrain which physical engines a virtual engine may select given a fence on a master engine. For the moment, we continue to ignore the issue of preemption deferring the master request for later. Ideally, we would like to then also remove the slave and run something else rather than have it stall the pipeline. With load balancing, we should be able to move workload around it, but there is a similar stall on the master pipeline while it may wait for the slave to be executed. At the cost of more latency for the bonded request, it may be interesting to launch both on their engines in lockstep. (Bubbles abound.) Opens: Also what about bonding an engine as its own master? It doesn't break anything internally, so allow the silliness. v2: Emancipate the bonds v3: Couple in delayed scheduling for the selftests v4: Handle invalid mutually exclusive bonding v5: Mention what the uapi does v6: s/nbond/num_bonds/ Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-9-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f9770948161c..e2da9027bcdf 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1543,6 +1543,10 @@ struct drm_i915_gem_context_param { * sized argument, will revert back to default settings. * * See struct i915_context_param_engines. + * + * Extensions: + * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) + * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) */ #define I915_CONTEXT_PARAM_ENGINES 0xa /* Must be kept compact -- no holes and well documented */ @@ -1646,9 +1650,49 @@ struct i915_context_engines_load_balance { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/* + * i915_context_engines_bond: + * + * Constructed bonded pairs for execution within a virtual engine. + * + * All engines are equal, but some are more equal than others. Given + * the distribution of resources in the HW, it may be preferable to run + * a request on a given subset of engines in parallel to a request on a + * specific engine. We enable this selection of engines within a virtual + * engine by specifying bonding pairs, for any given master engine we will + * only execute on one of the corresponding siblings within the virtual engine. + * + * To execute a request in parallel on the master engine and a sibling requires + * coordination with a I915_EXEC_FENCE_SUBMIT. + */ +struct i915_context_engines_bond { + struct i915_user_extension base; + + struct i915_engine_class_instance master; + + __u16 virtual_index; /* index of virtual engine in ctx->engines[] */ + __u16 num_bonds; + + __u64 flags; /* all undefined flags must be zero */ + __u64 mbz64[4]; /* reserved for future use; must be zero */ + + struct i915_engine_class_instance engines[0]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \ + struct i915_user_extension base; \ + struct i915_engine_class_instance master; \ + __u16 virtual_index; \ + __u16 num_bonds; \ + __u64 flags; \ + __u64 mbz64[4]; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ +#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ struct i915_engine_class_instance engines[0]; } __attribute__((packed)); -- cgit v1.2.3 From a88b6e4cbafd6f23b3450c087acdbe23d90e7606 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 May 2019 22:11:34 +0100 Subject: drm/i915: Allow specification of parallel execbuf There is a desire to split a task onto two engines and have them run at the same time, e.g. scanline interleaving to spread the workload evenly. Through the use of the out-fence from the first execbuf, we can coordinate secondary execbuf to only become ready simultaneously with the first, so that with all things idle the second execbufs are executed in parallel with the first. The key difference here between the new EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence waits for the completion of the first request (so that all of its rendering results are visible to the second execbuf, the more common userspace fence requirement). Since we only have a single input fence slot, userspace cannot mix an in-fence and a submit-fence. It has to use one or the other! This is not such a harsh requirement, since by virtue of the submit-fence, the secondary execbuf inherit all of the dependencies from the first request, and for the application the dependencies should be common between the primary and secondary execbuf. Suggested-by: Tvrtko Ursulin Testcase: igt/gem_exec_fence/parallel Link: https://github.com/intel/media-driver/pull/546 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-10-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e2da9027bcdf..bdb00ec1f8be 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -604,6 +604,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_MMAP_GTT_COHERENT 52 +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel + * execution through use of explicit fence support. + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. + */ +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1126,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_ARRAY (1<<19) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) +/* + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_SUBMIT (1 << 20) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3 From c5d3e39caa456b1e061644b739131f2b54c84c08 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 22 May 2019 10:00:54 +0100 Subject: drm/i915: Engine discovery query Engine discovery query allows userspace to enumerate engines, probe their configuration features, all without needing to maintain the internal PCI ID based database. A new query for the generic i915 query ioctl is added named DRM_I915_QUERY_ENGINE_INFO, together with accompanying structure drm_i915_query_engine_info. The address of latter should be passed to the kernel in the query.data_ptr field, and should be large enough for the kernel to fill out all known engines as struct drm_i915_engine_info elements trailing the query. As with other queries, setting the item query length to zero allows userspace to query minimum required buffer size. Enumerated engines have common type mask which can be used to query all hardware engines, versus engines userspace can submit to using the execbuf uAPI. Engines also have capabilities which are per engine class namespace of bits describing features not present on all engine instances. v2: * Fixed HEVC assignment. * Reorder some fields, rename type to flags, increase width. (Lionel) * No need to allocate temporary storage if we do it engine by engine. (Lionel) v3: * Describe engine flags and mark mbz fields. (Lionel) * HEVC only applies to VCS. v4: * Squash SFC flag into main patch. * Tidy some comments. v5: * Add uabi_ prefix to engine capabilities. (Chris Wilson) * Report exact size of engine info array. (Chris Wilson) * Drop the engine flags. (Joonas Lahtinen) * Added some more reserved fields. * Move flags after class/instance. v6: * Do not check engine info array was zeroed by userspace but zero the unused fields for them instead. v7: * Simplify length calculation loop. (Lionel Landwerlin) v8: * Remove MBZ comments where not applicable. * Rename ABI flags to match engine class define naming. * Rename SFC ABI flag to reflect it applies to VCS and VECS. * SFC is wired to even _logical_ engine instances. * SFC applies to VCS and VECS. * HEVC is present on all instances on Gen11. (Tony) * Simplify length calculation even more. (Chris Wilson) * Move info_ptr assigment closer to loop for clarity. (Chris Wilson) * Use vdbox_sfc_access from runtime info. * Rebase for RUNTIME_INFO. * Refactor for lower indentation. * Rename uAPI class/instance to engine_class/instance to avoid C++ keyword. v9: * Rebase for s/num_rings/num_engines/ in RUNTIME_INFO. v10: * Use new copy_query_item. v11: * Consolidate with struct i915_engine_class_instnace. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Jon Bloomfield Cc: Dmitry Rogozhkin Cc: Lionel Landwerlin Cc: Joonas Lahtinen Cc: Tony Ye Reviewed-by: Lionel Landwerlin # v7 Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190522090054.6007-1-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index bdb00ec1f8be..328d05e77d9f 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1982,6 +1982,7 @@ struct drm_i915_perf_oa_config { struct drm_i915_query_item { __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 +#define DRM_I915_QUERY_ENGINE_INFO 2 /* Must be kept compact -- no holes and well documented */ /* @@ -2080,6 +2081,47 @@ struct drm_i915_query_topology_info { __u8 data[]; }; +/** + * struct drm_i915_engine_info + * + * Describes one engine and it's capabilities as known to the driver. + */ +struct drm_i915_engine_info { + /** Engine class and instance. */ + struct i915_engine_class_instance engine; + + /** Reserved field. */ + __u32 rsvd0; + + /** Engine flags. */ + __u64 flags; + + /** Capabilities of this engine. */ + __u64 capabilities; +#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) +#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) + + /** Reserved fields. */ + __u64 rsvd1[4]; +}; + +/** + * struct drm_i915_query_engine_info + * + * Engine info query enumerates all engines known to the driver by filling in + * an array of struct drm_i915_engine_info structures. + */ +struct drm_i915_query_engine_info { + /** Number of struct drm_i915_engine_info structs following. */ + __u32 num_engines; + + /** MBZ */ + __u32 rsvd[3]; + + /** Marker for drm_i915_engine_info structures. */ + struct drm_i915_engine_info engines[]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3