From 3affaa5a7ca3ca114e01049bc45e3ed4a3955505 Mon Sep 17 00:00:00 2001 From: Brian Starkey Date: Mon, 3 Dec 2018 11:31:57 +0000 Subject: drm/afbc: Add AFBC modifier usage documentation AFBC is a flexible, proprietary, lossless compression protocol and format, with a number of defined DRM format modifiers. To facilitate consistency and compatibility between different AFBC producers and consumers, document the expectations for usage of the AFBC DRM format modifiers in a new .rst chapter. Signed-off-by: Brian Starkey Reviewed-by: Liviu Dudau [Updated MAINTAINERS entry to show that "Mali DP Maintainers" is actually a mailing list and added an SPDX-License-Identifier to the documentation] Signed-off-by: Liviu Dudau --- include/uapi/drm/drm_fourcc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 0b44260a5ee9..df014ede4e57 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -572,6 +572,9 @@ extern "C" { * AFBC has several features which may be supported and/or used, which are * represented using bits in the modifier. Not all combinations are valid, * and different devices or use-cases may support different combinations. + * + * Further information on the use of AFBC modifiers can be found in + * Documentation/gpu/afbc.rst */ #define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode) fourcc_mod_code(ARM, __afbc_mode) -- cgit v1.2.3 From 03ca3cf8e9aa7549e6c398462af0f68bdd43e7fe Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 17 Jan 2019 21:59:43 -0800 Subject: drm/i915/icl: Adding few more device IDs for Ice Lake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We just got aware that there was more IDs available at spec, so let's add them already. Cc: James Ausmus Cc: José Roberto de Souza Signed-off-by: Rodrigo Vivi Reviewed-by: Mika Kuoppala Reviewed-by: José Roberto de Souza Signed-off-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190118055943.10252-1-rodrigo.vivi@intel.com --- include/drm/i915_pciids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 192667144693..df72be7e8b88 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -457,9 +457,13 @@ INTEL_VGA_DEVICE(0x8A51, info), \ INTEL_VGA_DEVICE(0x8A5C, info), \ INTEL_VGA_DEVICE(0x8A5D, info), \ + INTEL_VGA_DEVICE(0x8A59, info), \ + INTEL_VGA_DEVICE(0x8A58, info), \ INTEL_VGA_DEVICE(0x8A52, info), \ INTEL_VGA_DEVICE(0x8A5A, info), \ INTEL_VGA_DEVICE(0x8A5B, info), \ + INTEL_VGA_DEVICE(0x8A57, info), \ + INTEL_VGA_DEVICE(0x8A56, info), \ INTEL_VGA_DEVICE(0x8A71, info), \ INTEL_VGA_DEVICE(0x8A70, info) -- cgit v1.2.3 From 3c8861d84a4d2c6cd7221d18e49bf9201c6c6115 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 17 Dec 2018 14:44:14 -0800 Subject: drm: Add color management LUT validation helper (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some hardware may place additional restrictions on the gamma/degamma curves described by our LUT properties. E.g., that a gamma curve never decreases or that the red/green/blue channels of a LUT's entries must be equal. Let's add a helper function that drivers can use to test that a userspace-provided LUT is valid and doesn't violate hardware requirements. v2: - Combine into a single helper that just takes a bitmask of the tests to apply. (Brian Starkey) - Add additional check (always performed) that LUT property blob size is always a multiple of the LUT entry size. (stolen from ARM driver) v3: - Drop the LUT size check again since drm_atomic_replace_property_blob_from_id() already covers this for us. (Alexandru Gheorghe) v4: - Use an enum to describe possible test values rather than #define's; this is cleaner to provide kerneldoc for. (Daniel Vetter) - s/DRM_COLOR_LUT_INCREASING/DRM_COLOR_LUT_NON_DECREASING/. (Ville) Cc: Uma Shankar Cc: Swati Sharma Cc: Brian Starkey Cc: Daniel Vetter Cc: Ville Syrjälä Signed-off-by: Matt Roper Reviewed-by: Brian Starkey Reviewed-by: Alexandru Gheorghe Reviewed-by: Uma Shankar Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181217224415.12848-1-matthew.d.roper@intel.com --- include/drm/drm_color_mgmt.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 90ef9996d9a4..6affbda6d9cb 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -69,4 +69,33 @@ int drm_plane_create_color_properties(struct drm_plane *plane, u32 supported_ranges, enum drm_color_encoding default_encoding, enum drm_color_range default_range); + +/** + * enum drm_color_lut_tests - hw-specific LUT tests to perform + * + * The drm_color_lut_check() function takes a bitmask of the values here to + * determine which tests to apply to a userspace-provided LUT. + */ +enum drm_color_lut_tests { + /** + * @DRM_COLOR_LUT_EQUAL_CHANNELS: + * + * Checks whether the entries of a LUT all have equal values for the + * red, green, and blue channels. Intended for hardware that only + * accepts a single value per LUT entry and assumes that value applies + * to all three color components. + */ + DRM_COLOR_LUT_EQUAL_CHANNELS = BIT(0), + + /** + * @DRM_COLOR_LUT_NON_DECREASING: + * + * Checks whether the entries of a LUT are always flat or increasing + * (never decreasing). + */ + DRM_COLOR_LUT_NON_DECREASING = BIT(1), +}; + +int drm_color_lut_check(struct drm_property_blob *lut, + uint32_t tests); #endif -- cgit v1.2.3 From ae6d343541bb75958e9535d056adaf4ff6a66d6a Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 10 Jan 2019 17:56:39 +0800 Subject: drm/ttm: add lru notify to bo driver v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit allow driver do somethings when lru changed. v2: address Michel's comments. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/ttm/ttm_bo_driver.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 1021106438b2..15829b24277c 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -381,6 +381,15 @@ struct ttm_bo_driver { */ int (*access_memory)(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write); + + /** + * struct ttm_bo_driver member del_from_lru_notify + * + * @bo: the buffer object deleted from lru + * + * notify driver that a BO was deleted from LRU. + */ + void (*del_from_lru_notify)(struct ttm_buffer_object *bo); }; /** -- cgit v1.2.3 From 222b5f044159877504dbac9bc1910f89a74136e2 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 4 Dec 2018 16:56:14 -0500 Subject: drm/sched: Refactor ring mirror list handling. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decauple sched threads stop and start and ring mirror list handling from the policy of what to do about the guilty jobs. When stoppping the sched thread and detaching sched fences from non signaled HW fenes wait for all signaled HW fences to complete before rerunning the jobs. v2: Fix resubmission of guilty job into HW after refactoring. v4: Full restart for all the jobs, not only from guilty ring. Extract karma increase into standalone function. v5: Rework waiting for signaled jobs without relying on the job struct itself as those might already be freed for non 'guilty' job's schedulers. Expose karma increase to drivers. v6: Use list_for_each_entry_safe_continue and drm_sched_process_job in case fence already signaled. Call drm_sched_increase_karma only once for amdgpu and add documentation. v7: Wait only for the latest job's fence. Suggested-by: Christian Koenig Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/gpu_scheduler.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 47e19796c450..c567bba91ba0 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -298,9 +298,10 @@ int drm_sched_job_init(struct drm_sched_job *job, void *owner); void drm_sched_job_cleanup(struct drm_sched_job *job); void drm_sched_wakeup(struct drm_gpu_scheduler *sched); -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, - struct drm_sched_job *job); -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched); +void drm_sched_stop(struct drm_gpu_scheduler *sched); +void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery); +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); +void drm_sched_increase_karma(struct drm_sched_job *bad); bool drm_sched_dependency_optimized(struct dma_fence* fence, struct drm_sched_entity *entity); void drm_sched_fault(struct drm_gpu_scheduler *sched); -- cgit v1.2.3 From 3741540e04137256df82105bcd720a5e27423c34 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 5 Dec 2018 14:21:28 -0500 Subject: drm/sched: Rework HW fence processing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expedite job deletion from ring mirror list to the HW fence signal callback instead from finish_work, together with waiting for all such fences to signal in drm_sched_stop we garantee that already signaled job will not be processed twice. Remove the sched finish fence callback and just submit finish_work directly from the HW fence callback. v2: Fix comments. v3: Attach hw fence cb to sched_job v5: Rebase Suggested-by: Christian Koenig Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/gpu_scheduler.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index c567bba91ba0..0daca4d8dad9 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -137,10 +137,6 @@ struct drm_sched_fence { */ struct dma_fence finished; - /** - * @cb: the callback for the parent fence below. - */ - struct dma_fence_cb cb; /** * @parent: the fence returned by &drm_sched_backend_ops.run_job * when scheduling the job on hardware. We signal the @@ -181,6 +177,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); * be scheduled further. * @s_priority: the priority of the job. * @entity: the entity to which this job belongs. + * @cb: the callback for the parent fence in s_fence. * * A job is created by the driver using drm_sched_job_init(), and * should call drm_sched_entity_push_job() once it wants the scheduler @@ -197,6 +194,7 @@ struct drm_sched_job { atomic_t karma; enum drm_sched_priority s_priority; struct drm_sched_entity *entity; + struct dma_fence_cb cb; }; static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, -- cgit v1.2.3 From cbce5f0a9f306f890d257a92fcca6cb341af76ed Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 25 Jan 2019 12:02:11 +0100 Subject: drm/ttm: Remove ttm_bo_reference and ttm_bo_unref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both functions are obsolete and all calls have been replaced by ttm_bo_get and ttm_bo_put. Signed-off-by: Thomas Zimmermann Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- include/drm/ttm/ttm_bo_api.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 3fc4854dce49..49d9cdfc58f2 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -295,23 +295,6 @@ static inline void ttm_bo_get(struct ttm_buffer_object *bo) kref_get(&bo->kref); } -/** - * ttm_bo_reference - reference a struct ttm_buffer_object - * - * @bo: The buffer object. - * - * Returns a refcounted pointer to a buffer object. - * - * This function is deprecated. Use @ttm_bo_get instead. - */ - -static inline struct ttm_buffer_object * -ttm_bo_reference(struct ttm_buffer_object *bo) -{ - ttm_bo_get(bo); - return bo; -} - /** * ttm_bo_get_unless_zero - reference a struct ttm_buffer_object unless * its refcount has already reached zero. @@ -386,17 +369,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, */ void ttm_bo_put(struct ttm_buffer_object *bo); -/** - * ttm_bo_unref - * - * @bo: The buffer object. - * - * Unreference and clear a pointer to a buffer object. - * - * This function is deprecated. Use @ttm_bo_put instead. - */ -void ttm_bo_unref(struct ttm_buffer_object **bo); - /** * ttm_bo_add_to_lru * -- cgit v1.2.3 From 5a3db6f08a8eae15bff597dce290ac238daeb717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 29 Jan 2019 19:06:09 +0200 Subject: drm: Constify drm_color_lut_check() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_color_lut_check() doens't modify the passed in blob so let's make it const. Also s/uint32_t/u32/ while at it. v2: Reduce line wraps (Sam) Cc: Matt Roper Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190129170609.5718-1-ville.syrjala@linux.intel.com Reviewed-by: Sam Ravnborg Acked-by: Daniel Vetter --- include/drm/drm_color_mgmt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index 6affbda6d9cb..d1c662d92ab7 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -96,6 +96,5 @@ enum drm_color_lut_tests { DRM_COLOR_LUT_NON_DECREASING = BIT(1), }; -int drm_color_lut_check(struct drm_property_blob *lut, - uint32_t tests); +int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests); #endif -- cgit v1.2.3 From 5e0f5a58b167fc2c8352d90c0faa8c0c9ca75c26 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 1 Feb 2019 15:50:49 -0800 Subject: drm/i915/cfl: Adding another PCI Device ID. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While cross checking PCI IDs from Intel Media SDK and kernel Dmitry noticed this gap. So we checked the spec and this new ID had been recently added. v2: Adding new H_GT1 entry to i915_pci.c (Jose) Reported-by: Dmitry Rogozhkin Cc: Dmitry Rogozhkin Cc: José Roberto de Souza Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20190201235049.27206-1-rodrigo.vivi@intel.com --- include/drm/i915_pciids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index df72be7e8b88..d2fad7b0fcf6 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -394,6 +394,9 @@ INTEL_VGA_DEVICE(0x3E9A, info) /* SRV GT2 */ /* CFL H */ +#define INTEL_CFL_H_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x3E9C, info) + #define INTEL_CFL_H_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \ INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */ @@ -426,6 +429,7 @@ #define INTEL_CFL_IDS(info) \ INTEL_CFL_S_GT1_IDS(info), \ INTEL_CFL_S_GT2_IDS(info), \ + INTEL_CFL_H_GT1_IDS(info), \ INTEL_CFL_H_GT2_IDS(info), \ INTEL_CFL_U_GT2_IDS(info), \ INTEL_CFL_U_GT3_IDS(info), \ -- cgit v1.2.3 From e46c2e99f60043bfdb63f18fe775db1f688906d9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Feb 2019 09:50:31 +0000 Subject: drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to allow userspace to reconfigure the subslice configuration on a per context basis. This is required for the functional requirement of shutting down non-VME enabled sub-slices on Gen11 parts. To do so, we expose a context parameter to allow adjustment of the RPCS register stored within the context image (and currently not accessible via LRI). If the context is adjusted before first use or whilst idle, the adjustment is for "free"; otherwise if the context is active we queue a request to do so (using the kernel context), following all other activity by that context, which is also marked as barrier for all following submission against the same context. Since the overhead of device re-configuration during context switching can be significant, especially in multi-context workloads, we limit this new uAPI to only support the Gen11 VME use case. In this use case either the device is fully enabled, and exactly one slice and half of the subslices are enabled. Example usage: struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param arg = { .param = I915_CONTEXT_PARAM_SSEU, .ctx_id = gem_context_create(fd), .size = sizeof(sseu), .value = to_user_pointer(&sseu) }; /* Query device defaults. */ gem_context_get_param(fd, &arg); /* Set VME configuration on a 1x6x8 part. */ sseu.slice_mask = 0x1; sseu.subslice_mask = 0xe0; gem_context_set_param(fd, &arg); v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel) v3: Add ability to program this per engine (Chris) v4: Move most get_sseu() into i915_gem_context.c (Lionel) v5: Validate sseu configuration against the device's capabilities (Lionel) v6: Change context powergating settings through MI_SDM on kernel context (Chris) v7: Synchronize the requests following a powergating setting change using a global dependency (Chris) Iterate timelines through dev_priv.gt.active_rings (Tvrtko) Disable RPCS configuration setting for non capable users (Lionel/Tvrtko) v8: s/union intel_sseu/struct intel_sseu/ (Lionel) s/dev_priv/i915/ (Tvrtko) Change uapi class/instance fields to u16 (Tvrtko) Bump mask fields to 64bits (Lionel) Don't return EPERM when dynamic sseu is disabled (Tvrtko) v9: Import context image into kernel context's ppgtt only when reconfiguring powergated slice/subslices (Chris) Use aliasing ppgtt when needed (Michel) Tvrtko Ursulin: v10: * Update for upstream changes. * Request submit needs a RPM reference. * Reject on !FULL_PPGTT for simplicity. * Pull out get/set param to helpers for readability and less indent. * Use i915_request_await_dma_fence in add_global_barrier to skip waits on the same timeline and avoid GEM_BUG_ON. * No need to explicitly assign a NULL pointer to engine in legacy mode. * No need to move gen8_make_rpcs up. * Factored out global barrier as prep patch. * Allow to only CAP_SYS_ADMIN if !Gen11. v11: * Remove engine vfunc in favour of local helper. (Chris Wilson) * Stop retiring requests before updates since it is not needed (Chris Wilson) * Implement direct CPU update path for idle contexts. (Chris Wilson) * Left side dependency needs only be on the same context timeline. (Chris Wilson) * It is sufficient to order the timeline. (Chris Wilson) * Reject !RCS configuration attempts with -ENODEV for now. v12: * Rebase for make_rpcs. v13: * Centralize SSEU normalization to make_rpcs. * Type width checking (uAPI <-> implementation). * Gen11 restrictions uAPI checks. * Gen11 subslice count differences handling. Chris Wilson: * args->size handling fixes. * Update context image from GGTT. * Postpone context image update to pinning. * Use i915_gem_active_raw instead of last_request_on_engine. v14: * Add activity tracker on intel_context to fix the lifetime issues and simplify the code. (Chris Wilson) v15: * Fix context pin leak if no space in ring by simplifying the context pinning sequence. v16: * Rebase for context get/set param locking changes. * Just -ENODEV on !Gen11. (Joonas) v17: * Fix one Gen11 subslice enablement rule. * Handle error from i915_sw_fence_await_sw_fence_gfp. (Chris Wilson) v18: * Update commit message. (Joonas) * Restrict uAPI to VME use case. (Joonas) v19: * Rebase. v20: * Rebase for ce->active_tracker. v21: * Rebase for IS_GEN changes. v22: * Reserve uAPI for flags straight away. (Chris Wilson) v23: * Rebase for RUNTIME_INFO. v24: * Added some headline docs for the uapi usage. (Joonas/Chris) v25: * Renamed class/instance to engine_class/engine_instance to avoid clash with C++ keyword. (Tony Ye) v26: * Rebased for runtime pm api changes. v27: * Rebased for intel_context_init. * Wrap commit msg to 75. v28: (Chris Wilson) * Use i915_gem_ggtt. * Use i915_request_await_dma_fence to show a better example. v29: * i915_timeline_set_barrier can now fail. (Chris Wilson) v30: * Capture some acks. v31: * Drop the WARN_ON from use controllable paths. (Chris Wilson) * Use overflows_type for all checks. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107634 Issue: https://github.com/intel/media-driver/issues/267 Signed-off-by: Chris Wilson Signed-off-by: Lionel Landwerlin Cc: Dmitry Rogozhkin Cc: Tvrtko Ursulin Cc: Zhipeng Gong Cc: Joonas Lahtinen Cc: Tony Ye Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Timo Aaltonen Acked-by: Takashi Iwai Acked-by: Stéphane Marchesin Link: https://patchwork.freedesktop.org/patch/msgid/20190205095032.22673-4-tvrtko.ursulin@linux.intel.com --- include/uapi/drm/i915_drm.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 298b2e197744..397810fa2d33 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ #define I915_CONTEXT_DEFAULT_PRIORITY 0 #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ + /* + * When using the following param, value should be a pointer to + * drm_i915_gem_context_param_sseu. + */ +#define I915_CONTEXT_PARAM_SSEU 0x7 __u64 value; }; +/** + * Context SSEU programming + * + * It may be necessary for either functional or performance reason to configure + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ + * Sub-slice/EU). + * + * This is done by configuring SSEU configuration using the below + * @struct drm_i915_gem_context_param_sseu for every supported engine which + * userspace intends to use. + * + * Not all GPUs or engines support this functionality in which case an error + * code -ENODEV will be returned. + * + * Also, flexibility of possible SSEU configuration permutations varies between + * GPU generations and software imposed limitations. Requesting such a + * combination will return an error code of -EINVAL. + * + * NOTE: When perf/OA is active the context's SSEU configuration is ignored in + * favour of a single global setting. + */ +struct drm_i915_gem_context_param_sseu { + /* + * Engine class & instance to be configured or queried. + */ + __u16 engine_class; + __u16 engine_instance; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. Valid values are a + * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be inferior or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd; +}; + enum drm_i915_oa_format { I915_OA_FORMAT_A13 = 1, /* HSW only */ I915_OA_FORMAT_A29, /* HSW only */ -- cgit v1.2.3 From 67dd1a36334ffce82bebeb2d633e152aa436d370 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 31 Jan 2019 15:44:22 -0500 Subject: drm/amdgpu: Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New chunk for dependency on start of job's execution instead on the end. This is used for GPU deadlock prevention when userspace uses mid-IB fences to wait for mid-IB work on other rings. v2: Fix typo in AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES v3: Bump KMS version v4: put old fence AFTER acquiring the scheduled fence. Signed-off-by: Andrey Grodzovsky Suggested-by: Christian Koenig Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index be84e43c1e19..47296f2ec3fa 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -523,6 +523,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 #define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 struct drm_amdgpu_cs_chunk { __u32 chunk_id; -- cgit v1.2.3 From 41cca166cc57e75e94d888595a428d23a3bf4e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 21 Jan 2019 17:22:55 -0500 Subject: drm/amdgpu: add a workaround for GDS ordered append hangs with compute queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm not increasing the DRM version because GDS isn't totally without bugs yet. v2: update emit_ib_size Signed-off-by: Marek Olšák Acked-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 47296f2ec3fa..2acbc8bc465b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -566,6 +566,11 @@ union drm_amdgpu_cs { * caches (L2/vL1/sL1/I$). */ #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ -- cgit v1.2.3 From 5a5fccbd8c315c08db01e585e1cbe88e30b70691 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Feb 2019 14:28:24 +0100 Subject: gpu: host1x: Introduce support for wide opcodes The CDMA push buffer can currently only handle opcodes that take a single word parameter. However, the host1x implementation on Tegra186 and later supports opcodes that require multiple words as parameters. Unfortunately the way the push buffer is structured, these wide opcodes cannot simply be composed of two regular opcodes because that could result in the wide opcode being split across the end of the push buffer and the final RESTART opcode required to wrap the push buffer around would break the wide opcode. One way to fix this would be to remove the concept of slots to simplify push buffer operations. However, that's not entirely trivial and should be done in a separate patch. For now, simply use a different function to push four-word opcodes into the push buffer. Technically only three words are pushed, with the fourth word used as padding to preserve the 2-word alignment required by the slots abstraction. The fourth word is always a NOP opcode. Additional care must be taken when the end of the push buffer is reached. If a four-word opcode doesn't fit into the push buffer without being split by the boundary, NOP opcodes will be introduced and the new wide opcode placed at the beginning of the push buffer. Signed-off-by: Thierry Reding --- include/trace/events/host1x.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/trace/events/host1x.h b/include/trace/events/host1x.h index a37ef73092e5..3d340b6f1ea3 100644 --- a/include/trace/events/host1x.h +++ b/include/trace/events/host1x.h @@ -80,6 +80,32 @@ TRACE_EVENT(host1x_cdma_push, __entry->name, __entry->op1, __entry->op2) ); +TRACE_EVENT(host1x_cdma_push_wide, + TP_PROTO(const char *name, u32 op1, u32 op2, u32 op3, u32 op4), + + TP_ARGS(name, op1, op2, op3, op4), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, op1) + __field(u32, op2) + __field(u32, op3) + __field(u32, op4) + ), + + TP_fast_assign( + __entry->name = name; + __entry->op1 = op1; + __entry->op2 = op2; + __entry->op3 = op3; + __entry->op4 = op4; + ), + + TP_printk("name=%s, op1=%08x, op2=%08x, op3=%08x op4=%08x", + __entry->name, __entry->op1, __entry->op2, __entry->op3, + __entry->op4) +); + TRACE_EVENT(host1x_cdma_push_gather, TP_PROTO(const char *name, struct host1x_bo *bo, u32 words, u32 offset, void *cmdbuf), -- cgit v1.2.3