summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c65
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.h8
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h5
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c43
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c20
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c2
-rw-r--r--drivers/gpu/drm/xe/xe_device.c16
-rw-r--r--drivers/gpu/drm/xe/xe_device.h14
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h9
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c29
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c6
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.c12
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c10
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.c15
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c31
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h1
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c53
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c126
-rw-r--r--drivers/gpu/drm/xe/xe_guc_types.h7
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c6
-rw-r--r--drivers/gpu/drm/xe/xe_query.c6
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c5
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c8
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c4
-rw-r--r--drivers/gpu/drm/xe/xe_wait_user_fence.c10
29 files changed, 269 insertions, 255 deletions
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 75736faf2a80..c6e0c8d77a70 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -309,18 +309,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
}
/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
-void xe_display_pm_runtime_suspend(struct xe_device *xe)
-{
- if (!xe->info.probe_display)
- return;
-
- if (xe->d3cold.allowed)
- xe_display_pm_suspend(xe, true);
-
- intel_hpd_poll_enable(xe);
-}
-
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
{
struct intel_display *display = &xe->display;
bool s2idle = suspend_to_idle();
@@ -353,28 +342,38 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
intel_dmc_suspend(xe);
+
+ if (runtime && has_display(xe))
+ intel_hpd_poll_enable(xe);
}
-void xe_display_pm_suspend_late(struct xe_device *xe)
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+ __xe_display_pm_suspend(xe, false);
+}
+
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
{
- bool s2idle = suspend_to_idle();
if (!xe->info.probe_display)
return;
- intel_power_domains_suspend(xe, s2idle);
+ if (xe->d3cold.allowed) {
+ __xe_display_pm_suspend(xe, true);
+ return;
+ }
- intel_display_power_suspend_late(xe);
+ intel_hpd_poll_enable(xe);
}
-void xe_display_pm_runtime_resume(struct xe_device *xe)
+void xe_display_pm_suspend_late(struct xe_device *xe)
{
+ bool s2idle = suspend_to_idle();
if (!xe->info.probe_display)
return;
- intel_hpd_poll_disable(xe);
+ intel_power_domains_suspend(xe, s2idle);
- if (xe->d3cold.allowed)
- xe_display_pm_resume(xe, true);
+ intel_display_power_suspend_late(xe);
}
void xe_display_pm_resume_early(struct xe_device *xe)
@@ -387,7 +386,7 @@ void xe_display_pm_resume_early(struct xe_device *xe)
intel_power_domains_resume(xe);
}
-void xe_display_pm_resume(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
{
struct intel_display *display = &xe->display;
@@ -411,9 +410,11 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_display_driver_resume(xe);
drm_kms_helper_poll_enable(&xe->drm);
intel_display_driver_enable_user_access(xe);
- intel_hpd_poll_disable(xe);
}
+ if (has_display(xe))
+ intel_hpd_poll_disable(xe);
+
intel_opregion_resume(display);
intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
@@ -421,6 +422,26 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
intel_power_domains_enable(xe);
}
+void xe_display_pm_resume(struct xe_device *xe)
+{
+ __xe_display_pm_resume(xe, false);
+}
+
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+ if (!xe->info.probe_display)
+ return;
+
+ if (xe->d3cold.allowed) {
+ __xe_display_pm_resume(xe, true);
+ return;
+ }
+
+ intel_hpd_init(xe);
+ intel_hpd_poll_disable(xe);
+}
+
+
static void display_device_remove(struct drm_device *dev, void *arg)
{
struct xe_device *xe = arg;
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 53d727fd792b..bed55fd26f30 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -34,10 +34,10 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
void xe_display_irq_reset(struct xe_device *xe);
void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime);
+void xe_display_pm_suspend(struct xe_device *xe);
void xe_display_pm_suspend_late(struct xe_device *xe);
void xe_display_pm_resume_early(struct xe_device *xe);
-void xe_display_pm_resume(struct xe_device *xe, bool runtime);
+void xe_display_pm_resume(struct xe_device *xe);
void xe_display_pm_runtime_suspend(struct xe_device *xe);
void xe_display_pm_runtime_resume(struct xe_device *xe);
@@ -65,10 +65,10 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
static inline void xe_display_irq_reset(struct xe_device *xe) {}
static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
-static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
-static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index ac9c437e103d..bd604b9f08e4 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -393,9 +393,6 @@
#define XE2_GLOBAL_INVAL XE_REG(0xb404)
-#define SCRATCH1LPFC XE_REG(0xb474)
-#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
-
#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604)
#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608)
@@ -520,7 +517,7 @@
* [4-6] RSVD
* [7] Disabled
*/
-#define CCS_MODE XE_REG(0x14804)
+#define CCS_MODE XE_REG(0x14804, XE_REG_OPTION_MASKED)
#define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */
#define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */
#define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e5f51fd23c65..2a093540354e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -886,8 +886,8 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (WARN_ON(!xe_bo_is_pinned(bo)))
return -EINVAL;
- if (WARN_ON(!xe_bo_is_vram(bo)))
- return -EINVAL;
+ if (!xe_bo_is_vram(bo))
+ return 0;
ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
if (ret)
@@ -937,6 +937,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
.interruptible = false,
};
struct ttm_resource *new_mem;
+ struct ttm_place *place = &bo->placements[0];
int ret;
xe_bo_assert_held(bo);
@@ -947,9 +948,15 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (WARN_ON(!xe_bo_is_pinned(bo)))
return -EINVAL;
- if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
+ if (WARN_ON(xe_bo_is_vram(bo)))
+ return -EINVAL;
+
+ if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
return -EINVAL;
+ if (!mem_type_is_vram(place->mem_type))
+ return 0;
+
ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
if (ret)
return ret;
@@ -1719,6 +1726,7 @@ int xe_bo_pin_external(struct xe_bo *bo)
int xe_bo_pin(struct xe_bo *bo)
{
+ struct ttm_place *place = &bo->placements[0];
struct xe_device *xe = xe_bo_device(bo);
int err;
@@ -1749,21 +1757,21 @@ int xe_bo_pin(struct xe_bo *bo)
*/
if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
- struct ttm_place *place = &(bo->placements[0]);
-
if (mem_type_is_vram(place->mem_type)) {
xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
-
- spin_lock(&xe->pinned.lock);
- list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
- spin_unlock(&xe->pinned.lock);
}
}
+ if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
+ spin_lock(&xe->pinned.lock);
+ list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
+ spin_unlock(&xe->pinned.lock);
+ }
+
ttm_bo_pin(&bo->ttm);
/*
@@ -1809,23 +1817,18 @@ void xe_bo_unpin_external(struct xe_bo *bo)
void xe_bo_unpin(struct xe_bo *bo)
{
+ struct ttm_place *place = &bo->placements[0];
struct xe_device *xe = xe_bo_device(bo);
xe_assert(xe, !bo->ttm.base.import_attach);
xe_assert(xe, xe_bo_is_pinned(bo));
- if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
- bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
- struct ttm_place *place = &(bo->placements[0]);
-
- if (mem_type_is_vram(place->mem_type)) {
- spin_lock(&xe->pinned.lock);
- xe_assert(xe, !list_empty(&bo->pinned_link));
- list_del_init(&bo->pinned_link);
- spin_unlock(&xe->pinned.lock);
- }
+ if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
+ spin_lock(&xe->pinned.lock);
+ xe_assert(xe, !list_empty(&bo->pinned_link));
+ list_del_init(&bo->pinned_link);
+ spin_unlock(&xe->pinned.lock);
}
-
ttm_bo_unpin(&bo->ttm);
}
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 541b49007d73..8fb2be061003 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -34,14 +34,22 @@ int xe_bo_evict_all(struct xe_device *xe)
u8 id;
int ret;
- if (!IS_DGFX(xe))
- return 0;
-
/* User memory */
- for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
+ for (mem_type = XE_PL_TT; mem_type <= XE_PL_VRAM1; ++mem_type) {
struct ttm_resource_manager *man =
ttm_manager_type(bdev, mem_type);
+ /*
+ * On igpu platforms with flat CCS we need to ensure we save and restore any CCS
+ * state since this state lives inside graphics stolen memory which doesn't survive
+ * hibernation.
+ *
+ * This can be further improved by only evicting objects that we know have actually
+ * used a compression enabled PAT index.
+ */
+ if (mem_type == XE_PL_TT && (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe)))
+ continue;
+
if (man) {
ret = ttm_resource_manager_evict_all(bdev, man);
if (ret)
@@ -125,9 +133,6 @@ int xe_bo_restore_kernel(struct xe_device *xe)
struct xe_bo *bo;
int ret;
- if (!IS_DGFX(xe))
- return 0;
-
spin_lock(&xe->pinned.lock);
for (;;) {
bo = list_first_entry_or_null(&xe->pinned.evicted,
@@ -159,7 +164,6 @@ int xe_bo_restore_kernel(struct xe_device *xe)
* should setup the iosys map.
*/
xe_assert(xe, !iosys_map_is_null(&bo->vmap));
- xe_assert(xe, xe_bo_is_vram(bo));
xe_bo_put(bo);
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 668615c6b172..fe4319eb13fd 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -187,7 +187,7 @@ void xe_debugfs_register(struct xe_device *xe)
debugfs_create_file("forcewake_all", 0400, root, xe,
&forcewake_all_fops);
- debugfs_create_file("wedged_mode", 0400, root, xe,
+ debugfs_create_file("wedged_mode", 0600, root, xe,
&wedged_mode_fops);
for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5a63d135ba96..a1987b554a8d 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -87,10 +87,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
mutex_init(&xef->exec_queue.lock);
xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);
- spin_lock(&xe->clients.lock);
- xe->clients.count++;
- spin_unlock(&xe->clients.lock);
-
file->driver_priv = xef;
kref_init(&xef->refcount);
@@ -107,17 +103,12 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
static void xe_file_destroy(struct kref *ref)
{
struct xe_file *xef = container_of(ref, struct xe_file, refcount);
- struct xe_device *xe = xef->xe;
xa_destroy(&xef->exec_queue.xa);
mutex_destroy(&xef->exec_queue.lock);
xa_destroy(&xef->vm.xa);
mutex_destroy(&xef->vm.lock);
- spin_lock(&xe->clients.lock);
- xe->clients.count--;
- spin_unlock(&xe->clients.lock);
-
xe_drm_client_put(xef->client);
kfree(xef->process_name);
kfree(xef);
@@ -333,7 +324,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe->info.force_execlist = xe_modparam.force_execlist;
spin_lock_init(&xe->irq.lock);
- spin_lock_init(&xe->clients.lock);
init_waitqueue_head(&xe->ufence_wq);
@@ -890,7 +880,7 @@ void xe_device_l2_flush(struct xe_device *xe)
spin_lock(&gt->global_invl_lock);
xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
- if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
+ if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
@@ -980,13 +970,13 @@ void xe_device_declare_wedged(struct xe_device *xe)
return;
}
+ xe_pm_runtime_get_noresume(xe);
+
if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
return;
}
- xe_pm_runtime_get_noresume(xe);
-
if (!atomic_xchg(&xe->wedged.flag, 1)) {
xe->needs_flr_on_fini = true;
drm_err(&xe->drm,
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 894f04770454..34620ef855c0 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -178,4 +178,18 @@ void xe_device_declare_wedged(struct xe_device *xe);
struct xe_file *xe_file_get(struct xe_file *xef);
void xe_file_put(struct xe_file *xef);
+/*
+ * Occasionally it is seen that the G2H worker starts running after a delay of more than
+ * a second even after being queued and activated by the Linux workqueue subsystem. This
+ * leads to G2H timeout error. The root cause of issue lies with scheduling latency of
+ * Lunarlake Hybrid CPU. Issue disappears if we disable Lunarlake atom cores from BIOS
+ * and this is beyond xe kmd.
+ *
+ * TODO: Drop this change once workqueue scheduling delay issue is fixed on LNL Hybrid CPU.
+ */
+#define LNL_FLUSH_WORKQUEUE(wq__) \
+ flush_workqueue(wq__)
+#define LNL_FLUSH_WORK(wrk__) \
+ flush_work(wrk__)
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 09d731a9125c..687f3a9039bb 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -353,15 +353,6 @@ struct xe_device {
struct workqueue_struct *wq;
} sriov;
- /** @clients: drm clients info */
- struct {
- /** @clients.lock: Protects drm clients info */
- spinlock_t lock;
-
- /** @clients.count: number of drm clients */
- u64 count;
- } clients;
-
/** @usm: unified memory state */
struct {
/** @usm.asid: convert a ASID to VM */
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 7b38485817dc..31cca938956f 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -41,11 +41,6 @@
* user knows an exec writes to a BO and reads from the BO in the next exec, it
* is the user's responsibility to pass in / out fence between the two execs).
*
- * Implicit dependencies for external BOs are handled by using the dma-buf
- * implicit dependency uAPI (TODO: add link). To make this works each exec must
- * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
- * BO mapped in the VM.
- *
* We do not allow a user to trigger a bind at exec time rather we have a VM
* bind IOCTL which uses the same in / out fence interface as exec. In that
* sense, a VM bind is basically the same operation as an exec from the user
@@ -59,8 +54,8 @@
* behind any pending kernel operations on any external BOs in VM or any BOs
* private to the VM. This is accomplished by the rebinds waiting on BOs
* DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs
- * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and
- * in DMA_RESV_USAGE_WRITE for external BOs).
+ * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP for private BOs and
+ * for external BOs).
*
* Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute
* mode VMs we use preempt fences and a rebind worker (TODO: add link).
@@ -137,12 +132,16 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (XE_IOCTL_DBG(xe, !q))
return -ENOENT;
- if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
- return -EINVAL;
+ if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM)) {
+ err = -EINVAL;
+ goto err_exec_queue;
+ }
if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
- q->width != args->num_batch_buffer))
- return -EINVAL;
+ q->width != args->num_batch_buffer)) {
+ err = -EINVAL;
+ goto err_exec_queue;
+ }
if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) {
err = -ECANCELED;
@@ -204,14 +203,14 @@ retry:
write_locked = false;
}
if (err)
- goto err_syncs;
+ goto err_hw_exec_mode;
if (write_locked) {
err = xe_vm_userptr_pin(vm);
downgrade_write(&vm->lock);
write_locked = false;
if (err)
- goto err_hw_exec_mode;
+ goto err_unlock_list;
}
if (!args->num_batch_buffer) {
@@ -225,6 +224,7 @@ retry:
fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
+ xe_vm_unlock(vm);
goto err_unlock_list;
}
for (i = 0; i < num_syncs; i++)
@@ -304,7 +304,8 @@ retry:
xe_sched_job_arm(job);
if (!xe_vm_in_lr_mode(vm))
drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished,
- DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
+ DMA_RESV_USAGE_BOOKKEEP,
+ DMA_RESV_USAGE_BOOKKEEP);
for (i = 0; i < num_syncs; i++) {
xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index d098d2dd1b2d..fd0f3b3c9101 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -260,8 +260,14 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
{
int i;
+ /*
+ * Before releasing our ref to lrc and xef, accumulate our run ticks
+ */
+ xe_exec_queue_update_run_ticks(q);
+
for (i = 0; i < q->width; ++i)
xe_lrc_put(q->lrc[i]);
+
__xe_exec_queue_free(q);
}
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index b263fff15273..7d9fc489dcb8 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -115,9 +115,15 @@ static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain,
XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC,
&value, true);
if (ret)
- xe_gt_notice(gt, "Force wake domain %d failed to ack %s (%pe) reg[%#x] = %#x\n",
- domain->id, str_wake_sleep(wake), ERR_PTR(ret),
- domain->reg_ack.addr, value);
+ xe_gt_err(gt, "Force wake domain %d failed to ack %s (%pe) reg[%#x] = %#x\n",
+ domain->id, str_wake_sleep(wake), ERR_PTR(ret),
+ domain->reg_ack.addr, value);
+ if (value == ~0) {
+ xe_gt_err(gt,
+ "Force wake domain %d: %s. MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n",
+ domain->id, str_wake_sleep(wake));
+ ret = -EIO;
+ }
return ret;
}
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 2895f154654c..ff19eca5d358 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -397,6 +397,16 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
+ struct xe_device *xe = tile_to_xe(ggtt->tile);
+
+ /*
+ * XXX: Barrier for GGTT pages. Unsure exactly why this required but
+ * without this LNL is having issues with the GuC reading scratch page
+ * vs. correct GGTT page. Not particularly a hot code path so blindly
+ * do a mmio read here which results in GuC reading correct GGTT page.
+ */
+ xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+
/* Each GT in a tile has its own TLB to cache GGTT lookups */
ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index 5ad5629a6c60..64b2ae6839db 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -63,7 +63,9 @@ xe_sched_invalidate_job(struct xe_sched_job *job, int threshold)
static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
struct xe_sched_job *job)
{
+ spin_lock(&sched->base.job_list_lock);
list_add(&job->drm.list, &sched->base.pending_list);
+ spin_unlock(&sched->base.job_list_lock);
}
static inline
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index f0dc2bf24c7b..d5fd6a089b7c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -108,7 +108,6 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
return;
if (!xe_gt_is_media_type(gt)) {
- xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg |= CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
@@ -874,7 +873,9 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
int ret = 0;
if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
- xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) && XE_WA(gt, 22019338487))
+ xe_uc_fw_is_loaded(&gt->uc.gsc.fw) ||
+ xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) &&
+ XE_WA(gt, 22019338487))
ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
return ret;
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index d2e4dc3aaf61..ffcbd05671fc 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -68,6 +68,12 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
}
}
+ /*
+ * Mask bits need to be set for the register. Though only Xe2+
+ * platforms require setting of mask bits, it won't harm for older
+ * platforms as these bits are unused there.
+ */
+ mode |= CCS_MODE_CSLICE_0_3_MASK << 16;
xe_mmio_write32(gt, CCS_MODE, mode);
xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
@@ -133,9 +139,10 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
}
/* CCS mode can only be updated when there are no drm clients */
- spin_lock(&xe->clients.lock);
- if (xe->clients.count) {
- spin_unlock(&xe->clients.lock);
+ mutex_lock(&xe->drm.filelist_mutex);
+ if (!list_empty(&xe->drm.filelist)) {
+ mutex_unlock(&xe->drm.filelist_mutex);
+ xe_gt_dbg(gt, "Rejecting compute mode change as there are active drm clients\n");
return -EBUSY;
}
@@ -146,7 +153,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
xe_gt_reset_async(gt);
}
- spin_unlock(&xe->clients.lock);
+ mutex_unlock(&xe->drm.filelist_mutex);
return count;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 8250ef71e685..afdb477ecf83 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -387,6 +387,8 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node)
* the xe_ggtt_clear() called by below xe_ggtt_remove_node().
*/
xe_ggtt_node_remove(node, false);
+ } else {
+ xe_ggtt_node_fini(node);
}
}
@@ -442,7 +444,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
config->ggtt_region = node;
return 0;
err:
- xe_ggtt_node_fini(node);
+ pf_release_ggtt(tile, node);
return err;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index cca9cf536f76..9d82ea30f4df 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -37,6 +37,15 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
return hw_tlb_timeout + 2 * delay;
}
+static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
+{
+ if (WARN_ON_ONCE(!fence->gt))
+ return;
+
+ xe_pm_runtime_put(gt_to_xe(fence->gt));
+ fence->gt = NULL; /* fini() should be called once */
+}
+
static void
__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
{
@@ -63,6 +72,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
struct xe_device *xe = gt_to_xe(gt);
struct xe_gt_tlb_invalidation_fence *fence, *next;
+ LNL_FLUSH_WORK(&gt->uc.guc.ct.g2h_worker);
+
spin_lock_irq(&gt->tlb_invalidation.pending_lock);
list_for_each_entry_safe(fence, next,
&gt->tlb_invalidation.pending_fences, link) {
@@ -204,7 +215,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
tlb_timeout_jiffies(gt));
}
spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
- } else if (ret < 0) {
+ } else {
__invalidation_fence_signal(xe, fence);
}
if (!ret) {
@@ -267,10 +278,8 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
ret = xe_gt_tlb_invalidation_guc(gt, &fence);
- if (ret < 0) {
- xe_gt_tlb_invalidation_fence_fini(&fence);
+ if (ret)
return ret;
- }
xe_gt_tlb_invalidation_fence_wait(&fence);
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
@@ -496,7 +505,8 @@ static const struct dma_fence_ops invalidation_fence_ops = {
* @stack: fence is stack variable
*
* Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
- * must be called if fence is not signaled.
+ * will be automatically called when fence is signalled (all fences must signal),
+ * even on error.
*/
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
@@ -516,14 +526,3 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
dma_fence_get(&fence->base);
fence->gt = gt;
}
-
-/**
- * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence
- * @fence: TLB invalidation fence to finalize
- *
- * Drop PM ref which fence took durinig init.
- */
-void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
-{
- xe_pm_runtime_put(gt_to_xe(fence->gt));
-}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index a84065fa324c..f430d5797af7 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -28,7 +28,6 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
bool stack);
-void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
static inline void
xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index f24dd5223926..9c505d3517cd 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -667,16 +667,12 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
num_g2h = 1;
if (g2h_fence_needs_alloc(g2h_fence)) {
- void *ptr;
-
g2h_fence->seqno = next_ct_seqno(ct, true);
- ptr = xa_store(&ct->fence_lookup,
- g2h_fence->seqno,
- g2h_fence, GFP_ATOMIC);
- if (IS_ERR(ptr)) {
- ret = PTR_ERR(ptr);
+ ret = xa_err(xa_store(&ct->fence_lookup,
+ g2h_fence->seqno, g2h_fence,
+ GFP_ATOMIC));
+ if (ret)
goto out;
- }
}
seqno = g2h_fence->seqno;
@@ -879,14 +875,11 @@ retry:
retry_same_fence:
ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
if (unlikely(ret == -ENOMEM)) {
- void *ptr;
-
/* Retry allocation /w GFP_KERNEL */
- ptr = xa_store(&ct->fence_lookup,
- g2h_fence.seqno,
- &g2h_fence, GFP_KERNEL);
- if (IS_ERR(ptr))
- return PTR_ERR(ptr);
+ ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
+ &g2h_fence, GFP_KERNEL));
+ if (ret)
+ return ret;
goto retry_same_fence;
} else if (unlikely(ret)) {
@@ -903,16 +896,35 @@ retry_same_fence:
}
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
if (!ret) {
- xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
- g2h_fence.seqno, action[0]);
+ LNL_FLUSH_WORK(&ct->g2h_worker);
+ if (g2h_fence.done) {
+ xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
+ g2h_fence.seqno, action[0]);
+ ret = 1;
+ }
+ }
+
+ /*
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
+ if (!ret) {
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ mutex_unlock(&ct->lock);
return -ETIME;
}
if (g2h_fence.retry) {
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
goto retry;
}
if (g2h_fence.fail) {
@@ -921,7 +933,12 @@ retry_same_fence:
ret = -EIO;
}
- return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 80062e1d3f66..4f5d00aea716 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -224,80 +224,11 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
EXEC_QUEUE_STATE_BANNED));
}
-#ifdef CONFIG_PROVE_LOCKING
-static int alloc_submit_wq(struct xe_guc *guc)
-{
- int i;
-
- for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
- guc->submission_state.submit_wq_pool[i] =
- alloc_ordered_workqueue("submit_wq", 0);
- if (!guc->submission_state.submit_wq_pool[i])
- goto err_free;
- }
-
- return 0;
-
-err_free:
- while (i)
- destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);
-
- return -ENOMEM;
-}
-
-static void free_submit_wq(struct xe_guc *guc)
-{
- int i;
-
- for (i = 0; i < NUM_SUBMIT_WQ; ++i)
- destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
-}
-
-static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
-{
- int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;
-
- return guc->submission_state.submit_wq_pool[idx];
-}
-#else
-static int alloc_submit_wq(struct xe_guc *guc)
-{
- return 0;
-}
-
-static void free_submit_wq(struct xe_guc *guc)
-{
-
-}
-
-static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
-{
- return NULL;
-}
-#endif
-
-static void xe_guc_submit_fini(struct xe_guc *guc)
-{
- struct xe_device *xe = guc_to_xe(guc);
- struct xe_gt *gt = guc_to_gt(guc);
- int ret;
-
- ret = wait_event_timeout(guc->submission_state.fini_wq,
- xa_empty(&guc->submission_state.exec_queue_lookup),
- HZ * 5);
-
- drain_workqueue(xe->destroy_wq);
-
- xe_gt_assert(gt, ret);
-}
-
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
- xe_guc_submit_fini(guc);
xa_destroy(&guc->submission_state.exec_queue_lookup);
- free_submit_wq(guc);
}
static void guc_submit_wedged_fini(void *arg)
@@ -359,10 +290,6 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
if (err)
return err;
- err = alloc_submit_wq(guc);
- if (err)
- return err;
-
gt->exec_queue_ops = &guc_exec_queue_ops;
xa_init(&guc->submission_state.exec_queue_lookup);
@@ -393,7 +320,6 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
int ret;
- void *ptr;
int i;
/*
@@ -413,12 +339,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
q->guc->id = ret;
for (i = 0; i < q->width; ++i) {
- ptr = xa_store(&guc->submission_state.exec_queue_lookup,
- q->guc->id + i, q, GFP_NOWAIT);
- if (IS_ERR(ptr)) {
- ret = PTR_ERR(ptr);
+ ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
+ q->guc->id + i, q, GFP_NOWAIT));
+ if (ret)
goto err_release;
- }
}
return 0;
@@ -821,8 +745,6 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
struct xe_sched_job *job = to_xe_sched_job(drm_job);
- xe_exec_queue_update_run_ticks(job->q);
-
trace_xe_sched_job_free(job);
xe_sched_job_put(job);
}
@@ -992,12 +914,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
- u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
- u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+ u32 ctx_timestamp, ctx_job_timestamp;
u32 timeout_ms = q->sched_props.job_timeout_ms;
u32 diff;
u64 running_time_ms;
+ if (!xe_sched_job_started(job)) {
+ xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
+ xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+ q->guc->id);
+
+ return xe_sched_invalidate_job(job, 2);
+ }
+
+ ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+ ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+
/*
* Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
* possible overflows with a high timeout.
@@ -1106,10 +1038,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
/*
* TDR has fired before free job worker. Common if exec queue
- * immediately closed after last fence signaled.
+ * immediately closed after last fence signaled. Add back to pending
+ * list so job can be freed and kick scheduler ensuring free job is not
+ * lost.
*/
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
- guc_exec_queue_free_job(drm_job);
+ xe_sched_add_pending_job(sched, job);
+ xe_sched_submission_start(sched);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
@@ -1122,10 +1057,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
exec_queue_killed_or_banned_or_wedged(q) ||
exec_queue_destroyed(q);
- /* Job hasn't started, can't be timed out */
- if (!skip_timeout_check && !xe_sched_job_started(job))
- goto rearm;
-
/*
* XXX: Sampling timeout doesn't work in wedged mode as we have to
* modify scheduling state to read timestamp. We could read the
@@ -1482,8 +1413,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
msecs_to_jiffies(q->sched_props.job_timeout_ms);
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
- get_submit_wq(guc),
- q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
+ NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
timeout, guc_to_gt(guc)->ordered_wq, NULL,
q->name, gt_to_xe(q->gt)->drm.dev);
if (err)
@@ -1800,8 +1730,13 @@ void xe_guc_submit_stop(struct xe_guc *guc)
mutex_lock(&guc->submission_state.lock);
- xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ /* Prevent redundant attempts to stop parallel queues */
+ if (q->guc->id != index)
+ continue;
+
guc_exec_queue_stop(guc, q);
+ }
mutex_unlock(&guc->submission_state.lock);
@@ -1839,8 +1774,13 @@ int xe_guc_submit_start(struct xe_guc *guc)
mutex_lock(&guc->submission_state.lock);
atomic_dec(&guc->submission_state.stopped);
- xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ /* Prevent redundant attempts to start parallel queues */
+ if (q->guc->id != index)
+ continue;
+
guc_exec_queue_start(q);
+ }
mutex_unlock(&guc->submission_state.lock);
wake_up_all(&guc->ct.wq);
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 69046f698271..ed150fc09ad0 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -72,13 +72,6 @@ struct xe_guc {
atomic_t stopped;
/** @submission_state.lock: protects submission state */
struct mutex lock;
-#ifdef CONFIG_PROVE_LOCKING
-#define NUM_SUBMIT_WQ 256
- /** @submission_state.submit_wq_pool: submission ordered workqueues pool */
- struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ];
- /** @submission_state.submit_wq_idx: submission ordered workqueue index */
- int submit_wq_idx;
-#endif
/** @submission_state.enabled: submission is enabled */
bool enabled;
/** @submission_state.fini_wq: submit fini wait queue */
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 2804f14f8f29..78823f53d290 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1206,9 +1206,11 @@ static int xe_oa_release(struct inode *inode, struct file *file)
struct xe_oa_stream *stream = file->private_data;
struct xe_gt *gt = stream->gt;
+ xe_pm_runtime_get(gt_to_xe(gt));
mutex_lock(&gt->oa.gt_lock);
xe_oa_destroy_locked(stream);
mutex_unlock(&gt->oa.gt_lock);
+ xe_pm_runtime_put(gt_to_xe(gt));
/* Release the reference the OA stream kept on the driver */
drm_dev_put(&gt_to_xe(gt)->drm);
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 7cf2160fe040..33eb039053e4 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -123,7 +123,7 @@ int xe_pm_suspend(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
- xe_display_pm_suspend(xe, false);
+ xe_display_pm_suspend(xe);
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
@@ -133,7 +133,7 @@ int xe_pm_suspend(struct xe_device *xe)
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
if (err) {
- xe_display_pm_resume(xe, false);
+ xe_display_pm_resume(xe);
goto err;
}
}
@@ -187,7 +187,7 @@ int xe_pm_resume(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_resume(gt);
- xe_display_pm_resume(xe, false);
+ xe_display_pm_resume(xe);
err = xe_bo_restore_user(xe);
if (err)
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 28d9bb3b825d..848da8e68c7a 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -161,7 +161,11 @@ query_engine_cycles(struct xe_device *xe,
cpu_clock);
xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- resp.width = 36;
+
+ if (GRAPHICS_VER(xe) >= 20)
+ resp.width = 64;
+ else
+ resp.width = 36;
/* Only write to the output fields of user query */
if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index bb3c2a830362..2e72c06fd40d 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -54,11 +54,12 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
{
struct xe_user_fence *ufence;
u64 __user *ptr = u64_to_user_ptr(addr);
+ u64 __maybe_unused prefetch_val;
- if (!access_ok(ptr, sizeof(*ptr)))
+ if (get_user(prefetch_val, ptr))
return ERR_PTR(-EFAULT);
- ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
+ ufence = kzalloc(sizeof(*ufence), GFP_KERNEL);
if (!ufence)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index ce9dca4d4e87..c99380271de6 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3199,10 +3199,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
&fence[fence_id], vma);
- if (ret < 0) {
- xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+ if (ret)
goto wait;
- }
++fence_id;
if (!tile->media_gt)
@@ -3214,10 +3212,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
&fence[fence_id], vma);
- if (ret < 0) {
- xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]);
+ if (ret)
goto wait;
- }
++fence_id;
}
}
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index d424992514a4..353936a0f877 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -710,6 +710,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
DIS_PARTIAL_AUTOSTRIP |
DIS_AUTOSTRIP))
},
+ { XE_RTP_NAME("15016589081"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
+ },
/* Xe2_HPG */
{ XE_RTP_NAME("15010599737"),
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index d46fa8374980..5b4264ea38bd 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -155,6 +155,13 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
}
if (!timeout) {
+ LNL_FLUSH_WORKQUEUE(xe->ordered_wq);
+ err = do_compare(addr, args->value, args->mask,
+ args->op);
+ if (err <= 0) {
+ drm_dbg(&xe->drm, "LNL_FLUSH_WORKQUEUE resolved ufence timeout\n");
+ break;
+ }
err = -ETIME;
break;
}
@@ -169,9 +176,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
args->timeout = 0;
}
- if (!timeout && !(err < 0))
- err = -ETIME;
-
if (q)
xe_exec_queue_put(q);