-rw-r--r--  .mailmap | 1
-rw-r--r--  Documentation/ABI/testing/sysfs-driver-intel-xe-sriov | 2
-rw-r--r--  Documentation/gpu/xe/xe_exec_queue.rst | 14
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  drivers/gpu/drm/drm_gpusvm.c | 3
-rw-r--r--  drivers/gpu/drm/xe/Makefile | 2
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_abi.h | 6
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 67
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 9
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_lfd_abi.h | 171
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_lic_abi.h | 77
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_log_abi.h | 42
-rw-r--r--  drivers/gpu/drm/xe/display/xe_fb_pin.c | 23
-rw-r--r--  drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 31
-rw-r--r--  drivers/gpu/drm/xe/instructions/xe_gpu_commands.h | 1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gtt_defs.h | 1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_guc_regs.h | 3
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_irq_regs.h | 1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_mert_regs.h | 21
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_oa_regs.h | 17
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_args_test.c | 54
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_bo.c | 10
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_dma_buf.c | 3
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_migrate.c | 10
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_mocs.c | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_args.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c | 26
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_debugfs.c | 143
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.c | 30
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 100
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_device_sysfs.c | 33
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_drm_client.c | 67
-rw-r--r--  drivers/gpu/drm/xe/xe_exec.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c | 455
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.h | 68
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue_types.h | 62
-rw-r--r--  drivers/gpu/drm/xe/xe_execlist.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake.c | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_force_wake.h | 40
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc.c | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_debugfs.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_proxy.c | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 174
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_debugfs.c | 30
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_freq.c | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_idle.c | 41
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 170
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_stats.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_stats.h | 32
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_stats_types.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_throttle.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_types.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c | 80
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.h | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_buf.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_capture.c | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c | 273
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_debugfs.c | 15
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_fwif.h | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log.c | 507
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log.h | 30
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c | 66
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 705
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.h | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit_types.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 41
-rw-r--r--  drivers/gpu/drm/xe/xe_huc.c | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_huc_debugfs.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_group.c | 79
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_group.h | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_hwmon.c | 52
-rw-r--r--  drivers/gpu/drm/xe/xe_i2c.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_irq.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_lmtt.c | 25
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c | 73
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.h | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc_types.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_mert.c | 82
-rw-r--r--  drivers/gpu/drm/xe/xe_mert.h | 32
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c | 57
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_mocs.c | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_nvm.c | 34
-rw-r--r--  drivers/gpu/drm/xe/xe_oa.c | 94
-rw-r--r--  drivers/gpu/drm/xe/xe_oa_types.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_page_reclaim.c | 136
-rw-r--r--  drivers/gpu/drm/xe/xe_page_reclaim.h | 105
-rw-r--r--  drivers/gpu/drm/xe/xe_pagefault.c | 36
-rw-r--r--  drivers/gpu/drm/xe/xe_pat.c | 220
-rw-r--r--  drivers/gpu/drm/xe/xe_pat.h | 21
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 29
-rw-r--r--  drivers/gpu/drm/xe/xe_pci_sriov.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_pci_types.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode_api.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_pmu.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_pt.c | 135
-rw-r--r--  drivers/gpu/drm/xe/xe_pt_types.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_pxp.c | 55
-rw-r--r--  drivers/gpu/drm/xe/xe_query.c | 23
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_sr.c | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_whitelist.c | 81
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c | 68
-rw-r--r--  drivers/gpu/drm/xe/xe_sa.c | 67
-rw-r--r--  drivers/gpu/drm/xe/xe_sa.h | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_sa_types.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_packet.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_pf.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_vf.c | 84
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_vf_ccs.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_survivability_mode.c | 273
-rw-r--r--  drivers/gpu/drm/xe/xe_survivability_mode_types.h | 28
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.c | 88
-rw-r--r--  drivers/gpu/drm/xe/xe_sync.c | 33
-rw-r--r--  drivers/gpu/drm/xe/xe_sync.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_tile.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_tile_debugfs.c | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_tlb_inval.c | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_tlb_inval.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_tlb_inval_job.c | 36
-rw-r--r--  drivers/gpu/drm/xe/xe_tlb_inval_job.h | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_tlb_inval_types.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_trace.h | 46
-rw-r--r--  drivers/gpu/drm/xe/xe_uc.c | 35
-rw-r--r--  drivers/gpu/drm/xe/xe_uc.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c | 154
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_types.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_vram.c | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c | 52
-rw-r--r--  drivers/gpu/drm/xe/xe_wa_oob.rules | 6
-rw-r--r--  include/uapi/drm/xe_drm.h | 81
152 files changed, 5329 insertions, 1498 deletions
diff --git a/.mailmap b/.mailmap
index 84309a39d329..44cea28596e7 100644
--- a/.mailmap
+++ b/.mailmap
@@ -481,6 +481,7 @@ Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com>
Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
Luca Weiss <luca@lucaweiss.eu> <luca@z3ntu.xyz>
+Lucas De Marchi <demarchi@kernel.org> <lucas.demarchi@intel.com>
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
Lance Yang <lance.yang@linux.dev> <ioworker0@gmail.com>
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov
index 2fd7e9b7bacc..7f5ef9eada53 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov
@@ -119,7 +119,7 @@ Description:
The GT preemption timeout (PT) in [us] to be applied to all functions.
See sriov_admin/{pf,vf<N>}/profile/preempt_timeout_us for more details.
- sched_priority: (RW/RO) string
+ sched_priority: (WO) string
The GT scheduling priority to be applied for all functions.
See sriov_admin/{pf,vf<N>}/profile/sched_priority for more details.
diff --git a/Documentation/gpu/xe/xe_exec_queue.rst b/Documentation/gpu/xe/xe_exec_queue.rst
index 6076569e311c..8707806211c9 100644
--- a/Documentation/gpu/xe/xe_exec_queue.rst
+++ b/Documentation/gpu/xe/xe_exec_queue.rst
@@ -7,6 +7,20 @@ Execution Queue
.. kernel-doc:: drivers/gpu/drm/xe/xe_exec_queue.c
:doc: Execution Queue
+Multi Queue Group
+=================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_exec_queue.c
+ :doc: Multi Queue Group
+
+.. _multi-queue-group-guc-interface:
+
+Multi Queue Group GuC interface
+===============================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_submit.c
+ :doc: Multi Queue Group GuC interface
+
Internal API
============
diff --git a/MAINTAINERS b/MAINTAINERS
index bdbe32ddcedb..b8a5569606d2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12640,7 +12640,7 @@ F: include/drm/intel/
F: include/uapi/drm/i915_drm.h
INTEL DRM XE DRIVER (Lunar Lake and newer)
-M: Lucas De Marchi <lucas.demarchi@intel.com>
+M: Matthew Brost <matthew.brost@intel.com>
M: Thomas Hellström <thomas.hellstrom@linux.intel.com>
M: Rodrigo Vivi <rodrigo.vivi@intel.com>
L: intel-xe@lists.freedesktop.org
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index 73e550c8ff8c..39c8c50401dd 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -1288,6 +1288,9 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
DMA_BIDIRECTIONAL;
retry:
+ if (time_after(jiffies, timeout))
+ return -EBUSY;
+
hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages))
goto set_seqno;
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 35f71dbd1bac..7f08b4cd91d6 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -95,6 +95,7 @@ xe-y += xe_bb.o \
xe_oa.o \
xe_observation.o \
xe_pagefault.o \
+ xe_page_reclaim.o \
xe_pat.o \
xe_pci.o \
xe_pcode.o \
@@ -173,6 +174,7 @@ xe-$(CONFIG_PCI_IOV) += \
xe_lmtt.o \
xe_lmtt_2l.o \
xe_lmtt_ml.o \
+ xe_mert.o \
xe_pci_sriov.o \
xe_sriov_packet.o \
xe_sriov_pf.o \
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 47756e4674a1..83a6e7794982 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -139,6 +139,10 @@ enum xe_guc_action {
XE_GUC_ACTION_DEREGISTER_G2G = 0x4508,
XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
+ XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE = 0x4602,
+ XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC = 0x4603,
+ XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE = 0x4604,
+ XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR = 0x4605,
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,
@@ -151,6 +155,8 @@ enum xe_guc_action {
XE_GUC_ACTION_TLB_INVALIDATION = 0x7000,
XE_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
XE_GUC_ACTION_TLB_INVALIDATION_ALL = 0x7002,
+ XE_GUC_ACTION_PAGE_RECLAMATION = 0x7003,
+ XE_GUC_ACTION_PAGE_RECLAMATION_DONE = 0x7004,
XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
index 0b28659d94e9..d9f21202e1a9 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -502,13 +502,17 @@
#define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
/**
- * DOC: VF2GUC_NOTIFY_RESFIX_DONE
+ * DOC: VF2GUC_RESFIX_DONE
*
- * This action is used by VF to notify the GuC that the VF KMD has completed
- * post-migration recovery steps.
+ * This action is used by VF to inform the GuC that the VF KMD has completed
+ * post-migration recovery steps. From GuC VF compatibility 1.27.0 onwards, it
+ * shall only be sent after posting RESFIX_START, and both @MARKER fields
+ * must match.
*
* This message must be sent as `MMIO HXG Message`_.
*
+ * Updated since GuC VF compatibility 1.27.0.
+ *
* +---+-------+--------------------------------------------------------------+
* | | Bits | Description |
* +===+=======+==============================================================+
@@ -516,9 +520,11 @@
* | +-------+--------------------------------------------------------------+
* | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
* | +-------+--------------------------------------------------------------+
- * | | 27:16 | DATA0 = MBZ |
+ * | | 27:16 | DATA0 = MARKER = MBZ (only prior 1.27.0) |
* | +-------+--------------------------------------------------------------+
- * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 |
+ * | | 27:16 | DATA0 = MARKER - can't be zero (1.27.0+) |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_RESFIX_DONE` = 0x5508 |
* +---+-------+--------------------------------------------------------------+
*
* +---+-------+--------------------------------------------------------------+
@@ -531,13 +537,13 @@
* | | 27:0 | DATA0 = MBZ |
* +---+-------+--------------------------------------------------------------+
*/
-#define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u
+#define GUC_ACTION_VF2GUC_RESFIX_DONE 0x5508u
-#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN
-#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define VF2GUC_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN
+#define VF2GUC_RESFIX_DONE_REQUEST_MSG_0_MARKER GUC_HXG_REQUEST_MSG_0_DATA0
-#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
-#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
+#define VF2GUC_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define VF2GUC_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
/**
* DOC: VF2GUC_QUERY_SINGLE_KLV
@@ -656,4 +662,45 @@
#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
#define PF2GUC_SAVE_RESTORE_VF_RESPONSE_MSG_0_USED GUC_HXG_RESPONSE_MSG_0_DATA0
+/**
+ * DOC: VF2GUC_RESFIX_START
+ *
+ * This action is used by VF to inform the GuC that the VF KMD will be starting
+ * post-migration recovery fixups. The @MARKER sent with this action must match
+ * the @MARKER posted in the VF2GUC_RESFIX_DONE message.
+ *
+ * This message must be sent as `MMIO HXG Message`_.
+ *
+ * Available since GuC VF compatibility 1.27.0.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | DATA0 = MARKER - can't be zero |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_RESFIX_START` = 0x550F |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_VF2GUC_RESFIX_START 0x550Fu
+
+#define VF2GUC_RESFIX_START_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN
+#define VF2GUC_RESFIX_START_REQUEST_MSG_0_MARKER GUC_HXG_REQUEST_MSG_0_DATA0
+
+#define VF2GUC_RESFIX_START_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define VF2GUC_RESFIX_START_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0
+
#endif
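To make the RESFIX_START/RESFIX_DONE handshake above concrete, here is a small, self-contained userspace sketch (editorial illustration, not part of the patch) that packs the MMIO HXG request dword from the bit layout in the tables. The bit positions come from the tables above; the ORIGIN/TYPE encodings of 0 for host origin and request type are assumptions carried over from the existing GuC HXG ABI.

#include <stdint.h>
#include <stdio.h>

#define HXG_ORIGIN(x)  ((uint32_t)(x) << 31)            /* bit 31 */
#define HXG_TYPE(x)    (((uint32_t)(x) & 0x7) << 28)    /* bits 30:28 */
#define HXG_DATA0(x)   (((uint32_t)(x) & 0xfff) << 16)  /* bits 27:16 */
#define HXG_ACTION(x)  ((uint32_t)(x) & 0xffff)         /* bits 15:0 */

#define ORIGIN_HOST         0u       /* assumed GUC_HXG_ORIGIN_HOST */
#define TYPE_REQUEST        0u       /* assumed GUC_HXG_TYPE_REQUEST */
#define ACTION_RESFIX_START 0x550Fu
#define ACTION_RESFIX_DONE  0x5508u

static uint32_t resfix_msg(uint16_t action, uint16_t marker)
{
        /* MARKER must be non-zero and identical in START and DONE (1.27.0+) */
        return HXG_ORIGIN(ORIGIN_HOST) | HXG_TYPE(TYPE_REQUEST) |
               HXG_DATA0(marker) | HXG_ACTION(action);
}

int main(void)
{
        uint16_t marker = 0x123;        /* arbitrary non-zero example marker */

        printf("RESFIX_START: 0x%08x\n", resfix_msg(ACTION_RESFIX_START, marker));
        printf("RESFIX_DONE:  0x%08x\n", resfix_msg(ACTION_RESFIX_DONE, marker));
        return 0;
}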
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 265a135e7061..89a4f8c504e6 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -352,6 +352,12 @@ enum {
* :1: NORMAL = schedule VF always, irrespective of whether it has work or not
* :2: HIGH = schedule VF in the next time-slice after current active
* time-slice completes if it has active work
+ *
+ * _`GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT` : 0x8A0D
+ * Given that multi-LRC contexts are incompatible with SRIOV scheduler
+ * groups and cause the latter to be turned off when registered with the
+ * GuC, this config allows the PF to set a threshold for multi-LRC context
+ * registrations by VFs to monitor their behavior.
*/
#define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001
@@ -410,6 +416,9 @@ enum {
#define GUC_SCHED_PRIORITY_NORMAL 1u
#define GUC_SCHED_PRIORITY_HIGH 2u
+#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_KEY 0x8a0d
+#define GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_LEN 1u
+
/*
* Workaround keys:
*/
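For illustration only (not part of the patch): a minimal sketch of how the PF side might encode the new multi-LRC threshold as a KLV, assuming the usual GuC KLV layout of the key in the upper 16 bits of the header dword and the value length (in dwords) in the lower 16 bits.

#include <stdint.h>
#include <stdio.h>

#define KLV_KEY_MULTI_LRC_COUNT 0x8a0du /* GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_KEY */
#define KLV_LEN_MULTI_LRC_COUNT 1u      /* GUC_KLV_VF_CFG_THRESHOLD_MULTI_LRC_COUNT_LEN */

/* Encode a single-dword KLV: one header dword (key | len) followed by the value. */
static int encode_multi_lrc_threshold(uint32_t *buf, uint32_t threshold)
{
        buf[0] = (KLV_KEY_MULTI_LRC_COUNT << 16) | KLV_LEN_MULTI_LRC_COUNT;
        buf[1] = threshold;
        return 2;       /* number of dwords written */
}

int main(void)
{
        uint32_t klv[2];
        int i, n = encode_multi_lrc_threshold(klv, 8);

        for (i = 0; i < n; i++)
                printf("dw%d: 0x%08x\n", i, klv[i]);
        return 0;
}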
diff --git a/drivers/gpu/drm/xe/abi/guc_lfd_abi.h b/drivers/gpu/drm/xe/abi/guc_lfd_abi.h
new file mode 100644
index 000000000000..b6ed20d5b508
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_lfd_abi.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_LFD_ABI_H_
+#define _ABI_GUC_LFD_ABI_H_
+
+#include <linux/types.h>
+
+#include "guc_lic_abi.h"
+
+/* The current major version of GuC-Log-File format. */
+#define GUC_LFD_FORMAT_VERSION_MAJOR 0x0001
+/* The current minor version of GuC-Log-File format. */
+#define GUC_LFD_FORMAT_VERSION_MINOR 0x0000
+
+/** enum guc_lfd_type - Log format descriptor type */
+enum guc_lfd_type {
+ /**
+ * @GUC_LFD_TYPE_FW_REQUIRED_RANGE_START: Start of range for
+ * required LFDs from GuC
+ * @GUC_LFD_TYPE_FW_VERSION: GuC Firmware Version structure.
+ * @GUC_LFD_TYPE_GUC_DEVICE_ID: GuC microcontroller device ID.
+ * @GUC_LFD_TYPE_TSC_FREQUENCY: Frequency of GuC timestamps.
+ * @GUC_LFD_TYPE_GMD_ID: HW GMD ID.
+ * @GUC_LFD_TYPE_BUILD_PLATFORM_ID: GuC build platform ID.
+ * @GUC_LFD_TYPE_FW_REQUIRED_RANGE_END: End of range for
+ * required LFDs from GuC
+ */
+ GUC_LFD_TYPE_FW_REQUIRED_RANGE_START = 0x1,
+ GUC_LFD_TYPE_FW_VERSION = 0x1,
+ GUC_LFD_TYPE_GUC_DEVICE_ID = 0x2,
+ GUC_LFD_TYPE_TSC_FREQUENCY = 0x3,
+ GUC_LFD_TYPE_GMD_ID = 0x4,
+ GUC_LFD_TYPE_BUILD_PLATFORM_ID = 0x5,
+ GUC_LFD_TYPE_FW_REQUIRED_RANGE_END = 0x1FFF,
+
+ /**
+ * @GUC_LFD_TYPE_FW_OPTIONAL_RANGE_START: Start of range for
+ * optional LFDs from GuC
+ * @GUC_LFD_TYPE_LOG_EVENTS_BUFFER: Log-event-entries buffer.
+ * @GUC_LFD_TYPE_FW_CRASH_DUMP: GuC generated crash-dump blob.
+ * @GUC_LFD_TYPE_FW_OPTIONAL_RANGE_END: End of range for
+ * optional LFDs from GuC
+ */
+ GUC_LFD_TYPE_FW_OPTIONAL_RANGE_START = 0x2000,
+ GUC_LFD_TYPE_LOG_EVENTS_BUFFER = 0x2000,
+ GUC_LFD_TYPE_FW_CRASH_DUMP = 0x2001,
+ GUC_LFD_TYPE_FW_OPTIONAL_RANGE_END = 0x3FFF,
+
+ /**
+ * @GUC_LFD_TYPE_KMD_REQUIRED_RANGE_START: Start of range for
+ * required KMD LFDs
+ * @GUC_LFD_TYPE_OS_ID: An identifier for the OS.
+ * @GUC_LFD_TYPE_KMD_REQUIRED_RANGE_END: End of this range for
+ * required KMD LFDs
+ */
+ GUC_LFD_TYPE_KMD_REQUIRED_RANGE_START = 0x4000,
+ GUC_LFD_TYPE_OS_ID = 0x4000,
+ GUC_LFD_TYPE_KMD_REQUIRED_RANGE_END = 0x5FFF,
+
+ /**
+ * @GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_START: Start of range for
+ * optional KMD LFDs
+ * @GUC_LFD_TYPE_BINARY_SCHEMA_FORMAT: Binary representation of
+ * GuC log-events schema.
+ * @GUC_LFD_TYPE_HOST_COMMENT: ASCII string containing comments
+ * from the host/KMD.
+ * @GUC_LFD_TYPE_TIMESTAMP_ANCHOR: A timestamp anchor, to convert
+ * between host and GuC timestamp.
+ * @GUC_LFD_TYPE_TIMESTAMP_ANCHOR_CONFIG: Timestamp anchor
+ * configuration, definition of timestamp frequency and bit width.
+ * @GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_END: End of this range for
+ * optional KMD LFDs
+ */
+ GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_START = 0x6000,
+ GUC_LFD_TYPE_BINARY_SCHEMA_FORMAT = 0x6000,
+ GUC_LFD_TYPE_HOST_COMMENT = 0x6001,
+ GUC_LFD_TYPE_TIMESTAMP_ANCHOR = 0x6002,
+ GUC_LFD_TYPE_TIMESTAMP_ANCHOR_CONFIG = 0x6003,
+ GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_END = 0x7FFF,
+
+ /**
+ * @GUC_LFD_TYPE_RESERVED_RANGE_START: Start of reserved range
+ * @GUC_LFD_TYPE_RESERVED_RANGE_END: End of reserved range
+ */
+ GUC_LFD_TYPE_RESERVED_RANGE_START = 0x8000,
+ GUC_LFD_TYPE_RESERVED_RANGE_END = 0xFFFF,
+};
+
+/** enum guc_lfd_os_type - OS Type LFD-ID */
+enum guc_lfd_os_type {
+ /** @GUC_LFD_OS_TYPE_OSID_WIN: Windows OS */
+ GUC_LFD_OS_TYPE_OSID_WIN = 0x1,
+ /** @GUC_LFD_OS_TYPE_OSID_LIN: Linux OS */
+ GUC_LFD_OS_TYPE_OSID_LIN = 0x2,
+ /** @GUC_LFD_OS_TYPE_OSID_VMW: VMWare OS */
+ GUC_LFD_OS_TYPE_OSID_VMW = 0x3,
+ /** @GUC_LFD_OS_TYPE_OSID_OTHER: Other */
+ GUC_LFD_OS_TYPE_OSID_OTHER = 0x4,
+};
+
+/** struct guc_lfd_data - A generic header structure for all LFD blocks */
+struct guc_lfd_data {
+ /** @header: A 32-bit dword that contains multiple bit fields */
+ u32 header;
+ /* LFD type. See guc_lfd_type */
+#define GUC_LFD_DATA_HEADER_MASK_TYPE GENMASK(31, 16)
+#define GUC_LFD_DATA_HEADER_MASK_MAGIC GENMASK(15, 0)
+
+ /** @data_count: Number of dwords the `data` field contains. */
+ u32 data_count;
+ /** @data: Data defined by GUC_LFD_DATA_HEADER_MASK_TYPE */
+ u32 data[] __counted_by(data_count);
+} __packed;
+
+/**
+ * struct guc_lfd_data_log_events_buf - GuC Log Events Buffer.
+ * This is optional fw LFD data
+ */
+struct guc_lfd_data_log_events_buf {
+ /**
+ * @log_events_format_version: version of GuC log format of buffer
+ */
+ u32 log_events_format_version;
+ /**
+ * @log_event: The log event data.
+ * Size in dwords is LFD block size - 1.
+ */
+ u32 log_event[];
+} __packed;
+
+/** struct guc_lfd_data_os_info - OS Version Information. */
+struct guc_lfd_data_os_info {
+ /**
+ * @os_id: enum values to identify the OS brand.
+ * See guc_lfd_os_type for the range of types
+ */
+ u32 os_id;
+ /**
+ * @build_version: ASCII string containing OS build version
+ * information based on os_id. String is padded with null
+ * characters to ensure it is DWORD aligned.
+ * Size in dwords is LFD block size - 1.
+ */
+ char build_version[];
+} __packed;
+
+/**
+ * struct guc_lfd_file_header - Header of GuC Log Streaming-LFD-File Format.
+ * This structure encapsulates the layout of the guc-log-file format.
+ */
+struct guc_lfd_file_header {
+ /**
+ * @magic: A magic number set by producer of a GuC log file to
+ * identify that file is a valid guc-log-file containing a stream
+ * of LFDs.
+ */
+ u64 magic;
+ /** @version: Version of this file format layout */
+ u32 version;
+#define GUC_LFD_FILE_HEADER_VERSION_MASK_MAJOR GENMASK(31, 16)
+#define GUC_LFD_FILE_HEADER_VERSION_MASK_MINOR GENMASK(15, 0)
+
+ /** @stream: A stream of one or more guc_lfd_data LFD blocks
+ */
+ u32 stream[];
+} __packed;
+
+#endif
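For illustration (not part of the patch): a minimal userspace sketch of walking a stream of guc_lfd_data blocks using the header layout defined above (TYPE in bits 31:16, MAGIC in bits 15:0, then data_count payload dwords). The expected per-block magic value is not spelled out in this header, so the walker takes it as a parameter and main() uses an arbitrary placeholder.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Header layout per GUC_LFD_DATA_HEADER_MASK_{TYPE,MAGIC} above. */
#define LFD_HDR_TYPE(h)  ((h) >> 16)
#define LFD_HDR_MAGIC(h) ((h) & 0xffff)

/* Each block is: header dword, data_count dword, then data_count payload dwords. */
static void walk_lfd_stream(const uint32_t *stream, size_t ndw, uint16_t magic)
{
        size_t pos = 0;

        while (pos + 2 <= ndw) {
                uint32_t header = stream[pos];
                uint32_t count = stream[pos + 1];

                if (LFD_HDR_MAGIC(header) != magic)
                        break;  /* not a valid LFD block */
                if (pos + 2 + count > ndw)
                        break;  /* truncated block */

                printf("LFD type 0x%04x, %u payload dword(s)\n",
                       LFD_HDR_TYPE(header), count);
                pos += 2 + count;
        }
}

int main(void)
{
        const uint16_t example_magic = 0xabcd;  /* arbitrary placeholder value */
        /* One OS_ID block (type 0x4000) carrying a single payload dword. */
        uint32_t stream[] = {
                ((uint32_t)0x4000 << 16) | example_magic, 1, 0x2 /* OSID_LIN */,
        };

        walk_lfd_stream(stream, sizeof(stream) / sizeof(stream[0]), example_magic);
        return 0;
}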
diff --git a/drivers/gpu/drm/xe/abi/guc_lic_abi.h b/drivers/gpu/drm/xe/abi/guc_lic_abi.h
new file mode 100644
index 000000000000..9169644093a2
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_lic_abi.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_LIC_ABI_H_
+#define _ABI_GUC_LIC_ABI_H_
+
+#include <linux/types.h>
+
+/**
+ * enum guc_lic_type - Log Init Config KLV IDs.
+ */
+enum guc_lic_type {
+ /**
+ * @GUC_LIC_TYPE_GUC_SW_VERSION: GuC firmware version. Value
+ * is a 32 bit number represented by guc_sw_version.
+ */
+ GUC_LIC_TYPE_GUC_SW_VERSION = 0x1,
+ /**
+ * @GUC_LIC_TYPE_GUC_DEVICE_ID: GuC device id. Value is a 32
+ * bit.
+ */
+ GUC_LIC_TYPE_GUC_DEVICE_ID = 0x2,
+ /**
+ * @GUC_LIC_TYPE_TSC_FREQUENCY: GuC timestamp counter
+ * frequency. Value is a 32 bit number representing frequency in
+ * kHz. This timestamp is utilized in log entries, timer and
+ * for engine utilization tracking.
+ */
+ GUC_LIC_TYPE_TSC_FREQUENCY = 0x3,
+ /**
+ * @GUC_LIC_TYPE_GMD_ID: HW GMD ID. Value is a 32 bit number
+ * representing graphics, media and display HW architecture IDs.
+ */
+ GUC_LIC_TYPE_GMD_ID = 0x4,
+ /**
+ * @GUC_LIC_TYPE_BUILD_PLATFORM_ID: GuC build platform ID.
+ * Value is 32 bits.
+ */
+ GUC_LIC_TYPE_BUILD_PLATFORM_ID = 0x5,
+};
+
+/**
+ * struct guc_lic - GuC LIC (Log-Init-Config) structure.
+ *
+ * This is populated by the GUC at log init time and is located in the log
+ * buffer memory allocation.
+ */
+struct guc_lic {
+ /**
+ * @magic: A magic number set by GuC to identify that this
+ * structure contains valid information: magic = GUC_LIC_MAGIC.
+ */
+ u32 magic;
+#define GUC_LIC_MAGIC 0x8086900D
+ /**
+ * @version: The version of this structure.
+ * Major and minor version number are represented as bit fields.
+ */
+ u32 version;
+#define GUC_LIC_VERSION_MASK_MAJOR GENMASK(31, 16)
+#define GUC_LIC_VERSION_MASK_MINOR GENMASK(15, 0)
+
+#define GUC_LIC_VERSION_MAJOR 1u
+#define GUC_LIC_VERSION_MINOR 0u
+
+ /** @data_count: Number of dwords the `data` array contains. */
+ u32 data_count;
+ /**
+ * @data: Array of dwords representing a list of LIC KLVs of
+ * type guc_klv_generic with keys represented by guc_lic_type
+ */
+ u32 data[] __counted_by(data_count);
+} __packed;
+
+#endif
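For illustration (not part of the patch): a minimal sketch of validating a LIC header against GUC_LIC_MAGIC and the major version defined above, using a userspace mirror of the first three fields of struct guc_lic.

#include <stdint.h>
#include <stdio.h>

#define LIC_MAGIC        0x8086900Du
#define LIC_VER_MAJOR(v) ((v) >> 16)
#define LIC_VER_MINOR(v) ((v) & 0xffff)

/* Userspace mirror of the fixed part of struct guc_lic, for demonstration only. */
struct lic_hdr {
        uint32_t magic;
        uint32_t version;
        uint32_t data_count;
};

static int lic_valid(const struct lic_hdr *lic)
{
        if (lic->magic != LIC_MAGIC)
                return 0;
        /* Only major version 1 is understood by this sketch. */
        return LIC_VER_MAJOR(lic->version) == 1;
}

int main(void)
{
        struct lic_hdr lic = { LIC_MAGIC, (1u << 16) | 0u, 0 };

        printf("LIC v%u.%u is %s\n", LIC_VER_MAJOR(lic.version),
               LIC_VER_MINOR(lic.version), lic_valid(&lic) ? "valid" : "invalid");
        return 0;
}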
diff --git a/drivers/gpu/drm/xe/abi/guc_log_abi.h b/drivers/gpu/drm/xe/abi/guc_log_abi.h
index 554630b7ccd9..fbf212d59a40 100644
--- a/drivers/gpu/drm/xe/abi/guc_log_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_log_abi.h
@@ -8,11 +8,45 @@
#include <linux/types.h>
+/**
+ * DOC: GuC Log buffer Layout
+ *
+ * The in-memory log buffer layout is as follows::
+ *
+ * +===============================+ 0000h
+ * | Crash dump state header | ^
+ * +-------------------------------+ 32B |
+ * | Debug state header | |
+ * +-------------------------------+ 64B 4KB
+ * | Capture state header | |
+ * +-------------------------------+ 96B |
+ * | | v
+ * +===============================+ <--- EVENT_DATA_OFFSET
+ * | Event logs(raw data) | ^
+ * | | |
+ * | | EVENT_DATA_BUFFER_SIZE
+ * | | |
+ * | | v
+ * +===============================+ <--- CRASH_DUMP_OFFSET
+ * | Crash Dump(raw data) | ^
+ * | | |
+ * | | CRASH_DUMP_BUFFER_SIZE
+ * | | |
+ * | | v
+ * +===============================+ <--- STATE_CAPTURE_OFFSET
+ * | Error state capture(raw data) | ^
+ * | | |
+ * | | STATE_CAPTURE_BUFFER_SIZE
+ * | | |
+ * | | v
+ * +===============================+ Total: GUC_LOG_SIZE
+ */
+
/* GuC logging buffer types */
-enum guc_log_buffer_type {
- GUC_LOG_BUFFER_CRASH_DUMP,
- GUC_LOG_BUFFER_DEBUG,
- GUC_LOG_BUFFER_CAPTURE,
+enum guc_log_type {
+ GUC_LOG_TYPE_EVENT_DATA,
+ GUC_LOG_TYPE_CRASH_DUMP,
+ GUC_LOG_TYPE_STATE_CAPTURE,
};
#define GUC_LOG_BUFFER_TYPE_MAX 3
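For illustration (not part of the patch): the section offsets implied by the layout diagram above are simply cumulative sums of the section sizes, with the 4K header page first. The buffer sizes below are placeholders, not the values the driver actually uses.

#include <stdint.h>
#include <stdio.h>

#define SZ_4K 0x1000u

/* Placeholder sizes; the real sizes are chosen by the driver, not this header. */
#define EVENT_DATA_BUFFER_SIZE    (1024u * 1024u)
#define CRASH_DUMP_BUFFER_SIZE    (8u * 1024u)
#define STATE_CAPTURE_BUFFER_SIZE (1024u * 1024u)

int main(void)
{
        uint32_t event_data_offset = SZ_4K;     /* headers occupy the first 4K */
        uint32_t crash_dump_offset = event_data_offset + EVENT_DATA_BUFFER_SIZE;
        uint32_t state_capture_offset = crash_dump_offset + CRASH_DUMP_BUFFER_SIZE;
        uint32_t guc_log_size = state_capture_offset + STATE_CAPTURE_BUFFER_SIZE;

        printf("EVENT_DATA_OFFSET:    0x%08x\n", event_data_offset);
        printf("CRASH_DUMP_OFFSET:    0x%08x\n", crash_dump_offset);
        printf("STATE_CAPTURE_OFFSET: 0x%08x\n", state_capture_offset);
        printf("GUC_LOG_SIZE:         0x%08x\n", guc_log_size);
        return 0;
}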
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 1fd4a815e784..6a935a75f2a4 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -210,10 +210,11 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
/* TODO: Consider sharing framebuffer mapping?
* embed i915_vma inside intel_framebuffer
*/
- xe_pm_runtime_get_noresume(xe);
- ret = mutex_lock_interruptible(&ggtt->lock);
+ guard(xe_pm_runtime_noresume)(xe);
+ ACQUIRE(mutex_intr, lock)(&ggtt->lock);
+ ret = ACQUIRE_ERR(mutex_intr, &lock);
if (ret)
- goto out;
+ return ret;
align = XE_PAGE_SIZE;
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
@@ -223,15 +224,13 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
vma->node = bo->ggtt_node[tile0->id];
} else if (view->type == I915_GTT_VIEW_NORMAL) {
vma->node = xe_ggtt_node_init(ggtt);
- if (IS_ERR(vma->node)) {
- ret = PTR_ERR(vma->node);
- goto out_unlock;
- }
+ if (IS_ERR(vma->node))
+ return PTR_ERR(vma->node);
ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0);
if (ret) {
xe_ggtt_node_fini(vma->node);
- goto out_unlock;
+ return ret;
}
xe_ggtt_map_bo(ggtt, vma->node, bo, xe->pat.idx[XE_CACHE_NONE]);
@@ -245,13 +244,13 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
vma->node = xe_ggtt_node_init(ggtt);
if (IS_ERR(vma->node)) {
ret = PTR_ERR(vma->node);
- goto out_unlock;
+ return ret;
}
ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0);
if (ret) {
xe_ggtt_node_fini(vma->node);
- goto out_unlock;
+ return ret;
}
ggtt_ofs = vma->node->base.start;
@@ -265,10 +264,6 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
rot_info->plane[i].dst_stride);
}
-out_unlock:
- mutex_unlock(&ggtt->lock);
-out:
- xe_pm_runtime_put(xe);
return ret;
}
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 4e5ccd50f69d..07acae121aa7 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -38,8 +38,6 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm)
struct xe_tile *tile = xe_device_get_root_tile(xe);
struct xe_gt *gt = tile->media_gt;
struct xe_gsc *gsc = &gt->uc.gsc;
- bool ret = true;
- unsigned int fw_ref;
if (!gsc || !xe_uc_fw_is_enabled(&gsc->fw)) {
drm_dbg_kms(&xe->drm,
@@ -47,22 +45,15 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm)
return false;
}
- xe_pm_runtime_get(xe);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (!fw_ref) {
+ guard(xe_pm_runtime)(xe);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref.domains) {
drm_dbg_kms(&xe->drm,
"failed to get forcewake to check proxy status\n");
- ret = false;
- goto out;
+ return false;
}
- if (!xe_gsc_proxy_init_done(gsc))
- ret = false;
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-out:
- xe_pm_runtime_put(xe);
- return ret;
+ return xe_gsc_proxy_init_done(gsc);
}
/*This function helps allocate memory for the command that we will send to gsc cs */
@@ -168,17 +159,15 @@ static ssize_t intel_hdcp_gsc_msg_send(struct intel_hdcp_gsc_context *gsc_contex
u32 addr_out_off, addr_in_wr_off = 0;
int ret, tries = 0;
- if (msg_in_len > max_msg_size || msg_out_len > max_msg_size) {
- ret = -ENOSPC;
- goto out;
- }
+ if (msg_in_len > max_msg_size || msg_out_len > max_msg_size)
+ return -ENOSPC;
msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE;
msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE;
addr_out_off = PAGE_SIZE;
host_session_id = xe_gsc_create_host_session_id();
- xe_pm_runtime_get_noresume(xe);
+ guard(xe_pm_runtime_noresume)(xe);
addr_in_wr_off = xe_gsc_emit_header(xe, &gsc_context->hdcp_bo->vmap,
addr_in_wr_off, HECI_MEADDRESS_HDCP,
host_session_id, msg_in_len);
@@ -203,14 +192,12 @@ static ssize_t intel_hdcp_gsc_msg_send(struct intel_hdcp_gsc_context *gsc_contex
} while (++tries < 20);
if (ret)
- goto out;
+ return ret;
xe_map_memcpy_from(xe, msg_out, &gsc_context->hdcp_bo->vmap,
addr_out_off + HDCP_GSC_HEADER_SIZE,
msg_out_len);
-out:
- xe_pm_runtime_put(xe);
return ret;
}
diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
index 5d41ca297447..885fcf211e6d 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
@@ -47,6 +47,7 @@
#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define PIPE_CONTROL0_QUEUE_DRAIN_MODE BIT(12)
#define PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE BIT(10) /* gen12 */
#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 917a088c28f2..93643da57428 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -227,6 +227,9 @@
#define MIRROR_FUSE1 XE_REG(0x911c)
+#define FUSE2 XE_REG(0x9120)
+#define PRODUCTION_HW REG_BIT(2)
+
#define MIRROR_L3BANK_ENABLE XE_REG(0x9130)
#define XE3_L3BANK_ENABLE REG_GENMASK(31, 0)
diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
index 4389e5a76f89..4d83461e538b 100644
--- a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
@@ -9,6 +9,7 @@
#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52)
#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53)
+#define XE_PTE_ADDR_MASK GENMASK_ULL(51, 12)
#define GGTT_PTE_VFID GENMASK_ULL(11, 2)
#define GUC_GGTT_TOP 0xFEE00000
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index 2118f7dec287..87984713dd12 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -90,6 +90,9 @@
#define GUC_SEND_INTERRUPT XE_REG(0xc4c8)
#define GUC_SEND_TRIGGER REG_BIT(0)
+#define GUC_INTR_CHICKEN XE_REG(0xc50c)
+#define DISABLE_SIGNALING_ENGINES REG_BIT(1)
+
#define GUC_BCS_RCS_IER XE_REG(0xc550)
#define GUC_VCS2_VCS1_IER XE_REG(0xc554)
#define GUC_WD_VECS_IER XE_REG(0xc558)
diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
index 2f97662d958d..9d74f454d3ff 100644
--- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
@@ -20,6 +20,7 @@
#define GU_MISC_IRQ REG_BIT(29)
#define ERROR_IRQ(x) REG_BIT(26 + (x))
#define DISPLAY_IRQ REG_BIT(16)
+#define SOC_H2DMEMINT_IRQ REG_BIT(13)
#define I2C_IRQ REG_BIT(12)
#define GT_DW_IRQ(x) REG_BIT(x)
diff --git a/drivers/gpu/drm/xe/regs/xe_mert_regs.h b/drivers/gpu/drm/xe/regs/xe_mert_regs.h
new file mode 100644
index 000000000000..c345e11ceea8
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_mert_regs.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_MERT_REGS_H_
+#define _XE_MERT_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define MERT_LMEM_CFG XE_REG(0x1448b0)
+
+#define MERT_TLB_CT_INTR_ERR_ID_PORT XE_REG(0x145190)
+#define MERT_TLB_CT_VFID_MASK REG_GENMASK(16, 9)
+#define MERT_TLB_CT_ERROR_MASK REG_GENMASK(5, 0)
+#define MERT_TLB_CT_LMTT_FAULT 0x05
+
+#define MERT_TLB_INV_DESC_A XE_REG(0x14cf7c)
+#define MERT_TLB_INV_DESC_A_VALID REG_BIT(0)
+
+#endif /* _XE_MERT_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
index e693a50706f8..04a729e610aa 100644
--- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -100,4 +100,21 @@
#define OAM_COMPRESSION_T3_CONTROL XE_REG(0x1c2e00)
#define OAM_LAT_MEASURE_ENABLE REG_BIT(4)
+/* Actual address is MEDIA_GT_GSI_OFFSET + the base addr below */
+#define XE_OAM_SAG_BASE 0x13000
+#define XE_OAM_SCMI_0_BASE 0x14000
+#define XE_OAM_SCMI_1_BASE 0x14800
+#define XE_OAM_SAG_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SAG_BASE)
+#define XE_OAM_SCMI_0_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SCMI_0_BASE)
+#define XE_OAM_SCMI_1_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SCMI_1_BASE)
+
+#define OAMERT_CONTROL XE_REG(0x1453a0)
+#define OAMERT_DEBUG XE_REG(0x1453a4)
+#define OAMERT_STATUS XE_REG(0x1453a8)
+#define OAMERT_HEAD_POINTER XE_REG(0x1453ac)
+#define OAMERT_TAIL_POINTER XE_REG(0x1453b0)
+#define OAMERT_BUFFER XE_REG(0x1453b4)
+#define OAMERT_CONTEXT_CONTROL XE_REG(0x1453c8)
+#define OAMERT_MMIO_TRG XE_REG(0x1453cc)
+
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_args_test.c b/drivers/gpu/drm/xe/tests/xe_args_test.c
index f3fb23aa5d2e..2687a1b054dd 100644
--- a/drivers/gpu/drm/xe/tests/xe_args_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_args_test.c
@@ -78,6 +78,24 @@ static void pick_arg_example(struct kunit *test)
#undef buz
}
+static void if_args_example(struct kunit *test)
+{
+ enum { Z = 1, Q };
+
+#define foo X, Y
+#define bar IF_ARGS(Z, Q, foo)
+#define buz IF_ARGS(Z, Q, DROP_FIRST_ARG(FIRST_ARG(foo)))
+
+ KUNIT_EXPECT_EQ(test, bar, Z);
+ KUNIT_EXPECT_EQ(test, buz, Q);
+ KUNIT_EXPECT_STREQ(test, __stringify(bar), "Z");
+ KUNIT_EXPECT_STREQ(test, __stringify(buz), "Q");
+
+#undef foo
+#undef bar
+#undef buz
+}
+
static void sep_comma_example(struct kunit *test)
{
#define foo(f) f(X) f(Y) f(Z) f(Q)
@@ -198,6 +216,40 @@ static void last_arg_test(struct kunit *test)
KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(MAX_ARGS)), "-12");
}
+static void if_args_test(struct kunit *test)
+{
+ bool with_args = true;
+ bool no_args = false;
+ enum { X = 100 };
+
+ KUNIT_EXPECT_TRUE(test, IF_ARGS(true, false, FOO_ARGS));
+ KUNIT_EXPECT_FALSE(test, IF_ARGS(true, false, NO_ARGS));
+
+ KUNIT_EXPECT_TRUE(test, CONCATENATE(IF_ARGS(with, no, FOO_ARGS), _args));
+ KUNIT_EXPECT_FALSE(test, CONCATENATE(IF_ARGS(with, no, NO_ARGS), _args));
+
+ KUNIT_EXPECT_STREQ(test, __stringify(IF_ARGS(yes, no, FOO_ARGS)), "yes");
+ KUNIT_EXPECT_STREQ(test, __stringify(IF_ARGS(yes, no, NO_ARGS)), "no");
+
+ KUNIT_EXPECT_EQ(test, IF_ARGS(CALL_ARGS(COUNT_ARGS, FOO_ARGS), -1, FOO_ARGS), 4);
+ KUNIT_EXPECT_EQ(test, IF_ARGS(CALL_ARGS(COUNT_ARGS, FOO_ARGS), -1, NO_ARGS), -1);
+ KUNIT_EXPECT_EQ(test, IF_ARGS(CALL_ARGS(COUNT_ARGS, NO_ARGS), -1, FOO_ARGS), 0);
+ KUNIT_EXPECT_EQ(test, IF_ARGS(CALL_ARGS(COUNT_ARGS, NO_ARGS), -1, NO_ARGS), -1);
+
+ KUNIT_EXPECT_EQ(test,
+ CALL_ARGS(FIRST_ARG,
+ CALL_ARGS(CONCATENATE, IF_ARGS(FOO, MAX, FOO_ARGS), _ARGS)), X);
+ KUNIT_EXPECT_EQ(test,
+ CALL_ARGS(FIRST_ARG,
+ CALL_ARGS(CONCATENATE, IF_ARGS(FOO, MAX, NO_ARGS), _ARGS)), -1);
+ KUNIT_EXPECT_EQ(test,
+ CALL_ARGS(COUNT_ARGS,
+ CALL_ARGS(CONCATENATE, IF_ARGS(FOO, MAX, FOO_ARGS), _ARGS)), 4);
+ KUNIT_EXPECT_EQ(test,
+ CALL_ARGS(COUNT_ARGS,
+ CALL_ARGS(CONCATENATE, IF_ARGS(FOO, MAX, NO_ARGS), _ARGS)), 12);
+}
+
static struct kunit_case args_tests[] = {
KUNIT_CASE(count_args_test),
KUNIT_CASE(call_args_example),
@@ -209,6 +261,8 @@ static struct kunit_case args_tests[] = {
KUNIT_CASE(last_arg_example),
KUNIT_CASE(last_arg_test),
KUNIT_CASE(pick_arg_example),
+ KUNIT_CASE(if_args_example),
+ KUNIT_CASE(if_args_test),
KUNIT_CASE(sep_comma_example),
{}
};
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 2294cf89f3e1..2278e589a493 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -185,8 +185,7 @@ static int ccs_test_run_device(struct xe_device *xe)
return 0;
}
- xe_pm_runtime_get(xe);
-
+ guard(xe_pm_runtime)(xe);
for_each_tile(tile, xe, id) {
/* For igfx run only for primary tile */
if (!IS_DGFX(xe) && id > 0)
@@ -194,8 +193,6 @@ static int ccs_test_run_device(struct xe_device *xe)
ccs_test_run_tile(xe, tile, test);
}
- xe_pm_runtime_put(xe);
-
return 0;
}
@@ -356,13 +353,10 @@ static int evict_test_run_device(struct xe_device *xe)
return 0;
}
- xe_pm_runtime_get(xe);
-
+ guard(xe_pm_runtime)(xe);
for_each_tile(tile, xe, id)
evict_test_run_tile(xe, tile, test);
- xe_pm_runtime_put(xe);
-
return 0;
}
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 5df98de5ba3c..954b6b911ea0 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -266,7 +266,7 @@ static int dma_buf_run_device(struct xe_device *xe)
const struct dma_buf_test_params *params;
struct kunit *test = kunit_get_current_test();
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
for (params = test_params; params->mem_mask; ++params) {
struct dma_buf_test_params p = *params;
@@ -274,7 +274,6 @@ static int dma_buf_run_device(struct xe_device *xe)
test->priv = &p;
xe_test_dmabuf_import_same_driver(xe);
}
- xe_pm_runtime_put(xe);
/* A non-zero return would halt iteration over driver devices */
return 0;
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 5904d658d1f2..34e2f0f4631f 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -344,8 +344,7 @@ static int migrate_test_run_device(struct xe_device *xe)
struct xe_tile *tile;
int id;
- xe_pm_runtime_get(xe);
-
+ guard(xe_pm_runtime)(xe);
for_each_tile(tile, xe, id) {
struct xe_migrate *m = tile->migrate;
struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
@@ -356,8 +355,6 @@ static int migrate_test_run_device(struct xe_device *xe)
xe_vm_unlock(m->q->vm);
}
- xe_pm_runtime_put(xe);
-
return 0;
}
@@ -759,13 +756,10 @@ static int validate_ccs_test_run_device(struct xe_device *xe)
return 0;
}
- xe_pm_runtime_get(xe);
-
+ guard(xe_pm_runtime)(xe);
for_each_tile(tile, xe, id)
validate_ccs_test_run_tile(xe, tile, test);
- xe_pm_runtime_put(xe);
-
return 0;
}
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 6bb278167aaf..daf3c6836c75 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -43,14 +43,12 @@ static void read_l3cc_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 l3cc, l3cc_expected;
- unsigned int fw_ref, i;
+ unsigned int i;
u32 reg_val;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
KUNIT_FAIL_AND_ABORT(test, "Forcewake Failed.\n");
- }
for (i = 0; i < info->num_mocs_regs; i++) {
if (!(i & 1)) {
@@ -74,7 +72,6 @@ static void read_l3cc_table(struct xe_gt *gt,
KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc,
"l3cc idx=%u has incorrect val.\n", i);
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void read_mocs_table(struct xe_gt *gt,
@@ -82,14 +79,14 @@ static void read_mocs_table(struct xe_gt *gt,
{
struct kunit *test = kunit_get_current_test();
u32 mocs, mocs_expected;
- unsigned int fw_ref, i;
+ unsigned int i;
u32 reg_val;
KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index,
"Unused entries index should have been defined\n");
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n");
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ KUNIT_ASSERT_NE_MSG(test, fw_ref.domains, 0, "Forcewake Failed.\n");
for (i = 0; i < info->num_mocs_regs; i++) {
if (regs_are_mcr(gt))
@@ -106,8 +103,6 @@ static void read_mocs_table(struct xe_gt *gt,
KUNIT_EXPECT_EQ_MSG(test, mocs_expected, mocs,
"mocs reg 0x%x has incorrect val.\n", i);
}
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static int mocs_kernel_test_run_device(struct xe_device *xe)
@@ -120,8 +115,7 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
unsigned int flags;
int id;
- xe_pm_runtime_get(xe);
-
+ guard(xe_pm_runtime)(xe);
for_each_gt(gt, xe, id) {
flags = live_mocs_init(&mocs, gt);
if (flags & HAS_GLOBAL_MOCS)
@@ -130,8 +124,6 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
read_l3cc_table(gt, &mocs.table);
}
- xe_pm_runtime_put(xe);
-
return 0;
}
@@ -155,8 +147,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
int id;
struct kunit *test = kunit_get_current_test();
- xe_pm_runtime_get(xe);
-
+ guard(xe_pm_runtime)(xe);
for_each_gt(gt, xe, id) {
flags = live_mocs_init(&mocs, gt);
kunit_info(test, "mocs_reset_test before reset\n");
@@ -174,8 +165,6 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
read_l3cc_table(gt, &mocs.table);
}
- xe_pm_runtime_put(xe);
-
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_args.h b/drivers/gpu/drm/xe/xe_args.h
index 4dbc7e53c624..f550b5e3b993 100644
--- a/drivers/gpu/drm/xe/xe_args.h
+++ b/drivers/gpu/drm/xe/xe_args.h
@@ -122,6 +122,33 @@
#define PICK_ARG12(args...) PICK_ARG11(DROP_FIRST_ARG(args))
/**
+ * IF_ARGS() - Make selection based on optional argument list.
+ * @then: token to return if arguments are present
+ * @else: token to return if arguments are empty
+ * @...: arguments to check (optional)
+ *
+ * This macro allows selecting a token based on the presence of the argument list.
+ *
+ * Example:
+ *
+ * #define foo X, Y
+ * #define bar IF_ARGS(Z, Q, foo)
+ * #define buz IF_ARGS(Z, Q, DROP_FIRST_ARG(FIRST_ARG(foo)))
+ *
+ * With above definitions bar expands to Z while buz expands to Q.
+ */
+#if defined(CONFIG_CC_IS_CLANG) || GCC_VERSION >= 100100
+#define IF_ARGS(then, else, ...) FIRST_ARG(__VA_OPT__(then,) else)
+#else
+#define IF_ARGS(then, else, ...) _IF_ARGS(then, else, CALL_ARGS(FIRST_ARG, __VA_ARGS__))
+#define _IF_ARGS(then, else, ...) __IF_ARGS(then, else, CALL_ARGS(COUNT_ARGS, __VA_ARGS__))
+#define __IF_ARGS(then, else, n) ___IF_ARGS(then, else, CALL_ARGS(CONCATENATE, ___IF_ARG, n))
+#define ___IF_ARGS(then, else, if) CALL_ARGS(if, then, else)
+#define ___IF_ARG1(then, else) then
+#define ___IF_ARG0(then, else) else
+#endif
+
+/**
* ARGS_SEP_COMMA - Definition of a comma character.
*
* This definition can be used in cases where any intermediate macro expects
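For illustration (not part of the patch): a self-contained demonstration of the __VA_OPT__-based IF_ARGS() variant documented above. The helper macros are local stand-ins for the xe_args.h ones so this builds on its own with GCC 10+ or Clang; the pre-__VA_OPT__ fallback path is not reproduced here.

#include <stdio.h>

/* Local stand-ins for FIRST_ARG() and the __VA_OPT__ flavour of IF_ARGS(). */
#define FIRST_ARG_(a, ...) a
#define FIRST_ARG(...) FIRST_ARG_(__VA_ARGS__)
#define IF_ARGS(then, else, ...) FIRST_ARG(__VA_OPT__(then,) else)

int main(void)
{
        /* The trailing arguments only gate the selection; they are discarded. */
        printf("%d\n", IF_ARGS(1, 0, some, args));      /* prints 1 */
        printf("%d\n", IF_ARGS(1, 0));                  /* prints 0 */
        return 0;
}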
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index bf4ee976b680..8b6474cd3eaf 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -516,8 +516,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
* non-coherent and require a CPU:WC mapping.
*/
if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
- (xe->info.graphics_verx100 >= 1270 &&
- bo->flags & XE_BO_FLAG_PAGETABLE))
+ (!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE))
caching = ttm_write_combined;
}
@@ -2026,13 +2025,9 @@ static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
struct xe_device *xe = xe_bo_device(bo);
- int ret;
-
- xe_pm_runtime_get(xe);
- ret = ttm_bo_vm_access(vma, addr, buf, len, write);
- xe_pm_runtime_put(xe);
- return ret;
+ guard(xe_pm_runtime)(xe);
+ return ttm_bo_vm_access(vma, addr, buf, len, write);
}
/**
@@ -3176,7 +3171,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, args->flags &
~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
DRM_XE_GEM_CREATE_FLAG_SCANOUT |
- DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
+ DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
+ DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION)))
return -EINVAL;
if (XE_IOCTL_DBG(xe, args->handle))
@@ -3198,6 +3194,12 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
bo_flags |= XE_BO_FLAG_SCANOUT;
+ if (args->flags & DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION) {
+ if (XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20))
+ return -EOPNOTSUPP;
+ bo_flags |= XE_BO_FLAG_NO_COMPRESSION;
+ }
+
bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
/* CCS formats need physical placement at a 64K alignment in VRAM. */
@@ -3519,8 +3521,12 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
* Compression implies coh_none, therefore we know for sure that WB
* memory can't currently use compression, which is likely one of the
* common cases.
+ * Additionally, userspace may explicitly request no compression via the
+ * DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION flag, which should also disable
+ * CCS usage.
*/
- if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)
+ if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB ||
+ bo->flags & XE_BO_FLAG_NO_COMPRESSION)
return false;
return true;
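For illustration (not part of the patch): a hypothetical userspace helper that creates a buffer object with compression explicitly disabled via the new DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION flag. Field usage follows the xe uAPI header as extended by this series; per the ioctl check added above, the flag is rejected when GRAPHICS_VER < 20, and the placement bitmask is assumed to come from a prior DRM_XE_DEVICE_QUERY_MEM_REGIONS query.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

#include <drm/xe_drm.h>         /* struct drm_xe_gem_create and the new flag */

/* Hypothetical helper: create a BO with CCS compression explicitly disabled. */
static int xe_bo_create_uncompressed(int fd, uint64_t size, uint32_t placement,
                                     uint32_t *handle)
{
        struct drm_xe_gem_create create;

        memset(&create, 0, sizeof(create));
        create.size = size;
        create.placement = placement;   /* memory region bitmask from the query */
        create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
        create.flags = DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION;

        if (ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
                return -1;

        *handle = create.handle;
        return 0;
}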
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 911d5b90461a..8ab4474129c3 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -50,6 +50,7 @@
#define XE_BO_FLAG_GGTT3 BIT(23)
#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24)
#define XE_BO_FLAG_FORCE_USER_VRAM BIT(25)
+#define XE_BO_FLAG_NO_COMPRESSION BIT(26)
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index e91da9589c5f..0907868b32d6 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -68,7 +68,7 @@ static int info(struct seq_file *m, void *data)
struct xe_gt *gt;
u8 id;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100);
drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100);
@@ -93,9 +93,10 @@ static int info(struct seq_file *m, void *data)
xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
drm_printf(&p, "gt%d engine_mask 0x%llx\n", id,
gt->info.engine_mask);
+ drm_printf(&p, "gt%d multi_queue_engine_class_mask 0x%x\n", id,
+ gt->info.multi_queue_engine_class_mask);
}
- xe_pm_runtime_put(xe);
return 0;
}
@@ -110,9 +111,8 @@ static int sriov_info(struct seq_file *m, void *data)
static int workarounds(struct xe_device *xe, struct drm_printer *p)
{
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
xe_wa_device_dump(xe, p);
- xe_pm_runtime_put(xe);
return 0;
}
@@ -134,7 +134,7 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data)
xe = node_to_xe(m->private);
p = drm_seq_file_printer(m);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
mmio = xe_root_tile_mmio(xe);
static const struct {
u32 offset;
@@ -151,7 +151,6 @@ static int dgfx_pkg_residencies_show(struct seq_file *m, void *data)
for (int i = 0; i < ARRAY_SIZE(residencies); i++)
read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p);
- xe_pm_runtime_put(xe);
return 0;
}
@@ -163,7 +162,7 @@ static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data)
xe = node_to_xe(m->private);
p = drm_seq_file_printer(m);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
mmio = xe_root_tile_mmio(xe);
static const struct {
@@ -178,7 +177,6 @@ static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data)
for (int i = 0; i < ARRAY_SIZE(residencies); i++)
read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p);
- xe_pm_runtime_put(xe);
return 0;
}
@@ -277,16 +275,14 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf,
xe->wedged.mode = wedged_mode;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
for_each_gt(gt, xe, id) {
ret = xe_guc_ads_scheduler_policy_toggle_reset(&gt->uc.guc.ads);
if (ret) {
xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. GuC may still cause engine reset even with wedged_mode=2\n");
- xe_pm_runtime_put(xe);
return -EIO;
}
}
- xe_pm_runtime_put(xe);
return size;
}
@@ -297,6 +293,39 @@ static const struct file_operations wedged_mode_fops = {
.write = wedged_mode_set,
};
+static ssize_t page_reclaim_hw_assist_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ char buf[8];
+ int len;
+
+ len = scnprintf(buf, sizeof(buf), "%d\n", xe->info.has_page_reclaim_hw_assist);
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t page_reclaim_hw_assist_set(struct file *f, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ bool val;
+ ssize_t ret;
+
+ ret = kstrtobool_from_user(ubuf, size, &val);
+ if (ret)
+ return ret;
+
+ xe->info.has_page_reclaim_hw_assist = val;
+
+ return size;
+}
+
+static const struct file_operations page_reclaim_hw_assist_fops = {
+ .owner = THIS_MODULE,
+ .read = page_reclaim_hw_assist_show,
+ .write = page_reclaim_hw_assist_set,
+};
+
static ssize_t atomic_svm_timeslice_ms_show(struct file *f, char __user *ubuf,
size_t size, loff_t *pos)
{
@@ -332,6 +361,74 @@ static const struct file_operations atomic_svm_timeslice_ms_fops = {
.write = atomic_svm_timeslice_ms_set,
};
+static ssize_t min_run_period_lr_ms_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ char buf[32];
+ int len = 0;
+
+ len = scnprintf(buf, sizeof(buf), "%d\n", xe->min_run_period_lr_ms);
+
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t min_run_period_lr_ms_set(struct file *f, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ u32 min_run_period_lr_ms;
+ ssize_t ret;
+
+ ret = kstrtouint_from_user(ubuf, size, 0, &min_run_period_lr_ms);
+ if (ret)
+ return ret;
+
+ xe->min_run_period_lr_ms = min_run_period_lr_ms;
+
+ return size;
+}
+
+static const struct file_operations min_run_period_lr_ms_fops = {
+ .owner = THIS_MODULE,
+ .read = min_run_period_lr_ms_show,
+ .write = min_run_period_lr_ms_set,
+};
+
+static ssize_t min_run_period_pf_ms_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ char buf[32];
+ int len = 0;
+
+ len = scnprintf(buf, sizeof(buf), "%d\n", xe->min_run_period_pf_ms);
+
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t min_run_period_pf_ms_set(struct file *f, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ u32 min_run_period_pf_ms;
+ ssize_t ret;
+
+ ret = kstrtouint_from_user(ubuf, size, 0, &min_run_period_pf_ms);
+ if (ret)
+ return ret;
+
+ xe->min_run_period_pf_ms = min_run_period_pf_ms;
+
+ return size;
+}
+
+static const struct file_operations min_run_period_pf_ms_fops = {
+ .owner = THIS_MODULE,
+ .read = min_run_period_pf_ms_show,
+ .write = min_run_period_pf_ms_set,
+};
+
static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf,
size_t size, loff_t *pos)
{
@@ -375,7 +472,6 @@ void xe_debugfs_register(struct xe_device *xe)
struct ttm_resource_manager *man;
struct xe_tile *tile;
struct xe_gt *gt;
- u32 mem_type;
u8 tile_id;
u8 id;
@@ -400,19 +496,22 @@ void xe_debugfs_register(struct xe_device *xe)
debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe,
&atomic_svm_timeslice_ms_fops);
- debugfs_create_file("disable_late_binding", 0600, root, xe,
- &disable_late_binding_fops);
+ debugfs_create_file("min_run_period_lr_ms", 0600, root, xe,
+ &min_run_period_lr_ms_fops);
- for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
- man = ttm_manager_type(bdev, mem_type);
+ debugfs_create_file("min_run_period_pf_ms", 0600, root, xe,
+ &min_run_period_pf_ms_fops);
- if (man) {
- char name[16];
+ debugfs_create_file("disable_late_binding", 0600, root, xe,
+ &disable_late_binding_fops);
- snprintf(name, sizeof(name), "vram%d_mm", mem_type - XE_PL_VRAM0);
- ttm_resource_manager_create_debugfs(man, root, name);
- }
- }
+ /*
+ * Don't expose the page reclaim configuration file if it is not
+ * initially supported by the hardware.
+ */
+ if (xe->info.has_page_reclaim_hw_assist)
+ debugfs_create_file("page_reclaim_hw_assist", 0600, root, xe,
+ &page_reclaim_hw_assist_fops);
man = ttm_manager_type(bdev, XE_PL_TT);
ttm_resource_manager_create_debugfs(man, root, "gtt_mm");
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index d444eda65ca6..7263c2a5f3a8 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -276,7 +276,6 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
struct xe_device *xe = coredump_to_xe(coredump);
- unsigned int fw_ref;
/*
* NB: Despite passing a GFP_ flags parameter here, more allocations are done
@@ -287,15 +286,15 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
xe_devcoredump_read, xe_devcoredump_free,
XE_COREDUMP_TIMEOUT_JIFFIES);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
/* keep going if fw fails as we still want to save the memory and SW data */
- fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
- xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
- xe_vm_snapshot_capture_delayed(ss->vm);
- xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
- xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
+ xe_with_force_wake(fw_ref, gt_to_fw(ss->gt), XE_FORCEWAKE_ALL) {
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
+ xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+ xe_vm_snapshot_capture_delayed(ss->vm);
+ xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
+ }
ss->read.chunk_position = 0;
@@ -306,7 +305,7 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX,
GFP_USER);
if (!ss->read.buffer)
- goto put_pm;
+ return;
__xe_devcoredump_read(ss->read.buffer,
XE_DEVCOREDUMP_CHUNK_MAX,
@@ -314,15 +313,12 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
} else {
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
if (!ss->read.buffer)
- goto put_pm;
+ return;
__xe_devcoredump_read(ss->read.buffer, ss->read.size, 0,
coredump);
xe_devcoredump_snapshot_free(ss);
}
-
-put_pm:
- xe_pm_runtime_put(xe);
}
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
@@ -332,7 +328,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
struct xe_guc *guc = exec_queue_to_guc(q);
const char *process_name = "no process";
- unsigned int fw_ref;
bool cookie;
ss->snapshot_time = ktime_get_real();
@@ -348,10 +343,10 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->gt = q->gt;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
- cookie = dma_fence_begin_signalling();
-
/* keep going if fw fails as we still want to save the memory and SW data */
- fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+
+ cookie = dma_fence_begin_signalling();
ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
@@ -364,7 +359,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
queue_work(system_unbound_wq, &ss->work);
- xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
dma_fence_end_signalling(cookie);
}
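
The conversions above rely on the scope-based cleanup helpers from linux/cleanup.h: guard(xe_pm_runtime)(xe) takes a runtime PM reference that is released automatically on every return path, and the ioctl paths below use the conditional ACQUIRE()/ACQUIRE_ERR() form where the acquisition itself can fail. A minimal sketch of how such a guard class can be declared and used, assuming the usual xe_pm_runtime_get()/xe_pm_runtime_put() helpers (the real class definition lives in the xe PM headers, outside this hunk):

    #include <linux/cleanup.h>

    /* Assumed helpers from xe_pm.h; not defined in this patch excerpt. */
    DEFINE_GUARD(xe_pm_runtime, struct xe_device *,
                 xe_pm_runtime_get(_T), xe_pm_runtime_put(_T));

    static void example_with_pm(struct xe_device *xe)
    {
            guard(xe_pm_runtime)(xe);  /* put runs automatically at scope exit */

            /* ... work that needs the device runtime-resumed ... */
    }

    static void example_scoped_pm(struct xe_device *xe)
    {
            scoped_guard(xe_pm_runtime, xe) {
                    /* reference held only for this block */
            }
    }
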
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index cf29e259861f..00afc84a8683 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -166,7 +166,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
struct xe_exec_queue *q;
unsigned long idx;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
/*
* No need for exec_queue.lock here as there is no contention for it
@@ -177,15 +177,18 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
xa_for_each(&xef->exec_queue.xa, idx, q) {
if (q->vm && q->hwe->hw_engine_group)
xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
- xe_exec_queue_kill(q);
+
+ if (xe_exec_queue_is_multi_queue_primary(q))
+ xe_exec_queue_group_kill_put(q->multi_queue.group);
+ else
+ xe_exec_queue_kill(q);
+
xe_exec_queue_put(q);
}
xa_for_each(&xef->vm.xa, idx, vm)
xe_vm_close_and_put(vm);
xe_file_put(xef);
-
- xe_pm_runtime_put(xe);
}
static const struct drm_ioctl_desc xe_ioctls[] = {
@@ -209,6 +212,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_VM_QUERY_MEM_RANGE_ATTRS, xe_vm_query_vmas_attrs_ioctl,
DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
+ DRM_RENDER_ALLOW),
};
static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -220,10 +225,10 @@ static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (xe_device_wedged(xe))
return -ECANCELED;
- ret = xe_pm_runtime_get_ioctl(xe);
+ ACQUIRE(xe_pm_runtime_ioctl, pm)(xe);
+ ret = ACQUIRE_ERR(xe_pm_runtime_ioctl, &pm);
if (ret >= 0)
ret = drm_ioctl(file, cmd, arg);
- xe_pm_runtime_put(xe);
return ret;
}
@@ -238,10 +243,10 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo
if (xe_device_wedged(xe))
return -ECANCELED;
- ret = xe_pm_runtime_get_ioctl(xe);
+ ACQUIRE(xe_pm_runtime_ioctl, pm)(xe);
+ ret = ACQUIRE_ERR(xe_pm_runtime_ioctl, &pm);
if (ret >= 0)
ret = drm_compat_ioctl(file, cmd, arg);
- xe_pm_runtime_put(xe);
return ret;
}
@@ -455,6 +460,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe->info.revid = pdev->revision;
xe->info.force_execlist = xe_modparam.force_execlist;
xe->atomic_svm_timeslice_ms = 5;
+ xe->min_run_period_lr_ms = 5;
err = xe_irq_init(xe);
if (err)
@@ -775,7 +781,6 @@ ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */
static int probe_has_flat_ccs(struct xe_device *xe)
{
struct xe_gt *gt;
- unsigned int fw_ref;
u32 reg;
/* Always enabled/disabled, no runtime check to do */
@@ -786,8 +791,8 @@ static int probe_has_flat_ccs(struct xe_device *xe)
if (!gt)
return 0;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
@@ -797,11 +802,64 @@ static int probe_has_flat_ccs(struct xe_device *xe)
drm_dbg(&xe->drm,
"Flat CCS has been disabled in bios, May lead to performance impact");
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
return 0;
}
+/*
+ * Detect if the driver is being run on pre-production hardware. We don't
+ * keep workarounds for pre-production hardware long term, so print an
+ * error and add taint if we're being loaded on a pre-production platform
+ * for which the pre-prod workarounds have already been removed.
+ *
+ * The general policy is that we'll remove any workarounds that only apply to
+ * pre-production hardware around the time force_probe restrictions are lifted
+ * for a platform of the next major IP generation (for example, Xe2 pre-prod
+ * workarounds should be removed around the time the first Xe3 platforms have
+ * force_probe lifted).
+ */
+static void detect_preproduction_hw(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int id;
+
+ /*
+ * SR-IOV VFs don't have access to the FUSE2 register, so we can't
+ * check pre-production status there. But the host OS will notice
+ * and report the pre-production status, which should be enough to
+ * help us catch mistaken use of pre-production hardware.
+ */
+ if (IS_SRIOV_VF(xe))
+ return;
+
+ /*
+ * The "SW_CAP" fuse contains a bit indicating whether the device is a
+ * production or pre-production device. This fuse is reflected through
+ * the GT "FUSE2" register, even though the contents of the fuse are
+ * not GT-specific. Every GT's reflection of this fuse should show the
+ * same value, so we'll just use the first available GT for lookup.
+ */
+ for_each_gt(gt, xe, id)
+ break;
+
+ if (!gt)
+ return;
+
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) {
+ xe_gt_err(gt, "Forcewake failure; cannot determine production/pre-production hw status.\n");
+ return;
+ }
+
+ if (xe_mmio_read32(&gt->mmio, FUSE2) & PRODUCTION_HW)
+ return;
+
+ xe_info(xe, "Pre-production hardware detected.\n");
+ if (!xe->info.has_pre_prod_wa) {
+ xe_err(xe, "Pre-production workarounds for this platform have already been removed.\n");
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
+ }
+}
+
int xe_device_probe(struct xe_device *xe)
{
struct xe_tile *tile;
@@ -972,6 +1030,8 @@ int xe_device_probe(struct xe_device *xe)
if (err)
goto err_unregister_display;
+ detect_preproduction_hw(xe);
+
return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
err_unregister_display:
@@ -1034,7 +1094,6 @@ void xe_device_wmb(struct xe_device *xe)
*/
static void tdf_request_sync(struct xe_device *xe)
{
- unsigned int fw_ref;
struct xe_gt *gt;
u8 id;
@@ -1042,8 +1101,8 @@ static void tdf_request_sync(struct xe_device *xe)
if (xe_gt_is_media_type(gt))
continue;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return;
xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
@@ -1058,15 +1117,12 @@ static void tdf_request_sync(struct xe_device *xe)
if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
300, NULL, false))
xe_gt_err_once(gt, "TD flush timeout\n");
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
}
void xe_device_l2_flush(struct xe_device *xe)
{
struct xe_gt *gt;
- unsigned int fw_ref;
gt = xe_root_mmio_gt(xe);
if (!gt)
@@ -1075,8 +1131,8 @@ void xe_device_l2_flush(struct xe_device *xe)
if (!XE_GT_WA(gt, 16023588340))
return;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return;
spin_lock(&gt->global_invl_lock);
@@ -1086,8 +1142,6 @@ void xe_device_l2_flush(struct xe_device *xe)
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 32cc6323b7f6..6604b89330d5 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -172,6 +172,11 @@ static inline bool xe_device_has_lmtt(struct xe_device *xe)
return IS_DGFX(xe);
}
+static inline bool xe_device_has_mert(struct xe_device *xe)
+{
+ return xe->info.has_mert;
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index ec9c06b06fb5..a73e0e957cb0 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -57,9 +57,8 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold);
- xe_pm_runtime_put(xe);
return ret ?: count;
}
@@ -84,33 +83,31 @@ lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, c
u16 major = 0, minor = 0, hotfix = 0, build = 0;
int ret;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
&cap, NULL);
if (ret)
- goto out;
+ return ret;
if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) {
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0),
&ver_low, NULL);
if (ret)
- goto out;
+ return ret;
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0),
&ver_high, NULL);
if (ret)
- goto out;
+ return ret;
major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low);
minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low);
hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high);
build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high);
}
-out:
- xe_pm_runtime_put(xe);
- return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build);
+ return sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build);
}
static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version);
@@ -123,33 +120,31 @@ lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *a
u16 major = 0, minor = 0, hotfix = 0, build = 0;
int ret;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
&cap, NULL);
if (ret)
- goto out;
+ return ret;
if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) {
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0),
&ver_low, NULL);
if (ret)
- goto out;
+ return ret;
ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0),
&ver_high, NULL);
if (ret)
- goto out;
+ return ret;
major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low);
minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low);
hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high);
build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high);
}
-out:
- xe_pm_runtime_put(xe);
- return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build);
+ return sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build);
}
static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version);
@@ -233,9 +228,8 @@ auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *at
struct xe_device *xe = pdev_to_xe_device(pdev);
u32 cap, val;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE_CAP);
- xe_pm_runtime_put(xe);
cap = REG_FIELD_GET(LINK_DOWNGRADE, val);
return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE);
@@ -251,11 +245,10 @@ auto_link_downgrade_status_show(struct device *dev, struct device_attribute *att
u32 val = 0;
int ret;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
ret = xe_pcode_read(xe_device_get_root_tile(xe),
PCODE_MBOX(DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS, 0),
&val, NULL);
- xe_pm_runtime_put(xe);
return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_LINK_DOWNGRADE_STATUS, val));
}
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 6ce3247d1bd8..dad355fec50c 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -17,6 +17,7 @@
#include "xe_late_bind_fw_types.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
+#include "xe_mert.h"
#include "xe_oa_types.h"
#include "xe_pagefault_types.h"
#include "xe_platform_types.h"
@@ -183,6 +184,13 @@ struct xe_tile {
* Media GT shares a pool with its primary GT.
*/
struct xe_sa_manager *kernel_bb_pool;
+
+ /**
+ * @mem.reclaim_pool: Pool for page reclaim list (PRL) allocations.
+ *
+ * Only main GT has page reclaim list allocations.
+ */
+ struct xe_sa_manager *reclaim_pool;
} mem;
/** @sriov: tile level virtualization data */
@@ -219,6 +227,9 @@ struct xe_tile {
/** @debugfs: debugfs directory associated with this tile */
struct dentry *debugfs;
+
+ /** @mert: MERT-related data */
+ struct xe_mert mert;
};
/**
@@ -285,6 +296,8 @@ struct xe_device {
u8 has_asid:1;
/** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
u8 has_atomic_enable_pte_bit:1;
+ /** @info.has_cached_pt: Supports caching pagetable */
+ u8 has_cached_pt:1;
/** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
u8 has_device_atomics_on_smem:1;
/** @info.has_fan_control: Device supports fan control */
@@ -297,6 +310,8 @@ struct xe_device {
u8 has_heci_cscfi:1;
/** @info.has_heci_gscfi: device has heci gscfi */
u8 has_heci_gscfi:1;
+ /** @info.has_i2c: Device has I2C controller */
+ u8 has_i2c:1;
/** @info.has_late_bind: Device has firmware late binding support */
u8 has_late_bind:1;
/** @info.has_llc: Device has a shared CPU+GPU last level cache */
@@ -307,6 +322,12 @@ struct xe_device {
u8 has_mbx_power_limits:1;
/** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */
u8 has_mem_copy_instr:1;
+ /** @info.has_mert: Device has standalone MERT */
+ u8 has_mert:1;
+ /** @info.has_page_reclaim_hw_assist: Device supports page reclamation feature */
+ u8 has_page_reclaim_hw_assist:1;
+ /** @info.has_pre_prod_wa: Pre-production workarounds still present in driver */
+ u8 has_pre_prod_wa:1;
/** @info.has_pxp: Device has PXP support */
u8 has_pxp:1;
/** @info.has_range_tlb_inval: Has range based TLB invalidations */
@@ -605,6 +626,12 @@ struct xe_device {
/** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */
u32 atomic_svm_timeslice_ms;
+ /** @min_run_period_lr_ms: LR VM (preempt fence mode) timeslice */
+ u32 min_run_period_lr_ms;
+
+ /** @min_run_period_pf_ms: LR VM (page fault mode) timeslice */
+ u32 min_run_period_pf_ms;
+
#ifdef TEST_VM_OPS_ERROR
/**
* @vm_inject_error_position: inject errors at different places in VM
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index f931ff9b1ec0..2787bbb36141 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -285,32 +285,31 @@ static struct xe_hw_engine *any_engine(struct xe_device *xe)
return NULL;
}
-static bool force_wake_get_any_engine(struct xe_device *xe,
- struct xe_hw_engine **phwe,
- unsigned int *pfw_ref)
+/*
+ * Pick any engine and grab its forcewake. On error *phwe will be set to
+ * NULL and the returned forcewake reference will be invalid. Callers
+ * should check *phwe against NULL.
+ */
+static struct xe_force_wake_ref force_wake_get_any_engine(struct xe_device *xe,
+ struct xe_hw_engine **phwe)
{
enum xe_force_wake_domains domain;
- unsigned int fw_ref;
+ struct xe_force_wake_ref fw_ref = {};
struct xe_hw_engine *hwe;
- struct xe_force_wake *fw;
+
+ *phwe = NULL;
hwe = any_engine(xe);
if (!hwe)
- return false;
+ return fw_ref; /* will be invalid */
domain = xe_hw_engine_to_fw_domain(hwe);
- fw = gt_to_fw(hwe->gt);
-
- fw_ref = xe_force_wake_get(fw, domain);
- if (!xe_force_wake_ref_has_domain(fw_ref, domain)) {
- xe_force_wake_put(fw, fw_ref);
- return false;
- }
- *phwe = hwe;
- *pfw_ref = fw_ref;
+ fw_ref = xe_force_wake_constructor(gt_to_fw(hwe->gt), domain);
+ if (xe_force_wake_ref_has_domain(fw_ref.domains, domain))
+ *phwe = hwe; /* valid forcewake */
- return true;
+ return fw_ref;
}
static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
@@ -322,7 +321,6 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
struct xe_hw_engine *hwe;
struct xe_exec_queue *q;
u64 gpu_timestamp;
- unsigned int fw_ref;
/*
* RING_TIMESTAMP registers are inaccessible in VF mode.
@@ -339,29 +337,26 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
wait_var_event(&xef->exec_queue.pending_removal,
!atomic_read(&xef->exec_queue.pending_removal));
- xe_pm_runtime_get(xe);
- if (!force_wake_get_any_engine(xe, &hwe, &fw_ref)) {
- xe_pm_runtime_put(xe);
- return;
- }
-
- /* Accumulate all the exec queues from this client */
- mutex_lock(&xef->exec_queue.lock);
- xa_for_each(&xef->exec_queue.xa, i, q) {
- xe_exec_queue_get(q);
- mutex_unlock(&xef->exec_queue.lock);
-
- xe_exec_queue_update_run_ticks(q);
+ scoped_guard(xe_pm_runtime, xe) {
+ CLASS(xe_force_wake_release_only, fw_ref)(force_wake_get_any_engine(xe, &hwe));
+ if (!hwe)
+ return;
+ /* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
- xe_exec_queue_put(q);
- }
- mutex_unlock(&xef->exec_queue.lock);
+ xa_for_each(&xef->exec_queue.xa, i, q) {
+ xe_exec_queue_get(q);
+ mutex_unlock(&xef->exec_queue.lock);
- gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+ xe_exec_queue_update_run_ticks(q);
- xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref);
- xe_pm_runtime_put(xe);
+ mutex_lock(&xef->exec_queue.lock);
+ xe_exec_queue_put(q);
+ }
+ mutex_unlock(&xef->exec_queue.lock);
+
+ gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+ }
for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) {
const char *class_name;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index fd9480031750..730a5c9c2637 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -121,7 +121,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
struct drm_exec *exec = &vm_exec.exec;
- u32 i, num_syncs, num_ufence = 0;
+ u32 i, num_syncs, num_in_sync = 0, num_ufence = 0;
struct xe_validation_ctx ctx;
struct xe_sched_job *job;
struct xe_vm *vm;
@@ -183,6 +183,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (xe_sync_is_ufence(&syncs[num_syncs]))
num_ufence++;
+
+ if (!num_in_sync && xe_sync_needs_wait(&syncs[num_syncs]))
+ num_in_sync++;
}
if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
@@ -203,7 +206,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
mode = xe_hw_engine_group_find_exec_mode(q);
if (mode == EXEC_MODE_DMA_FENCE) {
- err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
+ err = xe_hw_engine_group_get_mode(group, mode, &previous_mode,
+ syncs, num_in_sync ?
+ num_syncs : 0);
if (err)
goto err_syncs;
}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 8724f8de67e2..41023a464480 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -13,6 +13,7 @@
#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>
+#include "xe_bo.h"
#include "xe_dep_scheduler.h"
#include "xe_device.h"
#include "xe_gt.h"
@@ -53,6 +54,54 @@
* the ring operations the different engine classes support.
*/
+/**
+ * DOC: Multi Queue Group
+ *
+ * Multi Queue Group is another mode of execution supported by the compute
+ * and blitter copy command streamers (CCS and BCS, respectively). It is
+ * an enhancement of the existing hardware architecture and leverages the
+ * same submission model. It enables support for efficient, parallel
+ * execution of multiple queues within a single shared context. The multi
+ * queue group functionality is only supported with the GuC submission
+ * backend. All the queues of a group must use the same address space (VM).
+ *
+ * The DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP execution queue property
+ * supports creating a multi queue group and adding queues to a queue group.
+ *
+ * The XE_EXEC_QUEUE_CREATE ioctl call with the above property and the value
+ * field set to DRM_XE_MULTI_GROUP_CREATE will create a new multi queue group
+ * with the queue being created as the primary queue (aka q0) of the group.
+ * To add secondary queues to the group, they need to be created with the
+ * above property and the id of the primary queue as the value. The properties
+ * of the primary queue (like priority and time slice) apply to the whole
+ * group, so these properties can't be set for the secondary queues of a group.
+ *
+ * The hardware does not support removing a queue from a multi-queue group.
+ * However, queues can be dynamically added to the group. A group can have
+ * up to 64 queues. To support this, XeKMD holds references to LRCs of the
+ * queues even after the queues are destroyed by the user until the whole
+ * group is destroyed. The secondary queues hold a reference to the primary
+ * queue thus preventing the group from being destroyed when user destroys
+ * the primary queue. Once the primary queue is destroyed, secondary queues
+ * can't be added to the queue group, but they can continue to submit the
+ * jobs if the DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag is set during the multi
+ * queue group creation.
+ *
+ * The queues of a multi queue group can set their priority within the group
+ * through the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property.
+ * This multi queue priority can also be set dynamically through the
+ * XE_EXEC_QUEUE_SET_PROPERTY ioctl. This is the only other property
+ * supported by the secondary queues of a multi queue group, other than
+ * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE.
+ *
+ * When GuC reports an error on any of the queues of a multi queue group,
+ * the queue cleanup mechanism is invoked for all the queues of the group
+ * as hardware cannot make progress on the multi queue context.
+ *
+ * Refer to :ref:`multi-queue-group-guc-interface` for the multi queue group
+ * GuC interface.
+ */
+
enum xe_exec_queue_sched_prop {
XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
XE_EXEC_QUEUE_TIMESLICE = 1,
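
As a rough illustration of the DOC: Multi Queue Group flow added above, here is a hedged userspace sketch. The DRM_XE_MULTI_GROUP_* values and the exact field layout come from the patched uapi/drm/xe_drm.h and are assumed here rather than quoted; error handling and engine placement setup are reduced to the minimum.

    /* Hypothetical userspace sketch, not part of this patch. */
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/xe_drm.h>

    static uint32_t create_queue(int fd, uint32_t vm_id,
                                 struct drm_xe_engine_class_instance *eci,
                                 uint64_t multi_group_value)
    {
            struct drm_xe_ext_set_property ext = {
                    .base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
                    .property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
                    .value = multi_group_value,
            };
            struct drm_xe_exec_queue_create create = {
                    .extensions = (uintptr_t)&ext,
                    .width = 1,
                    .num_placements = 1,
                    .vm_id = vm_id,
                    .instances = (uintptr_t)eci,
            };

            if (ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create))
                    return 0;       /* error handling elided */

            return create.exec_queue_id;
    }

    /* q0 creates the group; secondary queues pass q0's id as the value. */
    static void create_multi_queue_group(int fd, uint32_t vm_id,
                                         struct drm_xe_engine_class_instance *eci)
    {
            uint32_t q0 = create_queue(fd, vm_id, eci, DRM_XE_MULTI_GROUP_CREATE);
            uint32_t q1 = create_queue(fd, vm_id, eci, q0);

            (void)q1;       /* submit on q0/q1 as usual via DRM_IOCTL_XE_EXEC */
    }
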
@@ -61,7 +110,35 @@ enum xe_exec_queue_sched_prop {
};
static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
- u64 extensions, int ext_number);
+ u64 extensions);
+
+static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q)
+{
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ struct xe_lrc *lrc;
+ unsigned long idx;
+
+ if (xe_exec_queue_is_multi_queue_secondary(q)) {
+ /*
+ * Put pairs with get from xe_exec_queue_lookup() call
+ * in xe_exec_queue_group_validate().
+ */
+ xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q));
+ return;
+ }
+
+ if (!group)
+ return;
+
+ /* Primary queue cleanup */
+ xa_for_each(&group->xa, idx, lrc)
+ xe_lrc_put(lrc);
+
+ xa_destroy(&group->xa);
+ mutex_destroy(&group->list_lock);
+ xe_bo_unpin_map_no_vm(group->cgp_bo);
+ kfree(group);
+}
static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
@@ -73,12 +150,17 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
if (xe_exec_queue_uses_pxp(q))
xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
+
+ if (xe_exec_queue_is_multi_queue(q))
+ xe_exec_queue_group_cleanup(q);
+
if (q->vm)
xe_vm_put(q->vm);
if (q->xef)
xe_file_put(q->xef);
+ kvfree(q->replay_state);
kfree(q);
}
@@ -147,6 +229,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
INIT_LIST_HEAD(&q->multi_gt_link);
INIT_LIST_HEAD(&q->hw_engine_group_link);
INIT_LIST_HEAD(&q->pxp.link);
+ q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;
q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
q->sched_props.preempt_timeout_us =
@@ -175,7 +258,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
* may set q->usm, must come before xe_lrc_create(),
* may overwrite q->sched_props, must come before q->ops->init()
*/
- err = exec_queue_user_extensions(xe, q, extensions, 0);
+ err = exec_queue_user_extensions(xe, q, extensions);
if (err) {
__xe_exec_queue_free(q);
return ERR_PTR(err);
@@ -225,8 +308,8 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
struct xe_lrc *lrc;
xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
- lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(),
- q->msix_vec, flags);
+ lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
+ xe_lrc_ring_size(), q->msix_vec, flags);
if (IS_ERR(lrc)) {
err = PTR_ERR(lrc);
goto err_lrc;
@@ -383,6 +466,26 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
+static void xe_exec_queue_group_kill(struct kref *ref)
+{
+ struct xe_exec_queue_group *group = container_of(ref, struct xe_exec_queue_group,
+ kill_refcount);
+ xe_exec_queue_kill(group->primary);
+}
+
+static inline void xe_exec_queue_group_kill_get(struct xe_exec_queue_group *group)
+{
+ kref_get(&group->kill_refcount);
+}
+
+void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group)
+{
+ if (!group)
+ return;
+
+ kref_put(&group->kill_refcount, xe_exec_queue_group_kill);
+}
+
void xe_exec_queue_destroy(struct kref *ref)
{
struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
@@ -567,6 +670,217 @@ exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value
return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
}
+static int exec_queue_set_hang_replay_state(struct xe_device *xe,
+ struct xe_exec_queue *q,
+ u64 value)
+{
+ size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class);
+ u64 __user *address = u64_to_user_ptr(value);
+ void *ptr;
+
+ ptr = vmemdup_user(address, size);
+ if (XE_IOCTL_DBG(xe, IS_ERR(ptr)))
+ return PTR_ERR(ptr);
+
+ q->replay_state = ptr;
+
+ return 0;
+}
+
+static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q)
+{
+ struct xe_tile *tile = gt_to_tile(q->gt);
+ struct xe_exec_queue_group *group;
+ struct xe_bo *bo;
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return -ENOMEM;
+
+ bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED_LATE_RESTORE |
+ XE_BO_FLAG_FORCE_USER_VRAM |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_GGTT, false);
+ if (IS_ERR(bo)) {
+ drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n",
+ PTR_ERR(bo));
+ kfree(group);
+ return PTR_ERR(bo);
+ }
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K);
+
+ group->primary = q;
+ group->cgp_bo = bo;
+ INIT_LIST_HEAD(&group->list);
+ kref_init(&group->kill_refcount);
+ xa_init_flags(&group->xa, XA_FLAGS_ALLOC1);
+ mutex_init(&group->list_lock);
+ q->multi_queue.group = group;
+
+ /* group->list_lock is used in submission backend */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&group->list_lock);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+
+ return 0;
+}
+
+static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q)
+{
+ return q->gt->info.multi_queue_engine_class_mask & BIT(q->class);
+}
+
+static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q,
+ u32 primary_id)
+{
+ struct xe_exec_queue_group *group;
+ struct xe_exec_queue *primary;
+ int ret;
+
+ /*
+ * Get from below xe_exec_queue_lookup() pairs with put
+ * in xe_exec_queue_group_cleanup().
+ */
+ primary = xe_exec_queue_lookup(q->vm->xef, primary_id);
+ if (XE_IOCTL_DBG(xe, !primary))
+ return -ENOENT;
+
+ if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) ||
+ XE_IOCTL_DBG(xe, q->vm != primary->vm) ||
+ XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) {
+ ret = -EINVAL;
+ goto put_primary;
+ }
+
+ group = primary->multi_queue.group;
+ q->multi_queue.valid = true;
+ q->multi_queue.group = group;
+
+ return 0;
+put_primary:
+ xe_exec_queue_put(primary);
+ return ret;
+}
+
+#define XE_MAX_GROUP_SIZE 64
+static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q)
+{
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ u32 pos;
+ int err;
+
+ xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));
+
+ /* Primary queue holds a reference to LRCs of all secondary queues */
+ err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]),
+ XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL);
+ if (XE_IOCTL_DBG(xe, err)) {
+ xe_lrc_put(q->lrc[0]);
+
+ /* It is invalid if queue group limit is exceeded */
+ if (err == -EBUSY)
+ err = -EINVAL;
+
+ return err;
+ }
+
+ q->multi_queue.pos = pos;
+
+ if (group->primary->multi_queue.keep_active) {
+ xe_exec_queue_group_kill_get(group);
+ q->multi_queue.keep_active = true;
+ }
+
+ return 0;
+}
+
+static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q)
+{
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ struct xe_lrc *lrc;
+
+ xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));
+
+ lrc = xa_erase(&group->xa, q->multi_queue.pos);
+ xe_assert(xe, lrc);
+ xe_lrc_put(lrc);
+
+ if (q->multi_queue.keep_active) {
+ xe_exec_queue_group_kill_put(group);
+ q->multi_queue.keep_active = false;
+ }
+}
+
+static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q,
+ u64 value)
+{
+ if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q)))
+ return -ENODEV;
+
+ if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe)))
+ return -EOPNOTSUPP;
+
+ if (XE_IOCTL_DBG(xe, !q->vm->xef))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q)))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q)))
+ return -EINVAL;
+
+ if (value & DRM_XE_MULTI_GROUP_CREATE) {
+ if (XE_IOCTL_DBG(xe, value & ~(DRM_XE_MULTI_GROUP_CREATE |
+ DRM_XE_MULTI_GROUP_KEEP_ACTIVE)))
+ return -EINVAL;
+
+ /*
+ * KEEP_ACTIVE is not supported in preempt fence mode as in that mode,
+ * VM_DESTROY ioctl expects all exec queues of that VM are already killed.
+ */
+ if (XE_IOCTL_DBG(xe, (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) &&
+ xe_vm_in_preempt_fence_mode(q->vm)))
+ return -EINVAL;
+
+ q->multi_queue.valid = true;
+ q->multi_queue.is_primary = true;
+ q->multi_queue.pos = 0;
+ if (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE)
+ q->multi_queue.keep_active = true;
+
+ return 0;
+ }
+
+ /* While adding secondary queues, the upper 32 bits must be 0 */
+ if (XE_IOCTL_DBG(xe, value & (~0ull << 32)))
+ return -EINVAL;
+
+ return xe_exec_queue_group_validate(xe, q, value);
+}
+
+static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q,
+ u64 value)
+{
+ if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH))
+ return -EINVAL;
+
+ /* For queue creation time (!q->xef) setting, just store the priority value */
+ if (!q->xef) {
+ q->multi_queue.priority = value;
+ return 0;
+ }
+
+ if (!xe_exec_queue_is_multi_queue(q))
+ return -EINVAL;
+
+ return q->ops->set_multi_queue_priority(q, value);
+}
+
typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
struct xe_exec_queue *q,
u64 value);
@@ -575,11 +889,76 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
+ [DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state,
+ [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group,
+ [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] =
+ exec_queue_set_multi_queue_priority,
};
+int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_exec_queue_set_property *args = data;
+ struct xe_exec_queue *q;
+ int ret;
+ u32 idx;
+
+ if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, args->property !=
+ DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
+ return -EINVAL;
+
+ q = xe_exec_queue_lookup(xef, args->exec_queue_id);
+ if (XE_IOCTL_DBG(xe, !q))
+ return -ENOENT;
+
+ idx = array_index_nospec(args->property,
+ ARRAY_SIZE(exec_queue_set_property_funcs));
+ ret = exec_queue_set_property_funcs[idx](xe, q, args->value);
+ if (XE_IOCTL_DBG(xe, ret))
+ goto err_post_lookup;
+
+ xe_exec_queue_put(q);
+ return 0;
+
+ err_post_lookup:
+ xe_exec_queue_put(q);
+ return ret;
+}
+
+static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties)
+{
+ u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) |
+ BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY);
+
+ /*
+ * Only MULTI_QUEUE_PRIORITY property is valid for secondary queues of a
+ * multi-queue group.
+ */
+ if (xe_exec_queue_is_multi_queue_secondary(q) &&
+ properties & ~secondary_queue_valid_props)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties)
+{
+ /* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */
+ if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) &&
+ !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)))
+ return -EINVAL;
+
+ return 0;
+}
+
static int exec_queue_user_ext_set_property(struct xe_device *xe,
struct xe_exec_queue *q,
- u64 extension)
+ u64 extension, u64 *properties)
{
u64 __user *address = u64_to_user_ptr(extension);
struct drm_xe_ext_set_property ext;
@@ -595,27 +974,35 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
XE_IOCTL_DBG(xe, ext.pad) ||
XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
- ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE))
+ ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE &&
+ ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE &&
+ ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP &&
+ ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
return -EINVAL;
idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
if (!exec_queue_set_property_funcs[idx])
return -EINVAL;
+ *properties |= BIT_ULL(idx);
+ err = exec_queue_user_ext_check(q, *properties);
+ if (XE_IOCTL_DBG(xe, err))
+ return err;
+
return exec_queue_set_property_funcs[idx](xe, q, ext.value);
}
typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
struct xe_exec_queue *q,
- u64 extension);
+ u64 extension, u64 *properties);
static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};
#define MAX_USER_EXTENSIONS 16
-static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
- u64 extensions, int ext_number)
+static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+ u64 extensions, int ext_number, u64 *properties)
{
u64 __user *address = u64_to_user_ptr(extensions);
struct drm_xe_user_extension ext;
@@ -636,13 +1023,36 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
idx = array_index_nospec(ext.name,
ARRAY_SIZE(exec_queue_user_extension_funcs));
- err = exec_queue_user_extension_funcs[idx](xe, q, extensions);
+ err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties);
if (XE_IOCTL_DBG(xe, err))
return err;
if (ext.next_extension)
- return exec_queue_user_extensions(xe, q, ext.next_extension,
- ++ext_number);
+ return __exec_queue_user_extensions(xe, q, ext.next_extension,
+ ++ext_number, properties);
+
+ return 0;
+}
+
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+ u64 extensions)
+{
+ u64 properties = 0;
+ int err;
+
+ err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties);
+ if (XE_IOCTL_DBG(xe, err))
+ return err;
+
+ err = exec_queue_user_ext_check_final(q, properties);
+ if (XE_IOCTL_DBG(xe, err))
+ return err;
+
+ if (xe_exec_queue_is_multi_queue_primary(q)) {
+ err = xe_exec_queue_group_init(xe, q);
+ if (XE_IOCTL_DBG(xe, err))
+ return err;
+ }
return 0;
}
@@ -798,12 +1208,18 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (IS_ERR(q))
return PTR_ERR(q);
+ if (xe_exec_queue_is_multi_queue_secondary(q)) {
+ err = xe_exec_queue_group_add(xe, q);
+ if (XE_IOCTL_DBG(xe, err))
+ goto put_exec_queue;
+ }
+
if (xe_vm_in_preempt_fence_mode(vm)) {
q->lr.context = dma_fence_context_alloc(1);
err = xe_vm_add_compute_exec_queue(vm, q);
if (XE_IOCTL_DBG(xe, err))
- goto put_exec_queue;
+ goto delete_queue_group;
}
if (q->vm && q->hwe->hw_engine_group) {
@@ -826,6 +1242,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
kill_exec_queue:
xe_exec_queue_kill(q);
+delete_queue_group:
+ if (xe_exec_queue_is_multi_queue_secondary(q))
+ xe_exec_queue_group_delete(xe, q);
put_exec_queue:
xe_exec_queue_put(q);
return err;
@@ -981,6 +1400,11 @@ void xe_exec_queue_kill(struct xe_exec_queue *q)
q->ops->kill(q);
xe_vm_remove_compute_exec_queue(q->vm, q);
+
+ if (!xe_exec_queue_is_multi_queue_primary(q) && q->multi_queue.keep_active) {
+ xe_exec_queue_group_kill_put(q->multi_queue.group);
+ q->multi_queue.keep_active = false;
+ }
}
int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
@@ -1007,7 +1431,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
if (q->vm && q->hwe->hw_engine_group)
xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
- xe_exec_queue_kill(q);
+ if (xe_exec_queue_is_multi_queue_primary(q))
+ xe_exec_queue_group_kill_put(q->multi_queue.group);
+ else
+ xe_exec_queue_kill(q);
trace_xe_exec_queue_close(q);
xe_exec_queue_put(q);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index fda4d4f9bda8..b5ad975d7e97 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -66,6 +66,55 @@ static inline bool xe_exec_queue_uses_pxp(struct xe_exec_queue *q)
return q->pxp.type;
}
+/**
+ * xe_exec_queue_is_multi_queue() - Whether an exec_queue is part of a queue group.
+ * @q: The exec_queue
+ *
+ * Return: True if the exec_queue is part of a queue group, false otherwise.
+ */
+static inline bool xe_exec_queue_is_multi_queue(struct xe_exec_queue *q)
+{
+ return q->multi_queue.valid;
+}
+
+/**
+ * xe_exec_queue_is_multi_queue_primary() - Whether an exec_queue is primary queue
+ * of a multi queue group.
+ * @q: The exec_queue
+ *
+ * Return: True if @q is primary queue of a queue group, false otherwise.
+ */
+static inline bool xe_exec_queue_is_multi_queue_primary(struct xe_exec_queue *q)
+{
+ return q->multi_queue.is_primary;
+}
+
+/**
+ * xe_exec_queue_is_multi_queue_secondary() - Whether an exec_queue is secondary queue
+ * of a multi queue group.
+ * @q: The exec_queue
+ *
+ * Return: True if @q is secondary queue of a queue group, false otherwise.
+ */
+static inline bool xe_exec_queue_is_multi_queue_secondary(struct xe_exec_queue *q)
+{
+ return xe_exec_queue_is_multi_queue(q) && !xe_exec_queue_is_multi_queue_primary(q);
+}
+
+/**
+ * xe_exec_queue_multi_queue_primary() - Get multi queue group's primary queue
+ * @q: The exec_queue
+ *
+ * If @q belongs to a multi queue group, then the primary queue of the group will
+ * be returned. Otherwise, @q will be returned.
+ */
+static inline struct xe_exec_queue *xe_exec_queue_multi_queue_primary(struct xe_exec_queue *q)
+{
+ return xe_exec_queue_is_multi_queue(q) ? q->multi_queue.group->primary : q;
+}
+
+void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group);
+
bool xe_exec_queue_is_lr(struct xe_exec_queue *q);
bool xe_exec_queue_is_idle(struct xe_exec_queue *q);
@@ -78,6 +127,8 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);
void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm);
@@ -111,4 +162,21 @@ int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);
struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q);
+/**
+ * xe_exec_queue_idle_skip_suspend() - Can exec queue skip suspend
+ * @q: The exec_queue
+ *
+ * If an exec queue is not parallel and is idle, the suspend steps can be
+ * skipped in the submission backend, immediately signaling the suspend fence.
+ * Parallel queues cannot skip this step due to limitations in the submission
+ * backend.
+ *
+ * Return: True if exec queue is idle and can skip suspend steps, False
+ * otherwise
+ */
+static inline bool xe_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
+{
+ return !xe_exec_queue_is_parallel(q) && xe_exec_queue_is_idle(q);
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 771ffe35cd0c..67ea5eebf70b 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -33,6 +33,44 @@ enum xe_exec_queue_priority {
};
/**
+ * enum xe_multi_queue_priority - Multi Queue priority values
+ *
+ * The priority values of the queues within the multi queue group.
+ */
+enum xe_multi_queue_priority {
+ /** @XE_MULTI_QUEUE_PRIORITY_LOW: Priority low */
+ XE_MULTI_QUEUE_PRIORITY_LOW = 0,
+ /** @XE_MULTI_QUEUE_PRIORITY_NORMAL: Priority normal */
+ XE_MULTI_QUEUE_PRIORITY_NORMAL,
+ /** @XE_MULTI_QUEUE_PRIORITY_HIGH: Priority high */
+ XE_MULTI_QUEUE_PRIORITY_HIGH,
+};
+
+/**
+ * struct xe_exec_queue_group - Execution multi queue group
+ *
+ * Contains multi queue group information.
+ */
+struct xe_exec_queue_group {
+ /** @primary: Primary queue of this group */
+ struct xe_exec_queue *primary;
+ /** @cgp_bo: BO for the Context Group Page */
+ struct xe_bo *cgp_bo;
+ /** @xa: xarray to store LRCs */
+ struct xarray xa;
+ /** @list: List of all secondary queues in the group */
+ struct list_head list;
+ /** @list_lock: Secondary queue list lock */
+ struct mutex list_lock;
+ /** @kill_refcount: ref count to kill primary queue */
+ struct kref kill_refcount;
+ /** @sync_pending: CGP_SYNC_DONE g2h response pending */
+ bool sync_pending;
+ /** @banned: Group banned */
+ bool banned;
+};
+
+/**
* struct xe_exec_queue - Execution queue
*
* Contains all state necessary for submissions. Can either be a user object or
@@ -111,6 +149,24 @@ struct xe_exec_queue {
struct xe_guc_exec_queue *guc;
};
+ /** @multi_queue: Multi queue information */
+ struct {
+ /** @multi_queue.group: Queue group information */
+ struct xe_exec_queue_group *group;
+ /** @multi_queue.link: Link into group's secondary queues list */
+ struct list_head link;
+ /** @multi_queue.priority: Queue priority within the multi-queue group */
+ enum xe_multi_queue_priority priority;
+ /** @multi_queue.pos: Position of queue within the multi-queue group */
+ u8 pos;
+ /** @multi_queue.valid: Queue belongs to a multi queue group */
+ u8 valid:1;
+ /** @multi_queue.is_primary: Is primary queue (Q0) of the group */
+ u8 is_primary:1;
+ /** @multi_queue.keep_active: Keep the group active after primary is destroyed */
+ u8 keep_active:1;
+ } multi_queue;
+
/** @sched_props: scheduling properties */
struct {
/** @sched_props.timeslice_us: timeslice period in micro-seconds */
@@ -167,6 +223,9 @@ struct xe_exec_queue {
/** @ufence_timeline_value: User fence timeline value */
u64 ufence_timeline_value;
+ /** @replay_state: GPU hang replay state */
+ void *replay_state;
+
/** @ops: submission backend exec queue operations */
const struct xe_exec_queue_ops *ops;
@@ -213,6 +272,9 @@ struct xe_exec_queue_ops {
int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us);
/** @set_preempt_timeout: Set preemption timeout for exec queue */
int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us);
+ /** @set_multi_queue_priority: Set multi queue priority */
+ int (*set_multi_queue_priority)(struct xe_exec_queue *q,
+ enum xe_multi_queue_priority priority);
/**
* @suspend: Suspend exec queue from executing, allowed to be called
* multiple times in a row before resume with the caveat that
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 769d05517f93..46c17a18a3f4 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -269,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
port->hwe = hwe;
- port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
+ port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
if (IS_ERR(port->lrc)) {
err = PTR_ERR(port->lrc);
goto err;
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index c59a9b330697..76e054f314ee 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -166,6 +166,13 @@ static int domain_sleep_wait(struct xe_gt *gt,
* xe_force_wake_ref_has_domain() function. Caller must call
* xe_force_wake_put() function to decrease incremented refcounts.
*
+ * When possible, scope-based forcewake (through CLASS(xe_force_wake, ...) or
+ * xe_with_force_wake()) should be used instead of direct calls to this
+ * function. Direct get/put calls should only be used when the function
+ * has goto-based flows that can interfere with scope-based cleanup, or when
+ * the lifetime of the forcewake reference does not match a specific scope
+ * (e.g., forcewake obtained in one function and released in a different one).
+ *
* Return: opaque reference to woken domains or zero if none of requested
* domains were awake.
*/
diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
index 0e3e84bfa51c..1e2198f6a007 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.h
+++ b/drivers/gpu/drm/xe/xe_force_wake.h
@@ -61,4 +61,44 @@ xe_force_wake_ref_has_domain(unsigned int fw_ref, enum xe_force_wake_domains dom
return fw_ref & domain;
}
+struct xe_force_wake_ref {
+ struct xe_force_wake *fw;
+ unsigned int domains;
+};
+
+static inline struct xe_force_wake_ref
+xe_force_wake_constructor(struct xe_force_wake *fw, unsigned int domains)
+{
+ struct xe_force_wake_ref fw_ref = { .fw = fw };
+
+ fw_ref.domains = xe_force_wake_get(fw, domains);
+
+ return fw_ref;
+}
+
+DEFINE_CLASS(xe_force_wake, struct xe_force_wake_ref,
+ xe_force_wake_put(_T.fw, _T.domains),
+ xe_force_wake_constructor(fw, domains),
+ struct xe_force_wake *fw, unsigned int domains);
+
+/*
+ * Scoped helper for the forcewake class, using the same trick as scoped_guard()
+ * to bind the lifetime to the next statement/block.
+ */
+#define __xe_with_force_wake(ref, fw, domains, done) \
+ for (CLASS(xe_force_wake, ref)(fw, domains), *(done) = NULL; \
+ !(done); (done) = (void *)1)
+
+#define xe_with_force_wake(ref, fw, domains) \
+ __xe_with_force_wake(ref, fw, domains, __UNIQUE_ID(done))
+
+/*
+ * Used when xe_force_wake_constructor() has already been called by another
+ * function and the current function is responsible for releasing the forcewake
+ * reference in all possible cases and error paths.
+ */
+DEFINE_CLASS(xe_force_wake_release_only, struct xe_force_wake_ref,
+ if (_T.fw) xe_force_wake_put(_T.fw, _T.domains), fw_ref,
+ struct xe_force_wake_ref fw_ref);
+
#endif
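
A minimal usage sketch of the new forcewake class and the scoped helper, assuming a struct xe_gt *gt as used throughout the driver; xe_force_wake_put() is emitted automatically when the reference goes out of scope, which is what lets the goto-based error paths above be dropped:

    /* Hypothetical examples mirroring the conversions elsewhere in this patch. */
    static void example_under_forcewake(struct xe_gt *gt)
    {
            CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
            if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT))
                    return;         /* the put still runs on this early return */

            /* ... MMIO access that requires the GT domain awake ... */
    }

    static void example_scoped_forcewake(struct xe_gt *gt)
    {
            xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FORCEWAKE_ALL) {
                    if (xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
                            do_gt_work(gt);         /* placeholder helper */
            }                       /* forcewake released at end of block */
    }
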
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index ef481b334af4..48ab8b43fcd0 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -396,9 +396,8 @@ static void ggtt_node_remove_work_func(struct work_struct *work)
delayed_removal_work);
struct xe_device *xe = tile_to_xe(node->ggtt->tile);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
ggtt_node_remove(node);
- xe_pm_runtime_put(xe);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index dd69cb834f8e..a3157b0fe791 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -352,7 +352,6 @@ static void gsc_work(struct work_struct *work)
struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
u32 actions;
int ret;
@@ -361,13 +360,12 @@ static void gsc_work(struct work_struct *work)
gsc->work_actions = 0;
spin_unlock_irq(&gsc->lock);
- xe_pm_runtime_get(xe);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ guard(xe_pm_runtime)(xe);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC);
if (actions & GSC_ACTION_ER_COMPLETE) {
- ret = gsc_er_complete(gt);
- if (ret)
- goto out;
+ if (gsc_er_complete(gt))
+ return;
}
if (actions & GSC_ACTION_FW_LOAD) {
@@ -380,10 +378,6 @@ static void gsc_work(struct work_struct *work)
if (actions & GSC_ACTION_SW_PROXY)
xe_gsc_proxy_request_handler(gsc);
-
-out:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- xe_pm_runtime_put(xe);
}
void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec)
@@ -615,7 +609,6 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
{
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_mmio *mmio = &gt->mmio;
- unsigned int fw_ref;
xe_uc_fw_print(&gsc->fw, p);
@@ -624,8 +617,8 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
if (!xe_uc_fw_is_enabled(&gsc->fw))
return;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref.domains)
return;
drm_printf(p, "\nHECI1 FWSTS: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
@@ -635,6 +628,4 @@ void xe_gsc_print_info(struct xe_gsc *gsc, struct drm_printer *p)
xe_mmio_read32(mmio, HECI_FWSTS4(MTL_GSC_HECI1_BASE)),
xe_mmio_read32(mmio, HECI_FWSTS5(MTL_GSC_HECI1_BASE)),
xe_mmio_read32(mmio, HECI_FWSTS6(MTL_GSC_HECI1_BASE)));
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_gsc_debugfs.c b/drivers/gpu/drm/xe/xe_gsc_debugfs.c
index 461d7e99c2b3..b13928b50eb9 100644
--- a/drivers/gpu/drm/xe/xe_gsc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gsc_debugfs.c
@@ -37,9 +37,8 @@ static int gsc_info(struct seq_file *m, void *data)
struct xe_device *xe = gsc_to_xe(gsc);
struct drm_printer p = drm_seq_file_printer(m);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
xe_gsc_print_info(gsc, &p);
- xe_pm_runtime_put(xe);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index 464282a89eef..e7573a0c5e5d 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -440,22 +440,19 @@ static void xe_gsc_proxy_remove(void *arg)
struct xe_gsc *gsc = arg;
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref = 0;
if (!gsc->proxy.component_added)
return;
/* disable HECI2 IRQs */
- xe_pm_runtime_get(xe);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- if (!fw_ref)
- xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n");
+ scoped_guard(xe_pm_runtime, xe) {
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC);
+ if (!fw_ref.domains)
+ xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n");
- /* try do disable irq even if forcewake failed */
- gsc_proxy_irq_toggle(gsc, false);
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- xe_pm_runtime_put(xe);
+ /* try to disable irq even if forcewake failed */
+ gsc_proxy_irq_toggle(gsc, false);
+ }
xe_gsc_wait_for_worker_completion(gsc);
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index cdce210e36f2..313ce83ab0e5 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -103,14 +103,13 @@ void xe_gt_sanitize(struct xe_gt *gt)
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
- unsigned int fw_ref;
u32 reg;
if (!XE_GT_WA(gt, 16023588340))
return;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return;
if (xe_gt_is_main_type(gt)) {
@@ -120,12 +119,10 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
}
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
{
- unsigned int fw_ref;
u32 reg;
if (!XE_GT_WA(gt, 16023588340))
@@ -134,15 +131,13 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
if (xe_gt_is_media_type(gt))
return;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return;
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg &= ~CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
static void gt_reset_worker(struct work_struct *w);
@@ -389,7 +384,6 @@ put_exec_queue:
int xe_gt_init_early(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
if (IS_SRIOV_PF(gt_to_xe(gt))) {
@@ -436,13 +430,12 @@ int xe_gt_init_early(struct xe_gt *gt)
if (err)
return err;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -460,16 +453,15 @@ static void dump_pat_on_error(struct xe_gt *gt)
static int gt_init_with_gt_forcewake(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
err = xe_uc_init(&gt->uc);
if (err)
- goto err_force_wake;
+ return err;
xe_gt_topology_init(gt);
xe_gt_mcr_init(gt);
@@ -478,7 +470,7 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt)
if (xe_gt_is_main_type(gt)) {
err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
if (err)
- goto err_force_wake;
+ return err;
if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
}
@@ -492,17 +484,17 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt)
err = xe_hw_engines_init_early(gt);
if (err) {
dump_pat_on_error(gt);
- goto err_force_wake;
+ return err;
}
err = xe_hw_engine_class_sysfs_init(gt);
if (err)
- goto err_force_wake;
+ return err;
/* Initialize CCS mode sysfs after early initialization of HW engines */
err = xe_gt_ccs_mode_sysfs_init(gt);
if (err)
- goto err_force_wake;
+ return err;
/*
* Stash hardware-reported version. Since this register does not exist
@@ -510,25 +502,16 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt)
*/
gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
-
-err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
- return err;
}
static int gt_init_with_all_forcewake(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- err = -ETIMEDOUT;
- goto err_force_wake;
- }
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
+ return -ETIMEDOUT;
xe_gt_mcr_set_implicit_defaults(gt);
xe_wa_process_gt(gt);
@@ -537,20 +520,20 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
err = xe_gt_clock_init(gt);
if (err)
- goto err_force_wake;
+ return err;
xe_mocs_init(gt);
err = xe_execlist_init(gt);
if (err)
- goto err_force_wake;
+ return err;
err = xe_hw_engines_init(gt);
if (err)
- goto err_force_wake;
+ return err;
err = xe_uc_init_post_hwconfig(&gt->uc);
if (err)
- goto err_force_wake;
+ return err;
if (xe_gt_is_main_type(gt)) {
/*
@@ -561,10 +544,8 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
- if (IS_ERR(gt->usm.bb_pool)) {
- err = PTR_ERR(gt->usm.bb_pool);
- goto err_force_wake;
- }
+ if (IS_ERR(gt->usm.bb_pool))
+ return PTR_ERR(gt->usm.bb_pool);
}
}
@@ -573,12 +554,12 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
err = xe_migrate_init(tile->migrate);
if (err)
- goto err_force_wake;
+ return err;
}
err = xe_uc_load_hw(&gt->uc);
if (err)
- goto err_force_wake;
+ return err;
/* Configure default CCS mode of 1 engine with all resources */
if (xe_gt_ccs_mode_enabled(gt)) {
@@ -592,14 +573,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_gt_sriov_pf_init_hw(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
return 0;
-
-err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
- return err;
}
static void xe_gt_fini(void *arg)
@@ -902,56 +876,42 @@ void xe_gt_reset_async(struct xe_gt *gt)
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
- unsigned int fw_ref;
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_uc_suspend_prepare(&gt->uc);
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
int xe_gt_suspend(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "suspending\n");
xe_gt_sanitize(gt);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
- goto err_msg;
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
+ xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
+ return -ETIMEDOUT;
+ }
err = xe_uc_suspend(&gt->uc);
- if (err)
- goto err_force_wake;
+ if (err) {
+ xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
+ return err;
+ }
xe_gt_idle_disable_pg(gt);
xe_gt_disable_host_l2_vram(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_dbg(gt, "suspended\n");
return 0;
-
-err_msg:
- err = -ETIMEDOUT;
-err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
-
- return err;
}
void xe_gt_shutdown(struct xe_gt *gt)
{
- unsigned int fw_ref;
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
do_gt_reset(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -976,32 +936,72 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
int xe_gt_resume(struct xe_gt *gt)
{
- unsigned int fw_ref;
int err;
xe_gt_dbg(gt, "resuming\n");
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
- goto err_msg;
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
+ xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
+ return -ETIMEDOUT;
+ }
err = do_gt_restart(gt);
if (err)
- goto err_force_wake;
+ return err;
xe_gt_idle_enable_pg(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_gt_dbg(gt, "resumed\n");
return 0;
+}
-err_msg:
- err = -ETIMEDOUT;
-err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
+/**
+ * xe_gt_runtime_suspend() - GT runtime suspend
+ * @gt: the GT object
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_gt_runtime_suspend(struct xe_gt *gt)
+{
+ xe_gt_dbg(gt, "runtime suspending\n");
- return err;
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
+ xe_gt_err(gt, "runtime suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
+ return -ETIMEDOUT;
+ }
+
+ xe_uc_runtime_suspend(&gt->uc);
+ xe_gt_disable_host_l2_vram(gt);
+
+ xe_gt_dbg(gt, "runtime suspended\n");
+
+ return 0;
+}
+
+/**
+ * xe_gt_runtime_resume() - GT runtime resume
+ * @gt: the GT object
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_gt_runtime_resume(struct xe_gt *gt)
+{
+ xe_gt_dbg(gt, "runtime resuming\n");
+
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
+ xe_gt_err(gt, "runtime resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
+ return -ETIMEDOUT;
+ }
+
+ xe_gt_enable_host_l2_vram(gt);
+ xe_uc_runtime_resume(&gt->uc);
+
+ xe_gt_dbg(gt, "runtime resumed\n");
+
+ return 0;
}
struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
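
The scope-based helpers used throughout the hunk above come from xe_force_wake.h and xe_pm.h, which are not part of this excerpt. A rough sketch, under assumed names and field layout (the hunk only shows that fw_ref exposes a .domains member), of how such helpers can be built on <linux/cleanup.h>:

#include <linux/cleanup.h>

/* Assumed shape; the diff above only shows that fw_ref has a .domains member. */
struct xe_force_wake_ref {
	struct xe_force_wake *fw;
	unsigned int domains;		/* 0 if xe_force_wake_get() timed out */
};

/*
 * CLASS(xe_force_wake, fw_ref)(fw, XE_FW_GT) declares a local fw_ref whose
 * destructor releases the acquired domains when it leaves scope.
 */
DEFINE_CLASS(xe_force_wake, struct xe_force_wake_ref,
	     xe_force_wake_put(_T.fw, _T.domains),	/* destructor body */
	     ((struct xe_force_wake_ref){		/* constructor expression */
		     .fw = fw,
		     .domains = xe_force_wake_get(fw, domains),
	     }),
	     struct xe_force_wake *fw, unsigned int domains);

/* Enables guard(xe_pm_runtime)(xe) and scoped_guard(xe_pm_runtime, xe) { ... } */
DEFINE_GUARD(xe_pm_runtime, struct xe_device *,
	     xe_pm_runtime_get(_T), xe_pm_runtime_put(_T));

Because the destructor runs automatically when the variable goes out of scope, the error paths in the hunk above can simply return instead of unwinding through err_force_wake style labels.
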
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 9d710049da45..94969ddd9d88 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -58,6 +58,8 @@ int xe_gt_suspend(struct xe_gt *gt);
void xe_gt_shutdown(struct xe_gt *gt);
int xe_gt_resume(struct xe_gt *gt);
void xe_gt_reset_async(struct xe_gt *gt);
+int xe_gt_runtime_resume(struct xe_gt *gt);
+int xe_gt_runtime_suspend(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
int xe_gt_sanitize_freq(struct xe_gt *gt);
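
The xe_pm.c side that drives these new entry points is not part of this excerpt; a hypothetical device-level runtime-PM helper would simply walk the GTs, roughly:

/* Hypothetical sketch only; the real caller lives in xe_pm.c, not shown here. */
static int xe_pm_runtime_suspend_all_gts(struct xe_device *xe)
{
	struct xe_gt *gt;
	u8 id;
	int err;

	for_each_gt(gt, xe, id) {
		err = xe_gt_runtime_suspend(gt);
		if (err)
			return err;
	}

	return 0;
}
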
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index e4fd632f43cf..e4f38b5150fc 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -105,35 +105,24 @@ int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data)
struct drm_info_node *node = m->private;
struct xe_gt *gt = node_to_gt(node);
struct xe_device *xe = gt_to_xe(gt);
- int ret;
-
- xe_pm_runtime_get(xe);
- ret = xe_gt_debugfs_simple_show(m, data);
- xe_pm_runtime_put(xe);
- return ret;
+ guard(xe_pm_runtime)(xe);
+ return xe_gt_debugfs_simple_show(m, data);
}
static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- unsigned int fw_ref;
- int ret = 0;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- ret = -ETIMEDOUT;
- goto fw_put;
- }
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
+ return -ETIMEDOUT;
for_each_hw_engine(hwe, gt, id)
xe_hw_engine_print(hwe, p);
-fw_put:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
- return ret;
+ return 0;
}
static int steering(struct xe_gt *gt, struct drm_printer *p)
@@ -220,6 +209,7 @@ static const struct drm_info_list vf_safe_debugfs_list[] = {
{ "default_lrc_vcs", .show = xe_gt_debugfs_show_with_rpm, .data = vcs_default_lrc },
{ "default_lrc_vecs", .show = xe_gt_debugfs_show_with_rpm, .data = vecs_default_lrc },
{ "hwconfig", .show = xe_gt_debugfs_show_with_rpm, .data = hwconfig },
+ { "pat_sw_config", .show = xe_gt_debugfs_simple_show, .data = xe_pat_dump_sw_config },
};
/* everything else should be added here */
@@ -269,9 +259,8 @@ static void force_reset(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
xe_gt_reset_async(gt);
- xe_pm_runtime_put(xe);
}
static ssize_t force_reset_write(struct file *file,
@@ -297,9 +286,8 @@ static void force_reset_sync(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
xe_gt_reset(gt);
- xe_pm_runtime_put(xe);
}
static ssize_t force_reset_sync_write(struct file *file,
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index ce3c7810469f..a40dd074106f 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -70,9 +70,8 @@ static ssize_t act_freq_show(struct kobject *kobj,
struct xe_guc_pc *pc = dev_to_pc(dev);
u32 freq;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
freq = xe_guc_pc_get_act_freq(pc);
- xe_pm_runtime_put(dev_to_xe(dev));
return sysfs_emit(buf, "%d\n", freq);
}
@@ -86,9 +85,8 @@ static ssize_t cur_freq_show(struct kobject *kobj,
u32 freq;
ssize_t ret;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
ret = xe_guc_pc_get_cur_freq(pc, &freq);
- xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -113,9 +111,8 @@ static ssize_t rpe_freq_show(struct kobject *kobj,
struct xe_guc_pc *pc = dev_to_pc(dev);
u32 freq;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
freq = xe_guc_pc_get_rpe_freq(pc);
- xe_pm_runtime_put(dev_to_xe(dev));
return sysfs_emit(buf, "%d\n", freq);
}
@@ -128,9 +125,8 @@ static ssize_t rpa_freq_show(struct kobject *kobj,
struct xe_guc_pc *pc = dev_to_pc(dev);
u32 freq;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
freq = xe_guc_pc_get_rpa_freq(pc);
- xe_pm_runtime_put(dev_to_xe(dev));
return sysfs_emit(buf, "%d\n", freq);
}
@@ -154,9 +150,8 @@ static ssize_t min_freq_show(struct kobject *kobj,
u32 freq;
ssize_t ret;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
ret = xe_guc_pc_get_min_freq(pc, &freq);
- xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -175,9 +170,8 @@ static ssize_t min_freq_store(struct kobject *kobj,
if (ret)
return ret;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
ret = xe_guc_pc_set_min_freq(pc, freq);
- xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -193,9 +187,8 @@ static ssize_t max_freq_show(struct kobject *kobj,
u32 freq;
ssize_t ret;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
ret = xe_guc_pc_get_max_freq(pc, &freq);
- xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -214,9 +207,8 @@ static ssize_t max_freq_store(struct kobject *kobj,
if (ret)
return ret;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
ret = xe_guc_pc_set_max_freq(pc, freq);
- xe_pm_runtime_put(dev_to_xe(dev));
if (ret)
return ret;
@@ -243,9 +235,8 @@ static ssize_t power_profile_store(struct kobject *kobj,
struct xe_guc_pc *pc = dev_to_pc(dev);
int err;
- xe_pm_runtime_get(dev_to_xe(dev));
+ guard(xe_pm_runtime)(dev_to_xe(dev));
err = xe_guc_pc_set_power_profile(pc, buff);
- xe_pm_runtime_put(dev_to_xe(dev));
return err ?: count;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 3e3d1d52f630..c1c9bec3c487 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -105,7 +105,6 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
struct xe_gt_idle *gtidle = &gt->gtidle;
struct xe_mmio *mmio = &gt->mmio;
u32 vcs_mask, vecs_mask;
- unsigned int fw_ref;
int i, j;
if (IS_SRIOV_VF(xe))
@@ -137,7 +136,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
}
}
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
if (xe->info.skip_guc_pc) {
/*
* GuC sets the hysteresis value when GuC PC is enabled
@@ -154,13 +153,11 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
VDN_MFXVDENC_POWERGATE_ENABLE(2));
xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
void xe_gt_idle_disable_pg(struct xe_gt *gt)
{
struct xe_gt_idle *gtidle = &gt->gtidle;
- unsigned int fw_ref;
if (IS_SRIOV_VF(gt_to_xe(gt)))
return;
@@ -168,9 +165,8 @@ void xe_gt_idle_disable_pg(struct xe_gt *gt)
xe_device_assert_mem_access(gt_to_xe(gt));
gtidle->powergate_enable = 0;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
xe_mmio_write32(&gt->mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
@@ -189,7 +185,6 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
enum xe_gt_idle_state state;
u32 pg_enabled, pg_status = 0;
u32 vcs_mask, vecs_mask;
- unsigned int fw_ref;
int n;
/*
* Media Slices
@@ -226,14 +221,12 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
/* Do not wake the GT to read powergating status */
if (state != GT_IDLE_C6) {
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
pg_enabled = xe_mmio_read32(&gt->mmio, POWERGATE_ENABLE);
pg_status = xe_mmio_read32(&gt->mmio, POWERGATE_DOMAIN_STATUS);
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) {
@@ -271,13 +264,9 @@ static ssize_t name_show(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
- ssize_t ret;
-
- xe_pm_runtime_get(pc_to_xe(pc));
- ret = sysfs_emit(buff, "%s\n", gtidle->name);
- xe_pm_runtime_put(pc_to_xe(pc));
- return ret;
+ guard(xe_pm_runtime)(pc_to_xe(pc));
+ return sysfs_emit(buff, "%s\n", gtidle->name);
}
static struct kobj_attribute name_attr = __ATTR_RO(name);
@@ -289,9 +278,8 @@ static ssize_t idle_status_show(struct kobject *kobj,
struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
enum xe_gt_idle_state state;
- xe_pm_runtime_get(pc_to_xe(pc));
- state = gtidle->idle_status(pc);
- xe_pm_runtime_put(pc_to_xe(pc));
+ scoped_guard(xe_pm_runtime, pc_to_xe(pc))
+ state = gtidle->idle_status(pc);
return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state));
}
@@ -319,9 +307,8 @@ static ssize_t idle_residency_ms_show(struct kobject *kobj,
struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
u64 residency;
- xe_pm_runtime_get(pc_to_xe(pc));
- residency = xe_gt_idle_residency_msec(gtidle);
- xe_pm_runtime_put(pc_to_xe(pc));
+ scoped_guard(xe_pm_runtime, pc_to_xe(pc))
+ residency = xe_gt_idle_residency_msec(gtidle);
return sysfs_emit(buff, "%llu\n", residency);
}
@@ -404,21 +391,17 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt)
int xe_gt_idle_disable_c6(struct xe_gt *gt)
{
- unsigned int fw_ref;
-
xe_device_assert_mem_access(gt_to_xe(gt));
if (IS_SRIOV_VF(gt_to_xe(gt)))
return 0;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
xe_mmio_write32(&gt->mmio, RC_CONTROL, 0);
xe_mmio_write32(&gt->mmio, RC_STATE, 0);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 59c5c6b4d994..6e8507c24986 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -269,7 +269,8 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config,
}
/* Return: number of configuration dwords written */
-static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
+static u32 encode_config(struct xe_gt *gt, u32 *cfg, const struct xe_gt_sriov_config *config,
+ bool details)
{
u32 n = 0;
@@ -303,9 +304,11 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT);
cfg[n++] = config->preempt_timeout;
-#define encode_threshold_config(TAG, ...) ({ \
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG); \
- cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]; \
+#define encode_threshold_config(TAG, NAME, VER...) ({ \
+ if (IF_ARGS(GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, VER), true, VER)) { \
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_THRESHOLD_##TAG); \
+ cfg[n++] = config->thresholds[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)]; \
+ } \
});
MAKE_XE_GUC_KLV_THRESHOLDS_SET(encode_threshold_config);
@@ -328,7 +331,7 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
return -ENOBUFS;
cfg = xe_guc_buf_cpu_ptr(buf);
- num_dwords = encode_config(cfg, config, true);
+ num_dwords = encode_config(gt, cfg, config, true);
xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
if (xe_gt_is_media_type(gt)) {
@@ -2518,7 +2521,7 @@ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *bu
ret = -ENOBUFS;
} else {
config = pf_pick_vf_config(gt, vfid);
- ret = encode_config(buf, config, false) * sizeof(u32);
+ ret = encode_config(gt, buf, config, false) * sizeof(u32);
}
}
mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
@@ -2551,11 +2554,13 @@ static int pf_restore_vf_config_klv(struct xe_gt *gt, unsigned int vfid,
return pf_provision_preempt_timeout(gt, vfid, value[0]);
/* auto-generate case statements */
-#define define_threshold_key_to_provision_case(TAG, ...) \
+#define define_threshold_key_to_provision_case(TAG, NAME, VER...) \
case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \
BUILD_BUG_ON(MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG) != 1u); \
if (len != MAKE_GUC_KLV_VF_CFG_THRESHOLD_LEN(TAG)) \
return -EBADMSG; \
+ if (IF_ARGS(!GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, VER), false, VER)) \
+ return -EKEYREJECTED; \
return pf_provision_threshold(gt, vfid, \
MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG), \
value[0]);
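
Both macros above key off IF_ARGS() from xe_args.h, which is extended earlier in this series but not shown in this excerpt. A minimal sketch of the assumed semantics, where IF_ARGS(_then, _else, args...) picks _then when the trailing argument list is non-empty and _else when it is empty:

#include <linux/args.h>		/* COUNT_ARGS() */

/* Sketch only, limited to 0..3 trailing arguments for brevity. */
#define ___IF_ARGS_0(_then, _else)	_else
#define ___IF_ARGS_1(_then, _else)	_then
#define ___IF_ARGS_2(_then, _else)	_then
#define ___IF_ARGS_3(_then, _else)	_then
#define ___IF_ARGS(n, _then, _else)	___IF_ARGS_##n(_then, _else)
#define __IF_ARGS(n, _then, _else)	___IF_ARGS(n, _then, _else)
#define IF_ARGS(_then, _else, args...)	__IF_ARGS(COUNT_ARGS(args), _then, _else)

With that, a threshold entry declared with a minimum GuC version expands to a runtime GUC_FIRMWARE_VER_AT_LEAST() check, while an entry declared without one collapses at preprocessing time to the constant true (encode path) or false (restore path) alternative.
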
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 5278ea4fd655..ece9eed5d7c5 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -21,6 +21,7 @@
#include "xe_gt_sriov_pf_monitor.h"
#include "xe_gt_sriov_pf_policy.h"
#include "xe_gt_sriov_pf_service.h"
+#include "xe_guc.h"
#include "xe_pm.h"
#include "xe_sriov_pf.h"
#include "xe_sriov_pf_provision.h"
@@ -123,11 +124,10 @@ static int POLICY##_set(void *data, u64 val) \
if (val > (TYPE)~0ull) \
return -EOVERFLOW; \
\
- xe_pm_runtime_get(xe); \
+ guard(xe_pm_runtime)(xe); \
err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \
if (!err) \
xe_sriov_pf_provision_set_custom_mode(xe); \
- xe_pm_runtime_put(xe); \
\
return err; \
} \
@@ -189,12 +189,11 @@ static int CONFIG##_set(void *data, u64 val) \
if (val > (TYPE)~0ull) \
return -EOVERFLOW; \
\
- xe_pm_runtime_get(xe); \
+ guard(xe_pm_runtime)(xe); \
err = xe_sriov_pf_wait_ready(xe) ?: \
xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \
if (!err) \
xe_sriov_pf_provision_set_custom_mode(xe); \
- xe_pm_runtime_put(xe); \
\
return err; \
} \
@@ -249,11 +248,10 @@ static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index in
if (val > (u32)~0ull)
return -EOVERFLOW;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
err = xe_gt_sriov_pf_config_set_threshold(gt, vfid, index, val);
if (!err)
xe_sriov_pf_provision_set_custom_mode(xe);
- xe_pm_runtime_put(xe);
return err;
}
@@ -304,9 +302,11 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne
&sched_priority_fops);
/* register all threshold attributes */
-#define register_threshold_attribute(TAG, NAME, ...) \
- debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \
- &NAME##_fops);
+#define register_threshold_attribute(TAG, NAME, VER...) ({ \
+ if (IF_ARGS(GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, VER), true, VER)) \
+ debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \
+ &NAME##_fops); \
+});
MAKE_XE_GUC_KLV_THRESHOLDS_SET(register_threshold_attribute)
#undef register_threshold_attribute
}
@@ -358,9 +358,8 @@ static ssize_t control_write(struct file *file, const char __user *buf, size_t c
xe_gt_assert(gt, sizeof(cmd) > strlen(control_cmds[n].cmd));
if (sysfs_streq(cmd, control_cmds[n].cmd)) {
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
ret = control_cmds[n].fn ? (*control_cmds[n].fn)(gt, vfid) : 0;
- xe_pm_runtime_put(xe);
break;
}
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index 3174a8dee779..7410e7b93256 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -1026,7 +1026,7 @@ static void action_ring_cleanup(void *arg)
static void pf_gt_migration_check_support(struct xe_gt *gt)
{
- if (GUC_FIRMWARE_VER(&gt->uc.guc) < MAKE_GUC_VER(70, 54, 0))
+ if (!GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, 70, 54))
xe_sriov_pf_migration_disable(gt_to_xe(gt), "requires GuC version >= 70.54.0");
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 033eae2d03d3..b8b391cfc8eb 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -5,6 +5,7 @@
#include <linux/bitfield.h>
#include <linux/bsearch.h>
+#include <linux/delay.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
@@ -41,6 +42,37 @@
#define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
+#ifdef CONFIG_DRM_XE_DEBUG
+enum VF_MIGRATION_WAIT_POINTS {
+ VF_MIGRATION_WAIT_RESFIX_START = BIT(0),
+ VF_MIGRATION_WAIT_FIXUPS = BIT(1),
+ VF_MIGRATION_WAIT_RESTART_JOBS = BIT(2),
+ VF_MIGRATION_WAIT_RESFIX_DONE = BIT(3),
+};
+
+#define VF_MIGRATION_WAIT_DELAY_IN_MS 1000
+static void vf_post_migration_inject_wait(struct xe_gt *gt,
+ enum VF_MIGRATION_WAIT_POINTS wait)
+{
+ while (gt->sriov.vf.migration.debug.resfix_stoppers & wait) {
+ xe_gt_dbg(gt,
+			  "*TESTING* injecting %u ms delay due to resfix_stoppers=%#x; to continue, clear %#x\n",
+ VF_MIGRATION_WAIT_DELAY_IN_MS,
+ gt->sriov.vf.migration.debug.resfix_stoppers, wait);
+
+ msleep(VF_MIGRATION_WAIT_DELAY_IN_MS);
+ }
+}
+
+#define VF_MIGRATION_INJECT_WAIT(gt, _POS) ({ \
+ struct xe_gt *__gt = (gt); \
+ vf_post_migration_inject_wait(__gt, VF_MIGRATION_WAIT_##_POS); \
+ })
+
+#else
+#define VF_MIGRATION_INJECT_WAIT(_gt, ...) typecheck(struct xe_gt *, (_gt))
+#endif
+
static int guc_action_vf_reset(struct xe_guc *guc)
{
u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
@@ -299,12 +331,13 @@ void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt,
*found = gt->sriov.vf.guc_version;
}
-static int guc_action_vf_notify_resfix_done(struct xe_guc *guc)
+static int guc_action_vf_resfix_start(struct xe_guc *guc, u16 marker)
{
u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
- FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE),
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_RESFIX_START) |
+ FIELD_PREP(VF2GUC_RESFIX_START_REQUEST_MSG_0_MARKER, marker),
};
int ret;
@@ -313,28 +346,43 @@ static int guc_action_vf_notify_resfix_done(struct xe_guc *guc)
return ret > 0 ? -EPROTO : ret;
}
-/**
- * vf_notify_resfix_done - Notify GuC about resource fixups apply completed.
- * @gt: the &xe_gt struct instance linked to target GuC
- *
- * Returns: 0 if the operation completed successfully, or a negative error
- * code otherwise.
- */
-static int vf_notify_resfix_done(struct xe_gt *gt)
+static int vf_resfix_start(struct xe_gt *gt, u16 marker)
{
struct xe_guc *guc = &gt->uc.guc;
- int err;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- err = guc_action_vf_notify_resfix_done(guc);
- if (unlikely(err))
- xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n",
- ERR_PTR(err));
- else
- xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n");
+ VF_MIGRATION_INJECT_WAIT(gt, RESFIX_START);
- return err;
+ xe_gt_sriov_dbg_verbose(gt, "Sending resfix start marker %u\n", marker);
+
+ return guc_action_vf_resfix_start(guc, marker);
+}
+
+static int guc_action_vf_resfix_done(struct xe_guc *guc, u16 marker)
+{
+ u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_RESFIX_DONE) |
+ FIELD_PREP(VF2GUC_RESFIX_DONE_REQUEST_MSG_0_MARKER, marker),
+ };
+ int ret;
+
+ ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request));
+
+ return ret > 0 ? -EPROTO : ret;
+}
+
+static int vf_resfix_done(struct xe_gt *gt, u16 marker)
+{
+ struct xe_guc *guc = &gt->uc.guc;
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ xe_gt_sriov_dbg_verbose(gt, "Sending resfix done marker %u\n", marker);
+
+ return guc_action_vf_resfix_done(guc, marker);
}
static int guc_action_query_single_klv(struct xe_guc *guc, u32 key,
@@ -1123,12 +1171,8 @@ static bool vf_post_migration_shutdown(struct xe_gt *gt)
return true;
}
- spin_lock_irq(&gt->sriov.vf.migration.lock);
- gt->sriov.vf.migration.recovery_queued = false;
- spin_unlock_irq(&gt->sriov.vf.migration.lock);
-
xe_guc_ct_flush_and_stop(&gt->uc.guc.ct);
- xe_guc_submit_pause(&gt->uc.guc);
+ xe_guc_submit_pause_vf(&gt->uc.guc);
xe_tlb_inval_reset(&gt->tlb_inval);
return false;
@@ -1144,6 +1188,8 @@ static int vf_post_migration_fixups(struct xe_gt *gt)
void *buf = gt->sriov.vf.migration.scratch;
int err;
+ VF_MIGRATION_INJECT_WAIT(gt, FIXUPS);
+
/* xe_gt_sriov_vf_query_config will fixup the GGTT addresses */
err = xe_gt_sriov_vf_query_config(gt);
if (err)
@@ -1162,13 +1208,22 @@ static int vf_post_migration_fixups(struct xe_gt *gt)
static void vf_post_migration_rearm(struct xe_gt *gt)
{
+ VF_MIGRATION_INJECT_WAIT(gt, RESTART_JOBS);
+
+ /*
+ * Make sure interrupts on the new HW are properly set. The GuC IRQ
+	 * must be working at this point, since the recovery has already
+	 * started, but the rest was not enabled using the procedure from
+	 * the spec.
+ */
+ xe_irq_resume(gt_to_xe(gt));
+
xe_guc_ct_restart(&gt->uc.guc.ct);
- xe_guc_submit_unpause_prepare(&gt->uc.guc);
+ xe_guc_submit_unpause_prepare_vf(&gt->uc.guc);
}
static void vf_post_migration_kickstart(struct xe_gt *gt)
{
- xe_guc_submit_unpause(&gt->uc.guc);
+ xe_guc_submit_unpause_vf(&gt->uc.guc);
}
static void vf_post_migration_abort(struct xe_gt *gt)
@@ -1183,37 +1238,49 @@ static void vf_post_migration_abort(struct xe_gt *gt)
xe_guc_submit_pause_abort(&gt->uc.guc);
}
-static int vf_post_migration_notify_resfix_done(struct xe_gt *gt)
+static int vf_post_migration_resfix_done(struct xe_gt *gt, u16 marker)
{
- bool skip_resfix = false;
+ VF_MIGRATION_INJECT_WAIT(gt, RESFIX_DONE);
spin_lock_irq(&gt->sriov.vf.migration.lock);
- if (gt->sriov.vf.migration.recovery_queued) {
- skip_resfix = true;
- xe_gt_sriov_dbg(gt, "another recovery imminent, resfix skipped\n");
- } else {
+ if (gt->sriov.vf.migration.recovery_queued)
+ xe_gt_sriov_dbg(gt, "another recovery imminent\n");
+ else
WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false);
- }
spin_unlock_irq(&gt->sriov.vf.migration.lock);
- if (skip_resfix)
- return -EAGAIN;
+ return vf_resfix_done(gt, marker);
+}
- /*
- * Make sure interrupts on the new HW are properly set. The GuC IRQ
- * must be working at this point, since the recovery did started,
- * but the rest was not enabled using the procedure from spec.
- */
- xe_irq_resume(gt_to_xe(gt));
+static int vf_post_migration_resfix_start(struct xe_gt *gt, u16 marker)
+{
+ int err;
+
+ err = vf_resfix_start(gt, marker);
- return vf_notify_resfix_done(gt);
+	guard(spinlock_irq)(&gt->sriov.vf.migration.lock);
+ gt->sriov.vf.migration.recovery_queued = false;
+
+ return err;
+}
+
+static u16 vf_post_migration_next_resfix_marker(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ BUILD_BUG_ON(1 + ((typeof(gt->sriov.vf.migration.resfix_marker))~0) >
+ FIELD_MAX(VF2GUC_RESFIX_START_REQUEST_MSG_0_MARKER));
+
+ /* add 1 to avoid zero-marker */
+ return 1 + gt->sriov.vf.migration.resfix_marker++;
}
static void vf_post_migration_recovery(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- int err;
+ u16 marker;
bool retry;
+ int err;
xe_gt_sriov_dbg(gt, "migration recovery in progress\n");
@@ -1227,15 +1294,30 @@ static void vf_post_migration_recovery(struct xe_gt *gt)
goto fail;
}
+ marker = vf_post_migration_next_resfix_marker(gt);
+
+ err = vf_post_migration_resfix_start(gt, marker);
+ if (unlikely(err)) {
+ xe_gt_sriov_err(gt, "Recovery failed at GuC RESFIX_START step (%pe)\n",
+ ERR_PTR(err));
+ goto fail;
+ }
+
err = vf_post_migration_fixups(gt);
if (err)
goto fail;
vf_post_migration_rearm(gt);
- err = vf_post_migration_notify_resfix_done(gt);
- if (err && err != -EAGAIN)
+ err = vf_post_migration_resfix_done(gt, marker);
+ if (err) {
+ if (err == -EREMCHG)
+ goto queue;
+
+ xe_gt_sriov_err(gt, "Recovery failed at GuC RESFIX_DONE step (%pe)\n",
+ ERR_PTR(err));
goto fail;
+ }
vf_post_migration_kickstart(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
index 2ed5b6780d30..507718326e1f 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c
@@ -69,4 +69,16 @@ void xe_gt_sriov_vf_debugfs_register(struct xe_gt *gt, struct dentry *root)
vfdentry->d_inode->i_private = gt;
drm_debugfs_create_files(vf_info, ARRAY_SIZE(vf_info), vfdentry, minor);
+
+ /*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── tile0
+ * ├── gt0
+ * ├── vf
+ * ├── resfix_stoppers
+ */
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ debugfs_create_x8("resfix_stoppers", 0600, vfdentry,
+ &gt->sriov.vf.migration.debug.resfix_stoppers);
+ }
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
index 420b0e6089de..510c33116fbd 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
@@ -52,6 +52,19 @@ struct xe_gt_sriov_vf_migration {
wait_queue_head_t wq;
/** @scratch: Scratch memory for VF recovery */
void *scratch;
+ /** @debug: Debug hooks for delaying migration */
+ struct {
+ /**
+ * @debug.resfix_stoppers: Stop and wait at different stages
+ * during post migration recovery
+ */
+ u8 resfix_stoppers;
+ } debug;
+ /**
+ * @resfix_marker: Marker sent on start and on end of post-migration
+ * steps.
+ */
+ u8 resfix_marker;
/** @recovery_teardown: VF post migration recovery is being torn down */
bool recovery_teardown;
/** @recovery_queued: VF post migration recovery in queued */
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index 5f74706bab81..fb2904bd0abd 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -66,6 +66,16 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"),
DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"),
DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"),
+ DEF_STAT_STR(HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT,
+ "hw_engine_group_suspend_lr_queue_count"),
+ DEF_STAT_STR(HW_ENGINE_GROUP_SKIP_LR_QUEUE_COUNT,
+ "hw_engine_group_skip_lr_queue_count"),
+ DEF_STAT_STR(HW_ENGINE_GROUP_WAIT_DMA_QUEUE_COUNT,
+ "hw_engine_group_wait_dma_queue_count"),
+ DEF_STAT_STR(HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_US,
+ "hw_engine_group_suspend_lr_queue_us"),
+ DEF_STAT_STR(HW_ENGINE_GROUP_WAIT_DMA_QUEUE_US,
+ "hw_engine_group_wait_dma_queue_us"),
};
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
index e8aea32bc971..59a7bf60e242 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -6,6 +6,8 @@
#ifndef _XE_GT_STATS_H_
#define _XE_GT_STATS_H_
+#include <linux/ktime.h>
+
#include "xe_gt_stats_types.h"
struct xe_gt;
@@ -23,4 +25,34 @@ xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id,
}
#endif
+
+/**
+ * xe_gt_stats_ktime_us_delta() - Get delta in microseconds between now and a
+ * start time
+ * @start: Start time
+ *
+ * Helper for GT stats to get delta in microseconds between now and a start
+ * time; compiles out if GT stats are disabled.
+ *
+ * Return: Delta in microseconds between now and a start time
+ */
+static inline s64 xe_gt_stats_ktime_us_delta(ktime_t start)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ?
+ ktime_us_delta(ktime_get(), start) : 0;
+}
+
+/**
+ * xe_gt_stats_ktime_get() - Get current ktime
+ *
+ * Helper for GT stats to get current ktime; compiles out if GT stats are
+ * disabled.
+ *
+ * Return: Current ktime, or 0 if GT stats are compiled out
+ */
+static inline ktime_t xe_gt_stats_ktime_get(void)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
index d8348a8de2e1..b92d013091d5 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -44,6 +44,11 @@ enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_4K_BIND_US,
XE_GT_STATS_ID_SVM_64K_BIND_US,
XE_GT_STATS_ID_SVM_2M_BIND_US,
+ XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT,
+ XE_GT_STATS_ID_HW_ENGINE_GROUP_SKIP_LR_QUEUE_COUNT,
+ XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_COUNT,
+ XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_US,
+ XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_US,
/* must be the last entry */
__XE_GT_STATS_NUM_IDS,
};
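
A hypothetical use of the new hw-engine-group counters together with the ktime helpers from xe_gt_stats.h above (illustrative only; the real call sites are in the hw engine group code, which is not part of this excerpt):

/* Hypothetical caller, shown only to illustrate the new IDs and helpers. */
static void stats_track_suspend_lr_queues(struct xe_gt *gt)
{
	ktime_t start = xe_gt_stats_ktime_get();

	/* ... suspend the group's long-running queues here ... */

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, 1);
	xe_gt_stats_incr(gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_US,
			 xe_gt_stats_ktime_us_delta(start));
}
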
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index 01477fc7b37b..570358310e97 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -85,7 +85,7 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
struct xe_reg reg;
- u32 val, mask;
+ u32 mask;
if (xe_gt_is_media_type(gt))
reg = MTL_MEDIA_PERF_LIMIT_REASONS;
@@ -97,11 +97,8 @@ u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
else
mask = GT0_PERF_LIMIT_REASONS_MASK;
- xe_pm_runtime_get(xe);
- val = xe_mmio_read32(&gt->mmio, reg) & mask;
- xe_pm_runtime_put(xe);
-
- return val;
+ guard(xe_pm_runtime)(xe);
+ return xe_mmio_read32(&gt->mmio, reg) & mask;
}
static bool is_throttled_by(struct xe_gt *gt, u32 mask)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 0a728180b6fe..5318d92fd473 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -140,6 +140,11 @@ struct xe_gt {
u64 engine_mask;
/** @info.gmdid: raw GMD_ID value from hardware */
u32 gmdid;
+ /**
+ * @multi_queue_engine_class_mask: Bitmask of engine classes with
+ * multi queue support enabled.
+ */
+ u16 multi_queue_engine_class_mask;
/** @info.id: Unique ID of this GT within the PCI Device */
u8 id;
/** @info.has_indirect_ring_state: GT has indirect ring state support */
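
A hypothetical accessor (named here only for illustration; not part of the patch) shows how the new per-class mask would typically be consumed:

/* Hypothetical helper, shown only to illustrate the new field. */
static inline bool xe_gt_class_has_multi_queue(struct xe_gt *gt,
					       enum xe_engine_class class)
{
	return gt->info.multi_queue_engine_class_mask & BIT(class);
}
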
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index a686b04879d6..09ac092c3687 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -104,7 +104,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
u32 offset = guc_bo_ggtt_addr(guc, guc->log.bo) >> PAGE_SHIFT;
u32 flags;
- #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
+ #if (((XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE) % SZ_1M) == 0)
#define LOG_UNIT SZ_1M
#define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
#else
@@ -112,7 +112,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
#define LOG_FLAG 0
#endif
- #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
+ #if (((XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
#define CAPTURE_UNIT SZ_1M
#define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
#else
@@ -120,20 +120,21 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
#define CAPTURE_FLAG 0
#endif
- BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
- BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
- BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
- BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
- BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
- BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
+ BUILD_BUG_ON(!XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE, LOG_UNIT));
+ BUILD_BUG_ON(!XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE, LOG_UNIT));
+ BUILD_BUG_ON(!XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
flags = GUC_LOG_VALID |
GUC_LOG_NOTIFY_ON_HALF_FULL |
CAPTURE_FLAG |
LOG_FLAG |
- FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) |
- FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) |
- FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_CRASH_DUMP, XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE / LOG_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_EVENT_DATA, XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE / LOG_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_STATE_CAPTURE, XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE /
+ CAPTURE_UNIT - 1) |
FIELD_PREP(GUC_LOG_BUF_ADDR, offset);
#undef LOG_UNIT
@@ -660,11 +661,9 @@ static void guc_fini_hw(void *arg)
{
struct xe_guc *guc = arg;
struct xe_gt *gt = guc_to_gt(guc);
- unsigned int fw_ref;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- xe_uc_sanitize_reset(&guc_to_gt(guc)->uc);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FORCEWAKE_ALL)
+ xe_uc_sanitize_reset(&guc_to_gt(guc)->uc);
guc_g2g_fini(guc);
}
@@ -768,6 +767,10 @@ int xe_guc_init(struct xe_guc *guc)
if (!xe_uc_fw_is_enabled(&guc->fw))
return 0;
+	/* Disable page reclaim if the GuC FW does not support it */
+ if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 14, 0))
+ xe->info.has_page_reclaim_hw_assist = false;
+
if (IS_SRIOV_VF(xe)) {
ret = xe_guc_ct_init(&guc->ct);
if (ret)
@@ -1485,6 +1488,12 @@ timeout:
u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
+ if (unlikely(error == XE_GUC_RESPONSE_VF_MIGRATED)) {
+ xe_gt_dbg(gt, "GuC mmio request %#x rejected due to MIGRATION (hint %#x)\n",
+ request[0], hint);
+ return -EREMCHG;
+ }
+
xe_gt_err(gt, "GuC mmio request %#x: failure %#x hint %#x\n",
request[0], error, hint);
return -ENXIO;
@@ -1618,18 +1627,51 @@ int xe_guc_start(struct xe_guc *guc)
return xe_guc_submit_start(guc);
}
+/**
+ * xe_guc_runtime_suspend() - GuC runtime suspend
+ * @guc: The GuC object
+ *
+ * Stop further runs of submission tasks on given GuC and runtime suspend
+ * GuC CT.
+ */
+void xe_guc_runtime_suspend(struct xe_guc *guc)
+{
+ xe_guc_submit_pause(guc);
+ xe_guc_submit_disable(guc);
+ xe_guc_ct_runtime_suspend(&guc->ct);
+}
+
+/**
+ * xe_guc_runtime_resume() - GuC runtime resume
+ * @guc: The GuC object
+ *
+ * Runtime resume GuC CT and allow further runs of submission tasks on
+ * given GuC.
+ */
+void xe_guc_runtime_resume(struct xe_guc *guc)
+{
+ /*
+ * Runtime PM flows are not applicable for VFs, so it's safe to
+ * directly enable IRQ.
+ */
+ guc_enable_irq(guc);
+
+ xe_guc_ct_runtime_resume(&guc->ct);
+ xe_guc_submit_enable(guc);
+ xe_guc_submit_unpause(guc);
+}
+
void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
{
struct xe_gt *gt = guc_to_gt(guc);
- unsigned int fw_ref;
u32 status;
int i;
xe_uc_fw_print(&guc->fw, p);
if (!IS_SRIOV_VF(gt_to_xe(gt))) {
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return;
status = xe_mmio_read32(&gt->mmio, GUC_STATUS);
@@ -1649,8 +1691,6 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
drm_printf(p, "\t%2d: \t0x%x\n",
i, xe_mmio_read32(&gt->mmio, SOFT_SCRATCH(i)));
}
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
drm_puts(p, "\n");
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index e2d4c5f44ae3..a169f231cbd8 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -18,10 +18,16 @@
*/
#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch)
+#define MAKE_GUC_VER_ARGS(ver...) \
+ (BUILD_BUG_ON_ZERO(COUNT_ARGS(ver) < 2 || COUNT_ARGS(ver) > 3) + \
+ MAKE_GUC_VER(PICK_ARG1(ver), PICK_ARG2(ver), IF_ARGS(PICK_ARG3(ver), 0, PICK_ARG3(ver))))
+
#define GUC_SUBMIT_VER(guc) \
MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY])
#define GUC_FIRMWARE_VER(guc) \
MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE])
+#define GUC_FIRMWARE_VER_AT_LEAST(guc, ver...) \
+ xe_guc_fw_version_at_least((guc), MAKE_GUC_VER_ARGS(ver))
struct drm_printer;
@@ -35,6 +41,8 @@ int xe_guc_upload(struct xe_guc *guc);
int xe_guc_min_load_for_hwconfig(struct xe_guc *guc);
int xe_guc_enable_communication(struct xe_guc *guc);
int xe_guc_opt_in_features_enable(struct xe_guc *guc);
+void xe_guc_runtime_suspend(struct xe_guc *guc);
+void xe_guc_runtime_resume(struct xe_guc *guc);
int xe_guc_suspend(struct xe_guc *guc);
void xe_guc_notify(struct xe_guc *guc);
int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
@@ -94,4 +102,19 @@ static inline struct drm_device *guc_to_drm(struct xe_guc *guc)
return &guc_to_xe(guc)->drm;
}
+/**
+ * xe_guc_fw_version_at_least() - Check if GuC is at least of given version.
+ * @guc: the &xe_guc
+ * @ver: the version to check
+ *
+ * The @ver should be prepared using MAKE_GUC_VER(major, minor, patch).
+ *
+ * Return: true if loaded GuC firmware is at least of given version,
+ * false otherwise.
+ */
+static inline bool xe_guc_fw_version_at_least(const struct xe_guc *guc, u32 ver)
+{
+ return GUC_FIRMWARE_VER(guc) >= ver;
+}
+
#endif
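
The variadic helper accepts either (major, minor) or (major, minor, patch); a hypothetical caller gating a feature on GuC 70.44.2, for instance, would read:

/* Hypothetical caller, shown only to illustrate the macro expansion. */
static bool guc_has_some_feature(struct xe_guc *guc)
{
	/*
	 * Effectively expands to
	 *   xe_guc_fw_version_at_least(guc, MAKE_GUC_VER(70, 44, 2)),
	 * i.e. GUC_FIRMWARE_VER(guc) >= ((70 << 16) | (44 << 8) | 2).
	 */
	return GUC_FIRMWARE_VER_AT_LEAST(guc, 70, 44, 2);
}
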
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index bcb85a1bf26d..5feeb91426ee 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -317,7 +317,7 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
offset = guc_ads_waklv_offset(ads);
remain = guc_ads_waklv_size(ads);
- if (XE_GT_WA(gt, 14019882105) || XE_GT_WA(gt, 16021333562))
+ if (XE_GT_WA(gt, 16021333562))
guc_waklv_enable(ads, NULL, 0, &offset, &remain,
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
if (XE_GT_WA(gt, 18024947630))
@@ -347,10 +347,10 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
guc_waklv_enable(ads, NULL, 0, &offset, &remain,
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET);
- if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_GT_WA(gt, 16026508708))
+ if (GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, 70, 44) && XE_GT_WA(gt, 16026508708))
guc_waklv_enable(ads, NULL, 0, &offset, &remain,
GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH);
- if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 47, 0) && XE_GT_WA(gt, 16026007364)) {
+ if (GUC_FIRMWARE_VER_AT_LEAST(&gt->uc.guc, 70, 47) && XE_GT_WA(gt, 16026007364)) {
u32 data[] = {
0x0,
0xF,
diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c
index 3ce442500130..c36fc31e0438 100644
--- a/drivers/gpu/drm/xe/xe_guc_buf.c
+++ b/drivers/gpu/drm/xe/xe_guc_buf.c
@@ -30,7 +30,7 @@ static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size)
struct xe_gt *gt = cache_to_gt(cache);
struct xe_sa_manager *sam;
- sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32));
+ sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32), 0);
if (IS_ERR(sam))
return PTR_ERR(sam);
cache->sam = sam;
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index 0c1fbe97b8bf..2cda92f7b323 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -843,7 +843,7 @@ static void check_guc_capture_size(struct xe_guc *guc)
{
int capture_size = guc_capture_output_size_est(guc);
int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
- u32 buffer_size = xe_guc_log_section_size_capture(&guc->log);
+ u32 buffer_size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE;
/*
* NOTE: capture_size is much smaller than the capture region
@@ -949,7 +949,7 @@ guc_capture_init_node(struct xe_guc *guc, struct __guc_capture_parsed_output *no
* ADS module also calls separately for PF vs VF.
*
* --> alloc B: GuC output capture buf (registered via guc_init_params(log_param))
- * Size = #define CAPTURE_BUFFER_SIZE (warns if on too-small)
+ * Size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE (warns if too small)
* Note2: 'x 3' to hold multiple capture groups
*
* GUC Runtime notify capture:
@@ -1367,7 +1367,7 @@ static int __guc_capture_flushlog_complete(struct xe_guc *guc)
{
u32 action[] = {
XE_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE,
- GUC_LOG_BUFFER_CAPTURE
+ GUC_LOG_TYPE_STATE_CAPTURE
};
return xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
@@ -1384,8 +1384,8 @@ static void __guc_capture_process_output(struct xe_guc *guc)
u32 log_buf_state_offset;
u32 src_data_offset;
- log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_BUFFER_CAPTURE;
- src_data_offset = xe_guc_get_log_buffer_offset(&guc->log, GUC_LOG_BUFFER_CAPTURE);
+ log_buf_state_offset = sizeof(struct guc_log_buffer_state) * GUC_LOG_TYPE_STATE_CAPTURE;
+ src_data_offset = XE_GUC_LOG_STATE_CAPTURE_OFFSET;
/*
* Make a copy of the state structure, inside GuC log buffer
@@ -1395,15 +1395,15 @@ static void __guc_capture_process_output(struct xe_guc *guc)
xe_map_memcpy_from(guc_to_xe(guc), &log_buf_state_local, &guc->log.bo->vmap,
log_buf_state_offset, sizeof(struct guc_log_buffer_state));
- buffer_size = xe_guc_get_log_buffer_size(&guc->log, GUC_LOG_BUFFER_CAPTURE);
+ buffer_size = XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE;
read_offset = log_buf_state_local.read_ptr;
write_offset = log_buf_state_local.sampled_write_ptr;
full_count = FIELD_GET(GUC_LOG_BUFFER_STATE_BUFFER_FULL_CNT, log_buf_state_local.flags);
/* Bookkeeping stuff */
tmp = FIELD_GET(GUC_LOG_BUFFER_STATE_FLUSH_TO_FILE, log_buf_state_local.flags);
- guc->log.stats[GUC_LOG_BUFFER_CAPTURE].flush += tmp;
- new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_BUFFER_CAPTURE,
+ guc->log.stats[GUC_LOG_TYPE_STATE_CAPTURE].flush += tmp;
+ new_overflow = xe_guc_check_log_buf_overflow(&guc->log, GUC_LOG_TYPE_STATE_CAPTURE,
full_count);
/* Now copy the actual logs. */
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 4ac434ad216f..c3df9b3f1b4d 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -42,6 +42,21 @@ static void ct_exit_safe_mode(struct xe_guc_ct *ct);
static void guc_ct_change_state(struct xe_guc_ct *ct,
enum xe_guc_ct_state state);
+static struct xe_guc *ct_to_guc(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_guc, ct);
+}
+
+static struct xe_gt *ct_to_gt(struct xe_guc_ct *ct)
+{
+ return container_of(ct, struct xe_gt, uc.guc.ct);
+}
+
+static struct xe_device *ct_to_xe(struct xe_guc_ct *ct)
+{
+ return gt_to_xe(ct_to_gt(ct));
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
enum {
/* Internal states, not error conditions */
@@ -68,14 +83,101 @@ enum {
static void ct_dead_worker_func(struct work_struct *w);
static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code);
-#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+static void ct_dead_fini(struct xe_guc_ct *ct)
+{
+ cancel_work_sync(&ct->dead.worker);
+}
+
+static void ct_dead_init(struct xe_guc_ct *ct)
+{
+ spin_lock_init(&ct->dead.lock);
+ INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
+ stack_depot_init();
+#endif
+}
+
+static void fast_req_stack_save(struct xe_guc_ct *ct, unsigned int slot)
+{
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
+ unsigned long entries[SZ_32];
+ unsigned int n;
+
+ n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+ /* May be called under spinlock, so avoid sleeping */
+ ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT);
+#endif
+}
+
+static void fast_req_dump(struct xe_guc_ct *ct, u16 fence, unsigned int slot)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
+ char *buf __cleanup(kfree) = kmalloc(SZ_4K, GFP_NOWAIT);
+
+ if (buf && stack_depot_snprint(ct->fast_req[slot].stack, buf, SZ_4K, 0))
+ xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s\n",
+ fence, ct->fast_req[slot].action, buf);
+ else
+ xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n",
+ fence, ct->fast_req[slot].action);
+#else
+ xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n",
+ fence, ct->fast_req[slot].action);
+#endif
+}
+
+static void fast_req_report(struct xe_guc_ct *ct, u16 fence)
+{
+ u16 fence_min = U16_MAX, fence_max = 0;
+ struct xe_gt *gt = ct_to_gt(ct);
+ unsigned int n;
+
+ lockdep_assert_held(&ct->lock);
+
+ for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) {
+ if (ct->fast_req[n].fence < fence_min)
+ fence_min = ct->fast_req[n].fence;
+ if (ct->fast_req[n].fence > fence_max)
+ fence_max = ct->fast_req[n].fence;
+
+ if (ct->fast_req[n].fence != fence)
+ continue;
+
+ return fast_req_dump(ct, fence, n);
+ }
+
+ xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? [range = 0x%x -> 0x%x, next = 0x%X]\n",
+ fence, fence_min, fence_max, ct->fence_seqno);
+}
+
+static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action)
+{
+ unsigned int slot = fence % ARRAY_SIZE(ct->fast_req);
+
+ fast_req_stack_save(ct, slot);
+ ct->fast_req[slot].fence = fence;
+ ct->fast_req[slot].action = action;
+}
+
+#define CT_DEAD(ct, ctb, reason_code) ct_dead_capture((ct), (ctb), CT_DEAD_##reason_code)
+
#else
+
+static void ct_dead_fini(struct xe_guc_ct *ct) { }
+static void ct_dead_init(struct xe_guc_ct *ct) { }
+
+static void fast_req_report(struct xe_guc_ct *ct, u16 fence) { }
+static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) { }
+
#define CT_DEAD(ct, ctb, reason) \
do { \
struct guc_ctb *_ctb = (ctb); \
if (_ctb) \
_ctb->info.broken = true; \
} while (0)
+
#endif
/* Used when a CT send wants to block and / or receive data */
@@ -112,24 +214,6 @@ static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
return g2h_fence->seqno == ~0x0;
}
-static struct xe_guc *
-ct_to_guc(struct xe_guc_ct *ct)
-{
- return container_of(ct, struct xe_guc, ct);
-}
-
-static struct xe_gt *
-ct_to_gt(struct xe_guc_ct *ct)
-{
- return container_of(ct, struct xe_gt, uc.guc.ct);
-}
-
-static struct xe_device *
-ct_to_xe(struct xe_guc_ct *ct)
-{
- return gt_to_xe(ct_to_gt(ct));
-}
-
/**
* DOC: GuC CTB Blob
*
@@ -169,8 +253,11 @@ ct_to_xe(struct xe_guc_ct *ct)
#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
#define CTB_H2G_BUFFER_OFFSET (CTB_DESC_SIZE * 2)
#define CTB_H2G_BUFFER_SIZE (SZ_4K)
+#define CTB_H2G_BUFFER_DWORDS (CTB_H2G_BUFFER_SIZE / sizeof(u32))
#define CTB_G2H_BUFFER_SIZE (SZ_128K)
+#define CTB_G2H_BUFFER_DWORDS (CTB_G2H_BUFFER_SIZE / sizeof(u32))
#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2)
+#define G2H_ROOM_BUFFER_DWORDS (CTB_G2H_BUFFER_DWORDS / 2)
/**
* xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full
@@ -199,9 +286,7 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
{
struct xe_guc_ct *ct = arg;
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
- cancel_work_sync(&ct->dead.worker);
-#endif
+ ct_dead_fini(ct);
ct_exit_safe_mode(ct);
destroy_workqueue(ct->g2h_wq);
xa_destroy(&ct->fence_lookup);
@@ -239,13 +324,8 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct)
xa_init(&ct->fence_lookup);
INIT_WORK(&ct->g2h_worker, g2h_worker_func);
INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func);
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
- spin_lock_init(&ct->dead.lock);
- INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
- stack_depot_init();
-#endif
-#endif
+
+ ct_dead_init(ct);
init_waitqueue_head(&ct->wq);
init_waitqueue_head(&ct->g2h_fence_wq);
@@ -326,7 +406,7 @@ int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct)
static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
struct iosys_map *map)
{
- h2g->info.size = CTB_H2G_BUFFER_SIZE / sizeof(u32);
+ h2g->info.size = CTB_H2G_BUFFER_DWORDS;
h2g->info.resv_space = 0;
h2g->info.tail = 0;
h2g->info.head = 0;
@@ -344,8 +424,8 @@ static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
struct iosys_map *map)
{
- g2h->info.size = CTB_G2H_BUFFER_SIZE / sizeof(u32);
- g2h->info.resv_space = G2H_ROOM_BUFFER_SIZE / sizeof(u32);
+ g2h->info.size = CTB_G2H_BUFFER_DWORDS;
+ g2h->info.resv_space = G2H_ROOM_BUFFER_DWORDS;
g2h->info.head = 0;
g2h->info.tail = 0;
g2h->info.space = CIRC_SPACE(g2h->info.tail, g2h->info.head,
@@ -640,6 +720,39 @@ void xe_guc_ct_stop(struct xe_guc_ct *ct)
stop_g2h_handler(ct);
}
+/**
+ * xe_guc_ct_runtime_suspend() - GuC CT runtime suspend
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to disabled state.
+ */
+void xe_guc_ct_runtime_suspend(struct xe_guc_ct *ct)
+{
+ struct guc_ctb *g2h = &ct->ctbs.g2h;
+ u32 credits = CIRC_SPACE(0, 0, CTB_G2H_BUFFER_DWORDS) - G2H_ROOM_BUFFER_DWORDS;
+
+ /* We should be back to guc_ct_ctb_g2h_init() values */
+ xe_gt_assert(ct_to_gt(ct), g2h->info.space == credits);
+
+ /*
+	 * Since we're already in the runtime suspend path, we shouldn't have
+	 * pending messages. But if there happen to be any, we'd want them
+	 * surfaced as errors for further investigation.
+ */
+ xe_guc_ct_disable(ct);
+}
+
+/**
+ * xe_guc_ct_runtime_resume() - GuC CT runtime resume
+ * @ct: the &xe_guc_ct
+ *
+ * Restart GuC CT and set it to enabled state.
+ */
+void xe_guc_ct_runtime_resume(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_restart(ct);
+}
+
static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
{
struct guc_ctb *h2g = &ct->ctbs.h2g;
@@ -747,28 +860,6 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
spin_unlock_irq(&ct->fast_lock);
}
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
-static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action)
-{
- unsigned int slot = fence % ARRAY_SIZE(ct->fast_req);
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
- unsigned long entries[SZ_32];
- unsigned int n;
-
- n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
-
- /* May be called under spinlock, so avoid sleeping */
- ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT);
-#endif
- ct->fast_req[slot].fence = fence;
- ct->fast_req[slot].action = action;
-}
-#else
-static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action)
-{
-}
-#endif
-
/*
* The CT protocol accepts a 16 bits fence. This field is fully owned by the
* driver, the GuC will just copy it to the reply message. Since we need to
@@ -1310,10 +1401,12 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
lockdep_assert_held(&ct->lock);
switch (action) {
+ case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE:
case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
case XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
case XE_GUC_ACTION_SCHED_ENGINE_MODE_DONE:
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
g2h_release_space(ct, len);
}
@@ -1338,55 +1431,6 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action)
return 0;
}
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
-static void fast_req_report(struct xe_guc_ct *ct, u16 fence)
-{
- u16 fence_min = U16_MAX, fence_max = 0;
- struct xe_gt *gt = ct_to_gt(ct);
- bool found = false;
- unsigned int n;
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
- char *buf;
-#endif
-
- lockdep_assert_held(&ct->lock);
-
- for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) {
- if (ct->fast_req[n].fence < fence_min)
- fence_min = ct->fast_req[n].fence;
- if (ct->fast_req[n].fence > fence_max)
- fence_max = ct->fast_req[n].fence;
-
- if (ct->fast_req[n].fence != fence)
- continue;
- found = true;
-
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
- buf = kmalloc(SZ_4K, GFP_NOWAIT);
- if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0))
- xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s",
- fence, ct->fast_req[n].action, buf);
- else
- xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n",
- fence, ct->fast_req[n].action);
- kfree(buf);
-#else
- xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n",
- fence, ct->fast_req[n].action);
-#endif
- break;
- }
-
- if (!found)
- xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? [range = 0x%x -> 0x%x, next = 0x%X]\n",
- fence, fence_min, fence_max, ct->fence_seqno);
-}
-#else
-static void fast_req_report(struct xe_guc_ct *ct, u16 fence)
-{
-}
-#endif
-
static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
struct xe_gt *gt = ct_to_gt(ct);
@@ -1549,6 +1593,15 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_pagefault_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
+ /*
+ * Page reclamation is an extension of TLB invalidation. Both
+ * operations share the same seqno and fence. When either
+ * action completes, we need to signal the corresponding
+ * fence. Since the handling logic (lookup fence by seqno,
+ * fence signalling) is identical, we use the same handler
+ * for both G2H events.
+ */
ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
@@ -1572,6 +1625,13 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_g2g_test_notification(guc, payload, adj_len);
break;
#endif
+ case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE:
+ ret = xe_guc_exec_queue_cgp_sync_done_handler(guc, payload, adj_len);
+ break;
+ case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR:
+ ret = xe_guc_exec_queue_cgp_context_error_handler(guc, payload,
+ adj_len);
+ break;
default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
@@ -1714,6 +1774,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
switch (action) {
case XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC:
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
break; /* Process these in fast-path */
default:
return 0;
@@ -1750,6 +1811,12 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_pagefault_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
+ case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
+ /*
+ * Seqno and fence handling of page reclamation and TLB
+ * invalidation is identical, so we can use the same handler
+ * for both actions.
+ */
__g2h_release_space(ct, len);
ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index ca1ce2b3c354..5599939f8fe1 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -17,6 +17,8 @@ int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
int xe_guc_ct_restart(struct xe_guc_ct *ct);
void xe_guc_ct_disable(struct xe_guc_ct *ct);
+void xe_guc_ct_runtime_resume(struct xe_guc_ct *ct);
+void xe_guc_ct_runtime_suspend(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index 0b102ab46c4d..23827e87450f 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -70,13 +70,9 @@ static int guc_debugfs_show(struct seq_file *m, void *data)
struct xe_gt *gt = grandparent->d_inode->i_private;
struct xe_device *xe = gt_to_xe(gt);
int (*print)(struct xe_guc *, struct drm_printer *) = node->info_ent->data;
- int ret;
- xe_pm_runtime_get(xe);
- ret = print(&gt->uc.guc, &p);
- xe_pm_runtime_put(xe);
-
- return ret;
+ guard(xe_pm_runtime)(xe);
+ return print(&gt->uc.guc, &p);
}
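The guard(xe_pm_runtime)(xe) pattern used above relies on a scope-based cleanup class for the runtime PM reference. Its definition is not part of this hunk; a minimal sketch of how such a guard is commonly declared with <linux/cleanup.h> is shown below. The placement in xe_pm.h and the exact form are assumptions, and an analogous declaration is implied for the xe_pm_runtime_noresume guard used later in this series.

	#include <linux/cleanup.h>
	#include "xe_pm.h"

	/*
	 * Sketch only: a scope guard named "xe_pm_runtime" that takes a runtime
	 * PM reference when constructed and drops it when the scope is left.
	 */
	DEFINE_GUARD(xe_pm_runtime, struct xe_device *,
		     xe_pm_runtime_get(_T), xe_pm_runtime_put(_T))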
static int guc_log(struct xe_guc *guc, struct drm_printer *p)
@@ -85,6 +81,12 @@ static int guc_log(struct xe_guc *guc, struct drm_printer *p)
return 0;
}
+static int guc_log_lfd(struct xe_guc *guc, struct drm_printer *p)
+{
+ xe_guc_log_print_lfd(&guc->log, p);
+ return 0;
+}
+
static int guc_log_dmesg(struct xe_guc *guc, struct drm_printer *p)
{
xe_guc_log_print_dmesg(&guc->log);
@@ -121,6 +123,7 @@ static const struct drm_info_list slpc_debugfs_list[] = {
/* everything else should be added here */
static const struct drm_info_list pf_only_debugfs_list[] = {
{ "guc_log", .show = guc_debugfs_show, .data = guc_log },
+ { "guc_log_lfd", .show = guc_debugfs_show, .data = guc_log_lfd },
{ "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg },
};
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index c90dd266e9cf..a04faec477ae 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -16,6 +16,8 @@
#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
#define G2H_LEN_DW_TLB_INVALIDATE 3
#define G2H_LEN_DW_G2G_NOTIFY_MIN 3
+#define G2H_LEN_DW_MULTI_QUEUE_CONTEXT 3
+#define G2H_LEN_DW_PAGE_RECLAMATION 3
#define GUC_ID_MAX 65535
#define GUC_ID_UNKNOWN 0xffffffff
@@ -62,6 +64,8 @@ struct guc_ctxt_registration_info {
u32 wq_base_lo;
u32 wq_base_hi;
u32 wq_size;
+ u32 cgp_lo;
+ u32 cgp_hi;
u32 hwlrca_lo;
u32 hwlrca_hi;
};
@@ -91,9 +95,9 @@ struct guc_update_exec_queue_policy {
#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1)
#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2)
#define GUC_LOG_LOG_ALLOC_UNITS BIT(3)
-#define GUC_LOG_CRASH REG_GENMASK(5, 4)
-#define GUC_LOG_DEBUG REG_GENMASK(9, 6)
-#define GUC_LOG_CAPTURE REG_GENMASK(11, 10)
+#define GUC_LOG_CRASH_DUMP REG_GENMASK(5, 4)
+#define GUC_LOG_EVENT_DATA REG_GENMASK(9, 6)
+#define GUC_LOG_STATE_CAPTURE REG_GENMASK(11, 10)
#define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12)
#define GUC_CTL_WA 1
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
index 0a028c94756d..45ab5a3b5218 100644
--- a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h
@@ -24,6 +24,11 @@
* ABI and the associated &NAME, that may be used in code or debugfs/sysfs::
*
* define(TAG, NAME)
+ *
+ * If required, KLVs can be labeled with the GuC firmware version that added them::
+ *
+ * define(TAG, NAME, MAJOR, MINOR)
+ * define(TAG, NAME, MAJOR, MINOR, PATCH)
*/
#define MAKE_XE_GUC_KLV_THRESHOLDS_SET(define) \
define(CAT_ERR, cat_error_count) \
@@ -32,6 +37,7 @@
define(H2G_STORM, guc_time_us) \
define(IRQ_STORM, irq_time_us) \
define(DOORBELL_STORM, doorbell_time_us) \
+ define(MULTI_LRC_COUNT, multi_lrc_count, 70, 53)\
/* end */
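Because some entries now carry optional MAJOR/MINOR[/PATCH] arguments, any define() callback handed to MAKE_XE_GUC_KLV_THRESHOLDS_SET() has to tolerate trailing arguments. A minimal illustrative callback is sketched below; the macro name and the array are hypothetical, the real consumers live in the thresholds-set headers and are not shown here.

	/* Sketch: a variadic callback that simply ignores the optional version info */
	#define expand_threshold_name(TAG, NAME, ...)	#NAME,

	static const char * const threshold_names[] = {
		MAKE_XE_GUC_KLV_THRESHOLDS_SET(expand_threshold_name)
	};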
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index c01ccb35dc75..d7473b9673bb 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -7,8 +7,10 @@
#include <linux/fault-inject.h>
+#include <linux/utsname.h>
#include <drm/drm_managed.h>
+#include "abi/guc_lfd_abi.h"
#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
@@ -19,6 +21,77 @@
#include "xe_mmio.h"
#include "xe_module.h"
+#define GUC_LOG_CHUNK_SIZE SZ_2M
+
+/* Magic keys define */
+#define GUC_LFD_DRIVER_KEY_STREAMING 0x8086AAAA474C5346
+#define GUC_LFD_LOG_BUFFER_MARKER_2 0xDEADFEED
+#define GUC_LFD_CRASH_DUMP_BUFFER_MARKER_2 0x8086DEAD
+#define GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_2 0xBEEFFEED
+#define GUC_LFD_LOG_BUFFER_MARKER_1V2 0xCABBA9E6
+#define GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_1V2 0xCABBA9F7
+#define GUC_LFD_DATA_HEADER_MAGIC 0x8086
+
+/* LFD supported LIC type range */
+#define GUC_LIC_TYPE_FIRST GUC_LIC_TYPE_GUC_SW_VERSION
+#define GUC_LIC_TYPE_LAST GUC_LIC_TYPE_BUILD_PLATFORM_ID
+#define GUC_LFD_TYPE_FW_RANGE_FIRST GUC_LFD_TYPE_FW_VERSION
+#define GUC_LFD_TYPE_FW_RANGE_LAST GUC_LFD_TYPE_BUILD_PLATFORM_ID
+
+#define GUC_LOG_BUFFER_STATE_HEADER_LENGTH 4096
+#define GUC_LOG_BUFFER_INIT_CONFIG 3
+
+struct guc_log_buffer_entry_list {
+ u32 offset;
+ u32 rd_ptr;
+ u32 wr_ptr;
+ u32 wrap_offset;
+ u32 buf_size;
+};
+
+struct guc_lic_save {
+ u32 version;
+ /*
+ * Array of init config KLV values.
+ * Range from GUC_LIC_TYPE_FIRST to GUC_LIC_TYPE_LAST
+ */
+ u32 values[GUC_LIC_TYPE_LAST - GUC_LIC_TYPE_FIRST + 1];
+ struct guc_log_buffer_entry_list entry[GUC_LOG_BUFFER_INIT_CONFIG];
+};
+
+static struct guc_log_buffer_entry_markers {
+ u32 key[2];
+} const entry_markers[GUC_LOG_BUFFER_INIT_CONFIG + 1] = {
+ {{
+ GUC_LFD_LOG_BUFFER_MARKER_1V2,
+ GUC_LFD_LOG_BUFFER_MARKER_2
+ }},
+ {{
+ GUC_LFD_LOG_BUFFER_MARKER_1V2,
+ GUC_LFD_CRASH_DUMP_BUFFER_MARKER_2
+ }},
+ {{
+ GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_1V2,
+ GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_2
+ }},
+ {{
+ GUC_LIC_MAGIC,
+ (FIELD_PREP_CONST(GUC_LIC_VERSION_MASK_MAJOR, GUC_LIC_VERSION_MAJOR) |
+ FIELD_PREP_CONST(GUC_LIC_VERSION_MASK_MINOR, GUC_LIC_VERSION_MINOR))
+ }}
+};
+
+static struct guc_log_lic_lfd_map {
+ u32 lic;
+ u32 lfd;
+} const lic_lfd_type_map[] = {
+ {GUC_LIC_TYPE_GUC_SW_VERSION, GUC_LFD_TYPE_FW_VERSION},
+ {GUC_LIC_TYPE_GUC_DEVICE_ID, GUC_LFD_TYPE_GUC_DEVICE_ID},
+ {GUC_LIC_TYPE_TSC_FREQUENCY, GUC_LFD_TYPE_TSC_FREQUENCY},
+ {GUC_LIC_TYPE_GMD_ID, GUC_LFD_TYPE_GMD_ID},
+ {GUC_LIC_TYPE_BUILD_PLATFORM_ID, GUC_LFD_TYPE_BUILD_PLATFORM_ID}
+};
+
static struct xe_guc *
log_to_guc(struct xe_guc_log *log)
{
@@ -37,33 +110,6 @@ log_to_xe(struct xe_guc_log *log)
return gt_to_xe(log_to_gt(log));
}
-static size_t guc_log_size(void)
-{
- /*
- * GuC Log buffer Layout
- *
- * +===============================+ 00B
- * | Crash dump state header |
- * +-------------------------------+ 32B
- * | Debug state header |
- * +-------------------------------+ 64B
- * | Capture state header |
- * +-------------------------------+ 96B
- * | |
- * +===============================+ PAGE_SIZE (4KB)
- * | Crash Dump logs |
- * +===============================+ + CRASH_SIZE
- * | Debug logs |
- * +===============================+ + DEBUG_SIZE
- * | Capture logs |
- * +===============================+ + CAPTURE_SIZE
- */
- return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
- CAPTURE_BUFFER_SIZE;
-}
-
-#define GUC_LOG_CHUNK_SIZE SZ_2M
-
static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log *log, bool atomic)
{
struct xe_guc_log_snapshot *snapshot;
@@ -145,7 +191,6 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log,
struct xe_device *xe = log_to_xe(log);
struct xe_guc *guc = log_to_guc(log);
struct xe_gt *gt = log_to_gt(log);
- unsigned int fw_ref;
size_t remain;
int i;
@@ -165,13 +210,12 @@ struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log,
remain -= size;
}
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref) {
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
snapshot->stamp = ~0ULL;
- } else {
+ else
snapshot->stamp = xe_mmio_read64_2x32(&gt->mmio, GUC_PMTIMESTAMP_LO);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- }
+
snapshot->ktime = ktime_get_boottime_ns();
snapshot->level = log->level;
snapshot->ver_found = guc->fw.versions.found[XE_UC_FW_VER_RELEASE];
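The CLASS(xe_force_wake, fw_ref) constructor above, together with the fw_ref.domains checks used throughout this series, implies a cleanup class wrapping xe_force_wake_get()/xe_force_wake_put(). A rough sketch of the shape such a class could take is below; the struct name and exact definition are assumptions, and the real declaration would belong in xe_force_wake.h.

	#include <linux/cleanup.h>

	/* Sketch only: the handle the class hands back to its scope */
	struct xe_force_wake_ref {
		struct xe_force_wake *fw;
		unsigned int domains;	/* 0 when the wake request failed */
	};

	/* Constructor grabs the domains, destructor releases whatever was acquired */
	DEFINE_CLASS(xe_force_wake, struct xe_force_wake_ref,
		     xe_force_wake_put(_T.fw, _T.domains),
		     ((struct xe_force_wake_ref){
				.fw = fw,
				.domains = xe_force_wake_get(fw, domains),
		     }),
		     struct xe_force_wake *fw, unsigned int domains)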
@@ -216,6 +260,318 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_
}
}
+static inline void lfd_output_binary(struct drm_printer *p, char *buf, int buf_size)
+{
+ seq_write(p->arg, buf, buf_size);
+}
+
+static inline int xe_guc_log_add_lfd_header(struct guc_lfd_data *lfd)
+{
+ lfd->header = FIELD_PREP_CONST(GUC_LFD_DATA_HEADER_MASK_MAGIC, GUC_LFD_DATA_HEADER_MAGIC);
+ return offsetof(struct guc_lfd_data, data);
+}
+
+static int xe_guc_log_add_typed_payload(struct drm_printer *p, u32 type,
+ u32 data_len, void *data)
+{
+ struct guc_lfd_data lfd;
+ int len;
+
+ len = xe_guc_log_add_lfd_header(&lfd);
+ lfd.header |= FIELD_PREP(GUC_LFD_DATA_HEADER_MASK_TYPE, type);
+ /* make length DW aligned */
+ lfd.data_count = DIV_ROUND_UP(data_len, sizeof(u32));
+ lfd_output_binary(p, (char *)&lfd, len);
+
+ lfd_output_binary(p, data, data_len);
+ len += lfd.data_count * sizeof(u32);
+
+ return len;
+}
+
+static inline int lic_type_to_index(u32 lic_type)
+{
+ XE_WARN_ON(lic_type < GUC_LIC_TYPE_FIRST || lic_type > GUC_LIC_TYPE_LAST);
+
+ return lic_type - GUC_LIC_TYPE_FIRST;
+}
+
+static inline int lfd_type_to_index(u32 lfd_type)
+{
+ int i, lic_type = 0;
+
+ XE_WARN_ON(lfd_type < GUC_LFD_TYPE_FW_RANGE_FIRST || lfd_type > GUC_LFD_TYPE_FW_RANGE_LAST);
+
+ for (i = 0; i < ARRAY_SIZE(lic_lfd_type_map); i++)
+ if (lic_lfd_type_map[i].lfd == lfd_type)
+ lic_type = lic_lfd_type_map[i].lic;
+
+ /* If not found, lic_type_to_index() will warn about the invalid type */
+ return lic_type_to_index(lic_type);
+}
+
+static int xe_guc_log_add_klv(struct drm_printer *p, u32 lfd_type,
+ struct guc_lic_save *config)
+{
+ int klv_index = lfd_type_to_index(lfd_type);
+
+ return xe_guc_log_add_typed_payload(p, lfd_type, sizeof(u32), &config->values[klv_index]);
+}
+
+static int xe_guc_log_add_os_id(struct drm_printer *p, u32 id)
+{
+ struct guc_lfd_data_os_info os_id;
+ struct guc_lfd_data lfd;
+ int len, info_len, section_len;
+ char *version;
+ u32 blank = 0;
+
+ len = xe_guc_log_add_lfd_header(&lfd);
+ lfd.header |= FIELD_PREP(GUC_LFD_DATA_HEADER_MASK_TYPE, GUC_LFD_TYPE_OS_ID);
+
+ os_id.os_id = id;
+ section_len = offsetof(struct guc_lfd_data_os_info, build_version);
+
+ version = init_utsname()->release;
+ info_len = strlen(version);
+
+ /* make length DW aligned */
+ lfd.data_count = DIV_ROUND_UP(section_len + info_len, sizeof(u32));
+ lfd_output_binary(p, (char *)&lfd, len);
+ lfd_output_binary(p, (char *)&os_id, section_len);
+ lfd_output_binary(p, version, info_len);
+
+ /* Padding with 0 */
+ section_len = lfd.data_count * sizeof(u32) - section_len - info_len;
+ if (section_len)
+ lfd_output_binary(p, (char *)&blank, section_len);
+
+ len += lfd.data_count * sizeof(u32);
+ return len;
+}
+
+static void xe_guc_log_loop_log_init(struct guc_lic *init, struct guc_lic_save *config)
+{
+ struct guc_klv_generic_dw_t *p = (void *)init->data;
+ int i;
+
+ for (i = 0; i < init->data_count;) {
+ int klv_len = FIELD_GET(GUC_KLV_0_LEN, p->kl) + 1;
+ int key = FIELD_GET(GUC_KLV_0_KEY, p->kl);
+
+ if (key < GUC_LIC_TYPE_FIRST || key > GUC_LIC_TYPE_LAST) {
+ XE_WARN_ON(key < GUC_LIC_TYPE_FIRST || key > GUC_LIC_TYPE_LAST);
+ break;
+ }
+ config->values[lic_type_to_index(key)] = p->value;
+ i += klv_len + 1; /* Whole KLV structure length in dwords */
+ p = (void *)((u32 *)p + klv_len);
+ }
+}
+
+static int find_marker(u32 mark0, u32 mark1)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(entry_markers); i++)
+ if (mark0 == entry_markers[i].key[0] && mark1 == entry_markers[i].key[1])
+ return i;
+
+ return ARRAY_SIZE(entry_markers);
+}
+
+static void xe_guc_log_load_lic(void *guc_log, struct guc_lic_save *config)
+{
+ u32 offset = GUC_LOG_BUFFER_STATE_HEADER_LENGTH;
+ struct guc_log_buffer_state *p = guc_log;
+
+ config->version = p->version;
+ while (p->marker[0]) {
+ int index;
+
+ index = find_marker(p->marker[0], p->marker[1]);
+
+ if (index < ARRAY_SIZE(entry_markers)) {
+ if (index == GUC_LOG_BUFFER_INIT_CONFIG) {
+ /* Load log init config */
+ xe_guc_log_loop_log_init((void *)p, config);
+
+ /* LIC structure is the last */
+ return;
+ }
+ config->entry[index].offset = offset;
+ config->entry[index].rd_ptr = p->read_ptr;
+ config->entry[index].wr_ptr = p->write_ptr;
+ config->entry[index].wrap_offset = p->wrap_offset;
+ config->entry[index].buf_size = p->size;
+ }
+ offset += p->size;
+ p++;
+ }
+}
+
+static int
+xe_guc_log_output_lfd_init(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot,
+ struct guc_lic_save *config)
+{
+ int type, len;
+ size_t size = 0;
+
+ /* FW required types */
+ for (type = GUC_LFD_TYPE_FW_RANGE_FIRST; type <= GUC_LFD_TYPE_FW_RANGE_LAST; type++)
+ size += xe_guc_log_add_klv(p, type, config);
+
+ /* KMD required type(s) */
+ len = xe_guc_log_add_os_id(p, GUC_LFD_OS_TYPE_OSID_LIN);
+ size += len;
+
+ return size;
+}
+
+static void
+xe_guc_log_print_chunks(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot,
+ u32 from, u32 to)
+{
+ int chunk_from = from % GUC_LOG_CHUNK_SIZE;
+ int chunk_id = from / GUC_LOG_CHUNK_SIZE;
+ int to_chunk_id = to / GUC_LOG_CHUNK_SIZE;
+ int chunk_to = to % GUC_LOG_CHUNK_SIZE;
+ int pos = from;
+
+ do {
+ size_t size = (to_chunk_id == chunk_id ? chunk_to : GUC_LOG_CHUNK_SIZE) -
+ chunk_from;
+
+ lfd_output_binary(p, snapshot->copy[chunk_id] + chunk_from, size);
+ pos += size;
+ chunk_id++;
+ chunk_from = 0;
+ } while (pos < to);
+}
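Worked example of the chunk arithmetic above, with illustrative numbers: given GUC_LOG_CHUNK_SIZE = 2 MiB, printing the byte range [0x1F0000, 0x210000) resolves to chunk_id 0 / chunk_from 0x1F0000 and to_chunk_id 1 / chunk_to 0x10000, so the loop emits 0x10000 bytes from the tail of chunk 0 followed by 0x10000 bytes from the start of chunk 1.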
+
+static inline int
+xe_guc_log_add_log_event(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot,
+ struct guc_lic_save *config)
+{
+ size_t size;
+ u32 data_len, section_len;
+ struct guc_lfd_data lfd;
+ struct guc_log_buffer_entry_list *entry;
+ struct guc_lfd_data_log_events_buf events_buf;
+
+ entry = &config->entry[GUC_LOG_TYPE_EVENT_DATA];
+
+ /* Skip empty log */
+ if (entry->rd_ptr == entry->wr_ptr)
+ return 0;
+
+ size = xe_guc_log_add_lfd_header(&lfd);
+ lfd.header |= FIELD_PREP(GUC_LFD_DATA_HEADER_MASK_TYPE, GUC_LFD_TYPE_LOG_EVENTS_BUFFER);
+ events_buf.log_events_format_version = config->version;
+
+ /* Adjust to log_format_buf */
+ section_len = offsetof(struct guc_lfd_data_log_events_buf, log_event);
+ data_len = section_len;
+
+ /* Calculate data length */
+ data_len += entry->rd_ptr < entry->wr_ptr ? (entry->wr_ptr - entry->rd_ptr) :
+ (entry->wr_ptr + entry->wrap_offset - entry->rd_ptr);
+ /* make length u32 aligned */
+ lfd.data_count = DIV_ROUND_UP(data_len, sizeof(u32));
+
+ /* Output GUC_LFD_TYPE_LOG_EVENTS_BUFFER header */
+ lfd_output_binary(p, (char *)&lfd, size);
+ lfd_output_binary(p, (char *)&events_buf, section_len);
+
+ /* Output data from guc log chunks directly */
+ if (entry->rd_ptr < entry->wr_ptr) {
+ xe_guc_log_print_chunks(p, snapshot, entry->offset + entry->rd_ptr,
+ entry->offset + entry->wr_ptr);
+ } else {
+ /* 1st, print from rd to wrap offset */
+ xe_guc_log_print_chunks(p, snapshot, entry->offset + entry->rd_ptr,
+ entry->offset + entry->wrap_offset);
+
+ /* 2nd, print from buf start to wr */
+ xe_guc_log_print_chunks(p, snapshot, entry->offset, entry->offset + entry->wr_ptr);
+ }
+ return size;
+}
+
+static int
+xe_guc_log_add_crash_dump(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot,
+ struct guc_lic_save *config)
+{
+ struct guc_log_buffer_entry_list *entry;
+ int chunk_from, chunk_id;
+ int from, to, i;
+ size_t size = 0;
+ u32 *buf32;
+
+ entry = &config->entry[GUC_LOG_TYPE_CRASH_DUMP];
+
+ /* Skip zero sized crash dump */
+ if (!entry->buf_size)
+ return 0;
+
+ /* Check if the crash dump section is all zeros */
+ from = entry->offset;
+ to = entry->offset + entry->buf_size;
+ chunk_from = from % GUC_LOG_CHUNK_SIZE;
+ chunk_id = from / GUC_LOG_CHUNK_SIZE;
+ buf32 = snapshot->copy[chunk_id] + chunk_from;
+
+ for (i = 0; i < entry->buf_size / sizeof(u32); i++)
+ if (buf32[i])
+ break;
+
+ /* Buffer has non-zero data? */
+ if (i < entry->buf_size / sizeof(u32)) {
+ struct guc_lfd_data lfd;
+
+ size = xe_guc_log_add_lfd_header(&lfd);
+ lfd.header |= FIELD_PREP(GUC_LFD_DATA_HEADER_MASK_TYPE, GUC_LFD_TYPE_FW_CRASH_DUMP);
+ /* Calculate data length */
+ lfd.data_count = DIV_ROUND_UP(entry->buf_size, sizeof(u32));
+ /* Output GUC_LFD_TYPE_FW_CRASH_DUMP header */
+ lfd_output_binary(p, (char *)&lfd, size);
+
+ /* rd/wr ptr is not used for crash dump */
+ xe_guc_log_print_chunks(p, snapshot, from, to);
+ }
+ return size;
+}
+
+static void
+xe_guc_log_snapshot_print_lfd(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p)
+{
+ struct guc_lfd_file_header header;
+ struct guc_lic_save config;
+ size_t size;
+
+ if (!snapshot || !snapshot->size)
+ return;
+
+ header.magic = GUC_LFD_DRIVER_KEY_STREAMING;
+ header.version = FIELD_PREP_CONST(GUC_LFD_FILE_HEADER_VERSION_MASK_MINOR,
+ GUC_LFD_FORMAT_VERSION_MINOR) |
+ FIELD_PREP_CONST(GUC_LFD_FILE_HEADER_VERSION_MASK_MAJOR,
+ GUC_LFD_FORMAT_VERSION_MAJOR);
+
+ /* Output LFD file header */
+ lfd_output_binary(p, (char *)&header,
+ offsetof(struct guc_lfd_file_header, stream));
+
+ /* Output LFD stream */
+ xe_guc_log_load_lic(snapshot->copy[0], &config);
+ size = xe_guc_log_output_lfd_init(p, snapshot, &config);
+ if (!size)
+ return;
+
+ xe_guc_log_add_log_event(p, snapshot, &config);
+ xe_guc_log_add_crash_dump(p, snapshot, &config);
+}
+
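Summarizing the emission order implemented above (descriptive only, not a format specification): the binary stream starts with the LFD file header, followed by one KLV record for each firmware-required type (FW version through build platform ID), an OS ID record carrying the running kernel's release string, the log events buffer (omitted when the read and write pointers match), and finally the firmware crash dump section (omitted when it contains only zeros).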
/**
* xe_guc_log_print_dmesg - dump a copy of the GuC log to dmesg
* @log: GuC log structure
@@ -251,13 +607,27 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
xe_guc_log_snapshot_free(snapshot);
}
+/**
+ * xe_guc_log_print_lfd - dump a copy of the GuC log in LFD format
+ * @log: GuC log structure
+ * @p: the printer object to output to
+ */
+void xe_guc_log_print_lfd(struct xe_guc_log *log, struct drm_printer *p)
+{
+ struct xe_guc_log_snapshot *snapshot;
+
+ snapshot = xe_guc_log_snapshot_capture(log, false);
+ xe_guc_log_snapshot_print_lfd(snapshot, p);
+ xe_guc_log_snapshot_free(snapshot);
+}
+
int xe_guc_log_init(struct xe_guc_log *log)
{
struct xe_device *xe = log_to_xe(log);
struct xe_tile *tile = gt_to_tile(log_to_gt(log));
struct xe_bo *bo;
- bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
+ bo = xe_managed_bo_create_pin_map(xe, tile, GUC_LOG_SIZE,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE |
@@ -265,7 +635,7 @@ int xe_guc_log_init(struct xe_guc_log *log)
if (IS_ERR(bo))
return PTR_ERR(bo);
- xe_map_memset(xe, &bo->vmap, 0, 0, guc_log_size());
+ xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo));
log->bo = bo;
log->level = xe_modparam.guc_log_level;
@@ -274,71 +644,6 @@ int xe_guc_log_init(struct xe_guc_log *log)
ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */
-static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log)
-{
- return CRASH_BUFFER_SIZE;
-}
-
-static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log)
-{
- return DEBUG_BUFFER_SIZE;
-}
-
-/**
- * xe_guc_log_section_size_capture - Get capture buffer size within log sections.
- * @log: The log object.
- *
- * This function will return the capture buffer size within log sections.
- *
- * Return: capture buffer size.
- */
-u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
-{
- return CAPTURE_BUFFER_SIZE;
-}
-
-/**
- * xe_guc_get_log_buffer_size - Get log buffer size for a type.
- * @log: The log object.
- * @type: The log buffer type
- *
- * Return: buffer size.
- */
-u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type)
-{
- switch (type) {
- case GUC_LOG_BUFFER_CRASH_DUMP:
- return xe_guc_log_section_size_crash(log);
- case GUC_LOG_BUFFER_DEBUG:
- return xe_guc_log_section_size_debug(log);
- case GUC_LOG_BUFFER_CAPTURE:
- return xe_guc_log_section_size_capture(log);
- }
- return 0;
-}
-
-/**
- * xe_guc_get_log_buffer_offset - Get offset in log buffer for a type.
- * @log: The log object.
- * @type: The log buffer type
- *
- * This function will return the offset in the log buffer for a type.
- * Return: buffer offset.
- */
-u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type)
-{
- enum guc_log_buffer_type i;
- u32 offset = PAGE_SIZE;/* for the log_buffer_states */
-
- for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) {
- if (i == type)
- break;
- offset += xe_guc_get_log_buffer_size(log, i);
- }
-
- return offset;
-}
-
/**
* xe_guc_check_log_buf_overflow - Check if log buffer overflowed
* @log: The log object.
@@ -352,7 +657,7 @@ u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_typ
*
* Return: True if overflowed.
*/
-bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_buffer_type type,
+bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log, enum guc_log_type type,
unsigned int full_cnt)
{
unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
index 98a47ac42b08..1b05bb60c1c7 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.h
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -13,14 +13,26 @@ struct drm_printer;
struct xe_device;
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
-#define CRASH_BUFFER_SIZE SZ_1M
-#define DEBUG_BUFFER_SIZE SZ_8M
-#define CAPTURE_BUFFER_SIZE SZ_2M
+#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_8M
+#define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_1M
+#define XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE SZ_2M
#else
-#define CRASH_BUFFER_SIZE SZ_16K
-#define DEBUG_BUFFER_SIZE SZ_64K
-#define CAPTURE_BUFFER_SIZE SZ_1M
+#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_64K
+#define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_16K
+#define XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE SZ_1M
#endif
+
+#define GUC_LOG_SIZE (SZ_4K + \
+ XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE + \
+ XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE + \
+ XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE)
+
+#define XE_GUC_LOG_EVENT_DATA_OFFSET SZ_4K
+#define XE_GUC_LOG_CRASH_DUMP_OFFSET (XE_GUC_LOG_EVENT_DATA_OFFSET + \
+ XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE)
+#define XE_GUC_LOG_STATE_CAPTURE_OFFSET (XE_GUC_LOG_CRASH_DUMP_OFFSET + \
+ XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE)
+
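To make the resulting layout concrete: with the CONFIG_DRM_XE_DEBUG_GUC sizes above, the buffer works out to a 4 KiB state header, event data at 4 KiB, the crash dump at 4 KiB + 8 MiB, and state capture at 4 KiB + 9 MiB, for a total of 11 MiB + 4 KiB. A compile-time sanity sketch (illustrative only, not part of the patch; the non-debug build is smaller):

	/* Values assume CONFIG_DRM_XE_DEBUG_GUC */
	static_assert(XE_GUC_LOG_EVENT_DATA_OFFSET == SZ_4K);
	static_assert(XE_GUC_LOG_CRASH_DUMP_OFFSET == SZ_4K + SZ_8M);
	static_assert(XE_GUC_LOG_STATE_CAPTURE_OFFSET == SZ_4K + SZ_8M + SZ_1M);
	static_assert(GUC_LOG_SIZE == SZ_4K + SZ_8M + SZ_1M + SZ_2M);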
/*
* While we're using plain log level in i915, GuC controls are much more...
* "elaborate"? We have a couple of bits for verbosity, separate bit for actual
@@ -40,6 +52,7 @@ struct xe_device;
int xe_guc_log_init(struct xe_guc_log *log);
void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
+void xe_guc_log_print_lfd(struct xe_guc_log *log, struct drm_printer *p);
void xe_guc_log_print_dmesg(struct xe_guc_log *log);
struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic);
void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p);
@@ -51,11 +64,8 @@ xe_guc_log_get_level(struct xe_guc_log *log)
return log->level;
}
-u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
-u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
-u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);
bool xe_guc_check_log_buf_overflow(struct xe_guc_log *log,
- enum guc_log_buffer_type type,
+ enum guc_log_type type,
unsigned int full_cnt);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 951a49fb1d3e..54702a0fd05b 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -76,7 +76,7 @@
* exposes a programming interface to the host for the control of SLPC.
*
* Frequency management:
- * =====================
+ * ---------------------
*
* Xe driver enables SLPC with all of its defaults features and frequency
* selection, which varies per platform.
@@ -87,7 +87,7 @@
* for any workload.
*
* Render-C States:
- * ================
+ * ----------------
*
* Render-C states is also a GuC PC feature that is now enabled in Xe for
* all platforms.
@@ -499,21 +499,17 @@ u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc)
int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
{
struct xe_gt *gt = pc_to_gt(pc);
- unsigned int fw_ref;
/*
* GuC SLPC plays with cur freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT))
return -ETIMEDOUT;
- }
*freq = get_cur_freq(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -1087,13 +1083,8 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
*/
int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode)
{
- int ret;
-
- xe_pm_runtime_get(pc_to_xe(pc));
- ret = pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
- xe_pm_runtime_put(pc_to_xe(pc));
-
- return ret;
+ guard(xe_pm_runtime)(pc_to_xe(pc));
+ return pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
}
/**
@@ -1104,13 +1095,8 @@ int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mod
*/
int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc)
{
- int ret;
-
- xe_pm_runtime_get(pc_to_xe(pc));
- ret = pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE);
- xe_pm_runtime_put(pc_to_xe(pc));
-
- return ret;
+ guard(xe_pm_runtime)(pc_to_xe(pc));
+ return pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE);
}
static void pc_init_pcode_freq(struct xe_guc_pc *pc)
@@ -1198,7 +1184,7 @@ int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf)
return -EINVAL;
guard(mutex)(&pc->freq_lock);
- xe_pm_runtime_get_noresume(pc_to_xe(pc));
+ guard(xe_pm_runtime_noresume)(pc_to_xe(pc));
ret = pc_action_set_param(pc,
SLPC_PARAM_POWER_PROFILE,
@@ -1209,8 +1195,6 @@ int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf)
else
pc->power_profile = val;
- xe_pm_runtime_put(pc_to_xe(pc));
-
return ret;
}
@@ -1223,17 +1207,14 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
- unsigned int fw_ref;
ktime_t earlier;
int ret;
xe_gt_assert(gt, xe_device_uc_enabled(xe));
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT))
return -ETIMEDOUT;
- }
if (xe->info.skip_guc_pc) {
if (xe->info.platform != XE_PVC)
@@ -1241,9 +1222,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
/* Request max possible since dynamic freq mgmt is not enabled */
pc_set_cur_freq(pc, UINT_MAX);
-
- ret = 0;
- goto out;
+ return 0;
}
xe_map_memset(xe, &pc->bo->vmap, 0, 0, size);
@@ -1252,7 +1231,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
earlier = ktime_get();
ret = pc_action_reset(pc);
if (ret)
- goto out;
+ return ret;
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
SLPC_RESET_TIMEOUT_MS)) {
@@ -1263,8 +1242,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING,
SLPC_RESET_EXTENDED_TIMEOUT_MS)) {
xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n");
- ret = -EIO;
- goto out;
+ return -EIO;
}
xe_gt_warn(gt, "GuC PC excessive start time: %lldms",
@@ -1273,21 +1251,20 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
ret = pc_init_freqs(pc);
if (ret)
- goto out;
+ return ret;
ret = pc_set_mert_freq_cap(pc);
if (ret)
- goto out;
+ return ret;
if (xe->info.platform == XE_PVC) {
xe_guc_pc_gucrc_disable(pc);
- ret = 0;
- goto out;
+ return 0;
}
ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL);
if (ret)
- goto out;
+ return ret;
/* Enable SLPC Optimized Strategy for compute */
ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
@@ -1297,8 +1274,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
if (unlikely(ret))
xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret));
-out:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return ret;
}
@@ -1330,19 +1305,16 @@ static void xe_guc_pc_fini_hw(void *arg)
{
struct xe_guc_pc *pc = arg;
struct xe_device *xe = pc_to_xe(pc);
- unsigned int fw_ref;
if (xe_device_wedged(xe))
return;
- fw_ref = xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
xe_guc_pc_gucrc_disable(pc);
XE_WARN_ON(xe_guc_pc_stop(pc));
/* Bind requested freq to mert_freq_cap before unload */
pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), xe_guc_pc_get_rpe_freq(pc)));
-
- xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index f6ba2b0f074d..0b590271c326 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -19,6 +19,7 @@
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
+#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
@@ -47,6 +48,8 @@
#include "xe_uc_fw.h"
#include "xe_vm.h"
+#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6
+
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
@@ -72,6 +75,7 @@ exec_queue_to_guc(struct xe_exec_queue *q)
#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11)
#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 12)
#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 13)
+#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND (1 << 14)
static bool exec_queue_registered(struct xe_exec_queue *q)
{
@@ -263,6 +267,21 @@ static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
}
+static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
+{
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND;
+}
+
+static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
+{
+ atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
+}
+
+static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
+{
+ atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
+}
+
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
return (atomic_read(&q->guc->state) &
@@ -541,7 +560,8 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
u32 slpc_exec_queue_freq_req = 0;
u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
- xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
+ !xe_exec_queue_is_multi_queue_secondary(q));
if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
@@ -561,6 +581,8 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
{
struct exec_queue_policy policy;
+ xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));
+
__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
@@ -568,6 +590,89 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
__guc_exec_queue_policy_action_size(&policy), 0, 0);
}
+static bool vf_recovery(struct xe_guc *guc)
+{
+ return xe_gt_recovery_pending(guc_to_gt(guc));
+}
+
+static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
+{
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_device *xe = guc_to_xe(guc);
+
+ /* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
+ wake_up_all(&xe->ufence_wq);
+
+ if (xe_exec_queue_is_lr(q))
+ queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
+ else
+ xe_sched_tdr_queue_imm(&q->guc->sched);
+}
+
+static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
+{
+ struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ struct xe_exec_queue *eq;
+
+ xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
+ xe_exec_queue_is_multi_queue(q));
+
+ /* Group banned, skip timeout check in TDR */
+ WRITE_ONCE(group->banned, true);
+ xe_guc_exec_queue_trigger_cleanup(primary);
+
+ mutex_lock(&group->list_lock);
+ list_for_each_entry(eq, &group->list, multi_queue.link)
+ xe_guc_exec_queue_trigger_cleanup(eq);
+ mutex_unlock(&group->list_lock);
+}
+
+static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
+{
+ if (xe_exec_queue_is_multi_queue(q)) {
+ struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ struct xe_exec_queue *eq;
+
+ /* Group banned, skip timeout check in TDR */
+ WRITE_ONCE(group->banned, true);
+
+ set_exec_queue_reset(primary);
+ if (!exec_queue_banned(primary) && !exec_queue_check_timeout(primary))
+ xe_guc_exec_queue_trigger_cleanup(primary);
+
+ mutex_lock(&group->list_lock);
+ list_for_each_entry(eq, &group->list, multi_queue.link) {
+ set_exec_queue_reset(eq);
+ if (!exec_queue_banned(eq) && !exec_queue_check_timeout(eq))
+ xe_guc_exec_queue_trigger_cleanup(eq);
+ }
+ mutex_unlock(&group->list_lock);
+ } else {
+ set_exec_queue_reset(q);
+ if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
+ xe_guc_exec_queue_trigger_cleanup(q);
+ }
+}
+
+static void set_exec_queue_group_banned(struct xe_exec_queue *q)
+{
+ struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ struct xe_exec_queue *eq;
+
+ /* Ban all queues of the multi-queue group */
+ xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
+ xe_exec_queue_is_multi_queue(q));
+ set_exec_queue_banned(primary);
+
+ mutex_lock(&group->list_lock);
+ list_for_each_entry(eq, &group->list, multi_queue.link)
+ set_exec_queue_banned(eq);
+ mutex_unlock(&group->list_lock);
+}
+
#define parallel_read(xe_, map_, field_) \
xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
field_)
@@ -575,6 +680,181 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue
xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
field_, val_)
+/**
+ * DOC: Multi Queue Group GuC interface
+ *
+ * The multi queue group coordination between KMD and GuC is through a software
+ * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
+ * allocated in the global GTT.
+ *
+ * CGP format:
+ *
+ * +-----------+---------------------------+---------------------------------------------+
+ * | DWORD | Name | Description |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 0 | Version | Bits [15:8]=Major ver, [7:0]=Minor ver |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 1..15 | RESERVED | MBZ |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 16 | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0 |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 17 | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32 |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 18..31 | RESERVED | MBZ |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 32 | Q0CD_DW0 | Queue 0 context LRC descriptor lower DWORD |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 33 | Q0ContextIndex | Context ID for Queue 0 |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 34 | Q1CD_DW0 | Queue 1 context LRC descriptor lower DWORD |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 35 | Q1ContextIndex | Context ID for Queue 1 |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | ... |... | ... |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 158 | Q63CD_DW0 | Queue 63 context LRC descriptor lower DWORD |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 159 | Q63ContextIndex | Context ID for Queue 63 |
+ * +-----------+---------------------------+---------------------------------------------+
+ * | 160..1024 | RESERVED | MBZ |
+ * +-----------+---------------------------+---------------------------------------------+
+ *
+ * While registering Q0 with GuC, the CGP is updated with the Q0 entry and GuC is
+ * notified through the XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message,
+ * which specifies the CGP address. When a secondary queue is added to the group,
+ * the CGP is updated with the entry for that queue and GuC is notified through the
+ * H2G interface XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these
+ * H2G messages with a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE G2H
+ * message. GuC also
+ * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any
+ * error in the CGP. Only one of these CGP update messages can be outstanding
+ * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW*
+ * fields indicate which queue entry is being updated in the CGP.
+ *
+ * The primary queue (Q0) represents the multi queue group context in GuC, and
+ * submission on any queue of the group must go through the Q0 GuC interface only.
+ *
+ * As it is not required to register secondary queues with GuC, the secondary queue
+ * context IDs in the CGP are populated with the Q0 context ID.
+ */
+
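The CGP dword layout in the table above maps directly onto the fixed offsets used later in xe_guc_exec_queue_group_cgp_update(). A small illustrative helper (names are hypothetical, not part of the patch) makes the mapping explicit for a queue slot n in 0..63:

	/* Illustrative only: CGP dword indices for queue slot n (0..63) */
	static inline u32 cgp_qcd_dw0(u32 n)  { return 32 + 2 * n; }	/* QnCD_DW0 */
	static inline u32 cgp_qctx_idx(u32 n) { return 33 + 2 * n; }	/* QnContextIndex */
	/* Which KMD_QUEUE_UPDATE_MASK dword carries the bit for slot n */
	static inline u32 cgp_mask_dw(u32 n)  { return n < 32 ? 16 : 17; }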
+#define CGP_VERSION_MAJOR_SHIFT 8
+
+static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
+ struct xe_exec_queue *q)
+{
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ u32 guc_id = group->primary->guc->id;
+
+ /* Currently implementing CGP version 1.0 */
+ xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
+ 1 << CGP_VERSION_MAJOR_SHIFT);
+
+ xe_map_wr(xe, &group->cgp_bo->vmap,
+ (32 + q->multi_queue.pos * 2) * sizeof(u32),
+ u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));
+
+ xe_map_wr(xe, &group->cgp_bo->vmap,
+ (33 + q->multi_queue.pos * 2) * sizeof(u32),
+ u32, guc_id);
+
+ if (q->multi_queue.pos / 32) {
+ xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
+ u32, BIT(q->multi_queue.pos % 32));
+ xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
+ } else {
+ xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
+ u32, BIT(q->multi_queue.pos));
+ xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
+ }
+}
+
+static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
+ struct xe_exec_queue *q,
+ const u32 *action, u32 len)
+{
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ struct xe_device *xe = guc_to_xe(guc);
+ long ret;
+
+ /*
+ * As all queues of a multi queue group use a single DRM scheduler submit
+ * workqueue, CGP synchronization with GuC is serialized, so no locking is
+ * required here. Wait for any pending CGP_SYNC_DONE response before updating
+ * the CGP page and sending the CGP_SYNC message.
+ *
+ * FIXME: Support VF migration
+ */
+ ret = wait_event_timeout(guc->ct.wq,
+ !READ_ONCE(group->sync_pending) ||
+ xe_guc_read_stopped(guc), HZ);
+ if (!ret || xe_guc_read_stopped(guc)) {
+ /* CGP_SYNC failed. Reset gt, cleanup the group */
+ xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
+ set_exec_queue_group_banned(q);
+ xe_gt_reset_async(q->gt);
+ xe_guc_exec_queue_group_trigger_cleanup(q);
+ return;
+ }
+
+ xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority);
+ xe_guc_exec_queue_group_cgp_update(xe, q);
+
+ WRITE_ONCE(group->sync_pending, true);
+ xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
+}
+
+static void __register_exec_queue_group(struct xe_guc *guc,
+ struct xe_exec_queue *q,
+ struct guc_ctxt_registration_info *info)
+{
+#define MAX_MULTI_QUEUE_REG_SIZE (8)
+ u32 action[MAX_MULTI_QUEUE_REG_SIZE];
+ int len = 0;
+
+ action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
+ action[len++] = info->flags;
+ action[len++] = info->context_idx;
+ action[len++] = info->engine_class;
+ action[len++] = info->engine_submit_mask;
+ action[len++] = 0; /* Reserved */
+ action[len++] = info->cgp_lo;
+ action[len++] = info->cgp_hi;
+
+ xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
+#undef MAX_MULTI_QUEUE_REG_SIZE
+
+ /*
+ * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE expects a
+ * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
+ * from GuC.
+ */
+ xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
+}
+
+static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
+ struct xe_exec_queue *q)
+{
+#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
+ u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
+ int len = 0;
+
+ xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));
+
+ action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
+ action[len++] = q->multi_queue.group->primary->guc->id;
+
+ xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
+#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
+
+ /*
+ * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC expects a
+ * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
+ * from GuC.
+ */
+ xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
+}
+
static void __register_mlrc_exec_queue(struct xe_guc *guc,
struct xe_exec_queue *q,
struct guc_ctxt_registration_info *info)
@@ -670,6 +950,13 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
+ if (xe_exec_queue_is_multi_queue(q)) {
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+
+ info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
+ info.cgp_hi = 0;
+ }
+
if (xe_exec_queue_is_parallel(q)) {
u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
struct iosys_map map = xe_lrc_parallel_map(lrc);
@@ -700,11 +987,18 @@ static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
set_exec_queue_registered(q);
trace_xe_exec_queue_register(q);
- if (xe_exec_queue_is_parallel(q))
+ if (xe_exec_queue_is_multi_queue_primary(q))
+ __register_exec_queue_group(guc, q, &info);
+ else if (xe_exec_queue_is_parallel(q))
__register_mlrc_exec_queue(guc, q, &info);
- else
+ else if (!xe_exec_queue_is_multi_queue_secondary(q))
__register_exec_queue(guc, &info);
- init_policies(guc, q);
+
+ if (!xe_exec_queue_is_multi_queue_secondary(q))
+ init_policies(guc, q);
+
+ if (xe_exec_queue_is_multi_queue_secondary(q))
+ xe_guc_exec_queue_group_add(guc, q);
}
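Note the registration split this hunk introduces: the group primary registers the whole group (CGP address included) and receives per-queue policies, parallel queues keep the MLRC path, and secondary queues are never registered with GuC on their own; they are only synced into the primary's CGP via xe_guc_exec_queue_group_add().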
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
@@ -712,11 +1006,6 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q)
return (WQ_SIZE - q->guc->wqi_tail);
}
-static bool vf_recovery(struct xe_guc *guc)
-{
- return xe_gt_recovery_pending(guc_to_gt(guc));
-}
-
static inline void relaxed_ms_sleep(unsigned int delay_ms)
{
unsigned long min_us, max_us;
@@ -845,7 +1134,7 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
if (!job->restore_replay || job->last_replay) {
if (xe_exec_queue_is_parallel(q))
wq_item_append(q);
- else
+ else if (!exec_queue_idle_skip_suspend(q))
xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
job->last_replay = false;
}
@@ -853,6 +1142,12 @@ static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
return;
+ /*
+ * All queues in a multi-queue group will use the primary queue
+ * of the group to interface with GuC.
+ */
+ q = xe_exec_queue_multi_queue_primary(q);
+
if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
action[len++] = q->guc->id;
@@ -899,6 +1194,18 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
trace_xe_sched_job_run(job);
if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
+ if (xe_exec_queue_is_multi_queue_secondary(q)) {
+ struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
+
+ if (exec_queue_killed_or_banned_or_wedged(primary)) {
+ killed_or_banned_or_wedged = true;
+ goto run_job_out;
+ }
+
+ if (!exec_queue_registered(primary))
+ register_exec_queue(primary, GUC_CONTEXT_NORMAL);
+ }
+
if (!exec_queue_registered(q))
register_exec_queue(q, GUC_CONTEXT_NORMAL);
if (!job->restore_replay)
@@ -907,6 +1214,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
job->restore_replay = false;
}
+run_job_out:
/*
* We don't care about job-fence ordering in LR VMs because these fences
* are never exported; they are used solely to keep jobs on the pending
@@ -932,6 +1240,11 @@ int xe_guc_read_stopped(struct xe_guc *guc)
return atomic_read(&guc->submission_state.stopped);
}
+static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
+ struct xe_exec_queue *q,
+ u32 runnable_state);
+static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);
+
#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
u32 action[] = { \
XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
@@ -945,7 +1258,9 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
int ret;
- set_min_preemption_timeout(guc, q);
+ if (!xe_exec_queue_is_multi_queue_secondary(q))
+ set_min_preemption_timeout(guc, q);
+
smp_rmb();
ret = wait_event_timeout(guc->ct.wq,
(!exec_queue_pending_enable(q) &&
@@ -973,23 +1288,12 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
* Reserve space for both G2H here as the 2nd G2H is sent from a G2H
* handler and we are not allowed to reserved G2H space in handlers.
*/
- xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
- G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
- G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
-}
-
-static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
-{
- struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
-
- /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */
- wake_up_all(&xe->ufence_wq);
-
- if (xe_exec_queue_is_lr(q))
- queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
+ if (xe_exec_queue_is_multi_queue_secondary(q))
+ handle_multi_queue_secondary_sched_done(guc, q, 0);
else
- xe_sched_tdr_queue_imm(&q->guc->sched);
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
+ G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}
/**
@@ -1181,8 +1485,11 @@ static void enable_scheduling(struct xe_exec_queue *q)
set_exec_queue_enabled(q);
trace_xe_exec_queue_scheduling_enable(q);
- xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
- G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+ if (xe_exec_queue_is_multi_queue_secondary(q))
+ handle_multi_queue_secondary_sched_done(guc, q, 1);
+ else
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_enable(q) ||
@@ -1206,14 +1513,17 @@ static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
- if (immediate)
+ if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
set_min_preemption_timeout(guc, q);
clear_exec_queue_enabled(q);
set_exec_queue_pending_disable(q);
trace_xe_exec_queue_scheduling_disable(q);
- xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
- G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
+ if (xe_exec_queue_is_multi_queue_secondary(q))
+ handle_multi_queue_secondary_sched_done(guc, q, 0);
+ else
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}
static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -1231,8 +1541,11 @@ static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
set_exec_queue_destroyed(q);
trace_xe_exec_queue_deregister(q);
- xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
- G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
+ if (xe_exec_queue_is_multi_queue_secondary(q))
+ handle_deregister_done(guc, q);
+ else
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
}
static enum drm_gpu_sched_stat
@@ -1245,7 +1558,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
struct xe_guc *guc = exec_queue_to_guc(q);
const char *process_name = "no process";
struct xe_device *xe = guc_to_xe(guc);
- unsigned int fw_ref;
int err = -ETIME;
pid_t pid = -1;
int i = 0;
@@ -1271,6 +1583,19 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
exec_queue_killed_or_banned_or_wedged(q) ||
exec_queue_destroyed(q);
+ /* Skip timeout check if multi-queue group is banned */
+ if (xe_exec_queue_is_multi_queue(q) &&
+ READ_ONCE(q->multi_queue.group->banned))
+ skip_timeout_check = true;
+
+ /*
+ * FIXME: In the multi-queue scenario, the TDR must ensure that the whole
+ * multi-queue group is off the HW before signaling the fences, to avoid
+ * possible memory corruption. This means disabling scheduling on the
+ * primary queue before or during the secondary queue's TDR. This needs to
+ * be implemented in the least obtrusive way.
+ */
+
/*
* If devcoredump not captured and GuC capture for the job is not ready
* do manual capture first and decide later if we need to use it
@@ -1278,13 +1603,11 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
!xe_guc_capture_get_matching_and_lock(q)) {
/* take force wake before engine register manual capture */
- fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
xe_engine_snapshot_capture_for_queue(q);
-
- xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
}
/*
@@ -1425,7 +1748,10 @@ trigger_reset:
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
- xe_guc_exec_queue_trigger_cleanup(q);
+ if (xe_exec_queue_is_multi_queue(q))
+ xe_guc_exec_queue_group_trigger_cleanup(q);
+ else
+ xe_guc_exec_queue_trigger_cleanup(q);
/* Mark all outstanding jobs as bad, thus completing them */
spin_lock(&sched->base.job_list_lock);
@@ -1475,17 +1801,23 @@ static void __guc_exec_queue_destroy_async(struct work_struct *w)
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
- xe_pm_runtime_get(guc_to_xe(guc));
+ guard(xe_pm_runtime)(guc_to_xe(guc));
trace_xe_exec_queue_destroy(q);
+ if (xe_exec_queue_is_multi_queue_secondary(q)) {
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+
+ mutex_lock(&group->list_lock);
+ list_del(&q->multi_queue.link);
+ mutex_unlock(&group->list_lock);
+ }
+
if (xe_exec_queue_is_lr(q))
cancel_work_sync(&ge->lr_tdr);
/* Confirm no work left behind accessing device structures */
cancel_delayed_work_sync(&ge->sched.base.work_tdr);
xe_exec_queue_fini(q);
-
- xe_pm_runtime_put(guc_to_xe(guc));
}
static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
@@ -1590,9 +1922,10 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
struct xe_exec_queue *q = msg->private_data;
struct xe_guc *guc = exec_queue_to_guc(q);
+ bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q);
- if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
- exec_queue_enabled(q)) {
+ if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) &&
+ !exec_queue_suspended(q) && exec_queue_enabled(q)) {
wait_event(guc->ct.wq, vf_recovery(guc) ||
((q->guc->resume_time != RESUME_PENDING ||
xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));
@@ -1611,11 +1944,33 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
disable_scheduling(q, false);
}
} else if (q->guc->suspend_pending) {
+ if (idle_skip_suspend)
+ set_exec_queue_idle_skip_suspend(q);
set_exec_queue_suspended(q);
suspend_fence_signal(q);
}
}
+static void sched_context(struct xe_exec_queue *q)
+{
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_lrc *lrc = q->lrc[0];
+ u32 action[] = {
+ XE_GUC_ACTION_SCHED_CONTEXT,
+ q->guc->id,
+ };
+
+ xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q));
+ xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
+ xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
+
+ trace_xe_exec_queue_submit(q);
+
+ xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
struct xe_exec_queue *q = msg->private_data;
@@ -1623,19 +1978,53 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
if (guc_exec_queue_allowed_to_change_state(q)) {
clear_exec_queue_suspended(q);
if (!exec_queue_enabled(q)) {
+ if (exec_queue_idle_skip_suspend(q)) {
+ struct xe_lrc *lrc = q->lrc[0];
+
+ clear_exec_queue_idle_skip_suspend(q);
+ xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
+ }
q->guc->resume_time = RESUME_PENDING;
set_exec_queue_pending_resume(q);
enable_scheduling(q);
+ } else if (exec_queue_idle_skip_suspend(q)) {
+ clear_exec_queue_idle_skip_suspend(q);
+ sched_context(q);
}
} else {
clear_exec_queue_suspended(q);
+ clear_exec_queue_idle_skip_suspend(q);
+ }
+}
+
+static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
+{
+ struct xe_exec_queue *q = msg->private_data;
+
+ if (guc_exec_queue_allowed_to_change_state(q)) {
+#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+ u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
+ int len = 0;
+
+ action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
+ action[len++] = group->primary->guc->id;
+
+ xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
+#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
+
+ xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}
+
+ kfree(msg);
}
-#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */
-#define SET_SCHED_PROPS 2
-#define SUSPEND 3
-#define RESUME 4
+#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */
+#define SET_SCHED_PROPS 2
+#define SUSPEND 3
+#define RESUME 4
+#define SET_MULTI_QUEUE_PRIORITY 5
#define OPCODE_MASK 0xf
#define MSG_LOCKED BIT(8)
#define MSG_HEAD BIT(9)
@@ -1659,6 +2048,9 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
case RESUME:
__guc_exec_queue_process_msg_resume(msg);
break;
+ case SET_MULTI_QUEUE_PRIORITY:
+ __guc_exec_queue_process_msg_set_multi_queue_priority(msg);
+ break;
default:
XE_WARN_ON("Unknown message type");
}
@@ -1680,6 +2072,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched;
struct xe_guc *guc = exec_queue_to_guc(q);
+ struct workqueue_struct *submit_wq = NULL;
struct xe_guc_exec_queue *ge;
long timeout;
int err, i;
@@ -1700,8 +2093,20 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
msecs_to_jiffies(q->sched_props.job_timeout_ms);
+
+ /*
+ * Use the primary queue's submit_wq for all secondary queues of a
+ * multi queue group. This serialization avoids any locking around
+ * CGP synchronization with GuC.
+ */
+ if (xe_exec_queue_is_multi_queue_secondary(q)) {
+ struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
+
+ submit_wq = primary->guc->sched.base.submit_wq;
+ }
+
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
- NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
+ submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
timeout, guc_to_gt(guc)->ordered_wq, NULL,
q->name, gt_to_xe(q->gt)->drm.dev);
if (err)
@@ -1730,7 +2135,23 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
xe_exec_queue_assign_name(q, q->guc->id);
- trace_xe_exec_queue_create(q);
+ /*
+ * Maintain secondary queues of the multi queue group in a list
+ * for handling dependencies across the queues in the group.
+ */
+ if (xe_exec_queue_is_multi_queue_secondary(q)) {
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+
+ INIT_LIST_HEAD(&q->multi_queue.link);
+ mutex_lock(&group->list_lock);
+ list_add_tail(&q->multi_queue.link, &group->list);
+ mutex_unlock(&group->list_lock);
+ }
+
+ if (xe_exec_queue_is_multi_queue(q))
+ trace_xe_exec_queue_create_multi_queue(q);
+ else
+ trace_xe_exec_queue_create(q);
return 0;
@@ -1862,6 +2283,27 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
return 0;
}
+static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q,
+ enum xe_multi_queue_priority priority)
+{
+ struct xe_sched_msg *msg;
+
+ xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q));
+
+ if (q->multi_queue.priority == priority ||
+ exec_queue_killed_or_banned_or_wedged(q))
+ return 0;
+
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
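+ /* The new priority takes effect asynchronously when the scheduler processes this message */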
+ q->multi_queue.priority = priority;
+ guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY);
+
+ return 0;
+}
+
static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched = &q->guc->sched;
@@ -1936,6 +2378,10 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q)
static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
{
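+ /* A secondary queue reports reset whenever its group's primary queue has been reset */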
+ if (xe_exec_queue_is_multi_queue_secondary(q) &&
+ guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q)))
+ return true;
+
return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
}
@@ -1953,6 +2399,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
.set_priority = guc_exec_queue_set_priority,
.set_timeslice = guc_exec_queue_set_timeslice,
.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
+ .set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
.suspend = guc_exec_queue_suspend,
.suspend_wait = guc_exec_queue_suspend_wait,
.resume = guc_exec_queue_resume,
@@ -2202,6 +2649,22 @@ void xe_guc_submit_pause(struct xe_guc *guc)
struct xe_exec_queue *q;
unsigned long index;
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ xe_sched_submission_stop(&q->guc->sched);
+ mutex_unlock(&guc->submission_state.lock);
+}
+
+/**
+ * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF.
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled
+ */
+void xe_guc_submit_pause_vf(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
mutex_lock(&guc->submission_state.lock);
@@ -2293,14 +2756,15 @@ static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
}
/**
- * xe_guc_submit_unpause_prepare - Prepare unpause submission tasks on given GuC.
+ * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
* @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
*/
-void xe_guc_submit_unpause_prepare(struct xe_guc *guc)
+void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
{
struct xe_exec_queue *q;
unsigned long index;
+ xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
mutex_lock(&guc->submission_state.lock);
@@ -2377,6 +2841,23 @@ void xe_guc_submit_unpause(struct xe_guc *guc)
unsigned long index;
mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ xe_sched_submission_start(&q->guc->sched);
+ mutex_unlock(&guc->submission_state.lock);
+}
+
+/**
+ * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
+ * @guc: the &xe_guc struct instance whose scheduler is to be enabled
+ */
+void xe_guc_submit_unpause_vf(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
+
+ mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
/*
* Prevent redundant attempts to stop parallel queues, or queues
@@ -2452,7 +2933,11 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
trace_xe_exec_queue_deregister(q);
- xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
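+ /* Secondary queues of a multi queue group skip the H2G and complete deregistration locally */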
+ if (xe_exec_queue_is_multi_queue_secondary(q))
+ handle_deregister_done(guc, q);
+ else
+ xe_guc_ct_send_g2h_handler(&guc->ct, action,
+ ARRAY_SIZE(action));
}
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
@@ -2502,6 +2987,16 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
}
}
+static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
+ struct xe_exec_queue *q,
+ u32 runnable_state)
+{
+ /* Take the CT lock here as handle_sched_done() sends an H2G message */
+ mutex_lock(&guc->ct.lock);
+ handle_sched_done(guc, q, runnable_state);
+ mutex_unlock(&guc->ct.lock);
+}
+
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_exec_queue *q;
@@ -2585,8 +3080,9 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
if (unlikely(!q))
return -EPROTO;
- xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
- xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
+ xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
+ xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
+ atomic_read(&q->guc->state));
trace_xe_exec_queue_reset(q);
@@ -2596,9 +3092,7 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
* jobs by setting timeout of the job to the minimum value kicking
* guc_exec_queue_timedout_job.
*/
- set_exec_queue_reset(q);
- if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
- xe_guc_exec_queue_trigger_cleanup(q);
+ xe_guc_exec_queue_reset_trigger_cleanup(q);
return 0;
}
@@ -2666,20 +3160,18 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
* See bspec 54047 and 72187 for details.
*/
if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
- xe_gt_dbg(gt,
- "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
- type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
+ xe_gt_info(gt,
+ "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
+ type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
else
- xe_gt_dbg(gt,
- "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
- xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
+ xe_gt_info(gt,
+ "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
+ xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
trace_xe_exec_queue_memory_cat_error(q);
/* Treat the same as engine reset */
- set_exec_queue_reset(q);
- if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
- xe_guc_exec_queue_trigger_cleanup(q);
+ xe_guc_exec_queue_reset_trigger_cleanup(q);
return 0;
}
@@ -2706,6 +3198,73 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le
return 0;
}
+int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
+ u32 len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_exec_queue *q;
+ u32 guc_id = msg[2];
+
+ if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) {
+ drm_err(&xe->drm, "Invalid length %u", len);
+ return -EPROTO;
+ }
+
+ q = g2h_exec_queue_lookup(guc, guc_id);
+ if (unlikely(!q))
+ return -EPROTO;
+
+ xe_gt_dbg(gt,
+ "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x",
+ msg[0] & 1 ? "uc" : "kmd", msg[1], msg[2], msg[3], msg[4]);
+
+ trace_xe_exec_queue_cgp_context_error(q);
+
+ /* Treat the same as engine reset */
+ xe_guc_exec_queue_reset_trigger_cleanup(q);
+
+ return 0;
+}
+
+/**
+ * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler
+ * @guc: guc
+ * @msg: message indicating CGP sync done
+ * @len: length of message
+ *
+ * Set the multi queue group's sync_pending flag to false and wake up anyone waiting
+ * for CGP synchronization to complete.
+ *
+ * Return: 0 on success, -EPROTO for malformed messages.
+ */
+int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_exec_queue *q;
+ u32 guc_id = msg[0];
+
+ if (unlikely(len < 1)) {
+ drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len);
+ return -EPROTO;
+ }
+
+ q = g2h_exec_queue_lookup(guc, guc_id);
+ if (unlikely(!q))
+ return -EPROTO;
+
+ if (!xe_exec_queue_is_multi_queue_primary(q)) {
+ drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response");
+ return -EPROTO;
+ }
+
+ /* Wake up the serialized CGP update wait */
+ WRITE_ONCE(q->multi_queue.group->sync_pending, false);
+ xe_guc_ct_wake_waiters(&guc->ct);
+
+ return 0;
+}
+
static void
guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
struct xe_guc_submit_exec_queue_snapshot *snapshot)
@@ -2805,6 +3364,11 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
if (snapshot->parallel_execution)
guc_exec_queue_wq_snapshot_capture(q, snapshot);
+ if (xe_exec_queue_is_multi_queue(q)) {
+ snapshot->multi_queue.valid = true;
+ snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id;
+ snapshot->multi_queue.pos = q->multi_queue.pos;
+ }
spin_lock(&sched->base.job_list_lock);
snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
@@ -2887,6 +3451,11 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
if (snapshot->parallel_execution)
guc_exec_queue_wq_snapshot_print(snapshot, p);
+ if (snapshot->multi_queue.valid) {
+ drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary);
+ drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos);
+ }
+
for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
i++)
drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index b49a2748ec46..4d89b2975fe9 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -21,9 +21,11 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc);
void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
void xe_guc_submit_pause(struct xe_guc *guc);
-void xe_guc_submit_unpause(struct xe_guc *guc);
-void xe_guc_submit_unpause_prepare(struct xe_guc *guc);
void xe_guc_submit_pause_abort(struct xe_guc *guc);
+void xe_guc_submit_pause_vf(struct xe_guc *guc);
+void xe_guc_submit_unpause(struct xe_guc *guc);
+void xe_guc_submit_unpause_vf(struct xe_guc *guc);
+void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc);
void xe_guc_submit_wedge(struct xe_guc *guc);
int xe_guc_read_stopped(struct xe_guc *guc);
@@ -34,6 +36,9 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len);
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
+ u32 len);
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
index dc7456c34583..25e29e85502c 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -135,6 +135,19 @@ struct xe_guc_submit_exec_queue_snapshot {
u32 wq[WQ_SIZE / sizeof(u32)];
} parallel;
+ /** @multi_queue: snapshot of the multi queue information */
+ struct {
+ /**
+ * @multi_queue.primary: GuC id of the primary exec queue
+ * of the multi queue group.
+ */
+ u32 primary;
+ /** @multi_queue.pos: Position of the exec queue within the multi queue group */
+ u8 pos;
+ /** @multi_queue.valid: The exec queue is part of a multi queue group */
+ bool valid;
+ } multi_queue;
+
/** @pending_list_size: Size of the pending list snapshot array */
int pending_list_size;
/** @pending_list: snapshot of the pending list info */
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
index a80175c7c478..6532a88d51e2 100644
--- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -13,6 +13,7 @@
#include "xe_guc_tlb_inval.h"
#include "xe_force_wake.h"
#include "xe_mmio.h"
+#include "xe_sa.h"
#include "xe_tlb_inval.h"
#include "regs/xe_guc_regs.h"
@@ -34,9 +35,12 @@ static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len)
G2H_LEN_DW_TLB_INVALIDATE, 1);
}
-#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+#define MAKE_INVAL_OP_FLUSH(type, flush_cache) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
- XE_GUC_TLB_INVAL_FLUSH_CACHE)
+ (flush_cache ? \
+ XE_GUC_TLB_INVAL_FLUSH_CACHE : 0))
+
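+/* MAKE_INVAL_OP keeps the original always-flush-cache behaviour */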
+#define MAKE_INVAL_OP(type) MAKE_INVAL_OP_FLUSH(type, true)
static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
{
@@ -71,12 +75,11 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
return send_tlb_inval(guc, action, ARRAY_SIZE(action));
} else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
struct xe_mmio *mmio = &gt->mmio;
- unsigned int fw_ref;
if (IS_SRIOV_VF(xe))
return -ECANCELED;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
PVC_GUC_TLB_INV_DESC1_INVALIDATE);
@@ -86,12 +89,25 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
xe_mmio_write32(mmio, GUC_TLB_INV_CR,
GUC_TLB_INV_CR_INVALIDATE);
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
return -ECANCELED;
}
+static int send_page_reclaim(struct xe_guc *guc, u32 seqno,
+ u64 gpu_addr)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_PAGE_RECLAMATION,
+ seqno,
+ lower_32_bits(gpu_addr),
+ upper_32_bits(gpu_addr),
+ };
+
+ return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
+ G2H_LEN_DW_PAGE_RECLAMATION, 1);
+}
+
/*
* Ensure that roundup_pow_of_two(length) doesn't overflow.
* Note that roundup_pow_of_two() operates on unsigned long,
@@ -100,20 +116,21 @@ static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
- u64 start, u64 end, u32 asid)
+ u64 start, u64 end, u32 asid,
+ struct drm_suballoc *prl_sa)
{
#define MAX_TLB_INVALIDATION_LEN 7
struct xe_guc *guc = tlb_inval->private;
struct xe_gt *gt = guc_to_gt(guc);
u32 action[MAX_TLB_INVALIDATION_LEN];
u64 length = end - start;
- int len = 0;
+ int len = 0, err;
if (guc_to_xe(guc)->info.force_execlist)
return -ECANCELED;
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
- action[len++] = seqno;
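+ /*
+ * With a page-reclaim list, the seqno is carried by the page-reclaim
+ * action sent below rather than by the invalidation itself.
+ */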
+ action[len++] = !prl_sa ? seqno : TLB_INVALIDATION_SEQNO_INVALID;
if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
@@ -154,7 +171,8 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
ilog2(SZ_2M) + 1)));
xe_gt_assert(gt, IS_ALIGNED(start, length));
- action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+ /* Flush the cache only when no PRL is supplied; media has no PPC, so the flush change is a NOP there */
+ action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, !prl_sa);
action[len++] = asid;
action[len++] = lower_32_bits(start);
action[len++] = upper_32_bits(start);
@@ -163,7 +181,10 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
- return send_tlb_inval(guc, action, len);
+ err = send_tlb_inval(guc, action, len);
+ if (!err && prl_sa)
+ err = send_page_reclaim(guc, seqno, xe_sa_bo_gpu_addr(prl_sa));
+ return err;
}
static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 0a70c8924582..4212162913af 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -300,19 +300,16 @@ void xe_huc_sanitize(struct xe_huc *huc)
void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
{
struct xe_gt *gt = huc_to_gt(huc);
- unsigned int fw_ref;
xe_uc_fw_print(&huc->fw, p);
if (!xe_uc_fw_is_enabled(&huc->fw))
return;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return;
drm_printf(p, "\nHuC status: 0x%08x\n",
xe_mmio_read32(&gt->mmio, HUC_KERNEL_LOAD_INFO));
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
index 3a888a40188b..df9c4d79b710 100644
--- a/drivers/gpu/drm/xe/xe_huc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -37,9 +37,8 @@ static int huc_info(struct seq_file *m, void *data)
struct xe_device *xe = huc_to_xe(huc);
struct drm_printer p = drm_seq_file_printer(m);
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
xe_huc_print_info(huc, &p);
- xe_pm_runtime_put(xe);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index 640950172088..cb45cdceef67 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -43,16 +43,14 @@ static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj,
{
struct xe_device *xe = kobj_to_xe(kobj);
struct kobj_attribute *kattr;
- ssize_t ret = -EIO;
kattr = container_of(attr, struct kobj_attribute, attr);
if (kattr->show) {
- xe_pm_runtime_get(xe);
- ret = kattr->show(kobj, kattr, buf);
- xe_pm_runtime_put(xe);
+ guard(xe_pm_runtime)(xe);
+ return kattr->show(kobj, kattr, buf);
}
- return ret;
+ return -EIO;
}
static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj,
@@ -62,16 +60,14 @@ static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj,
{
struct xe_device *xe = kobj_to_xe(kobj);
struct kobj_attribute *kattr;
- ssize_t ret = -EIO;
kattr = container_of(attr, struct kobj_attribute, attr);
if (kattr->store) {
- xe_pm_runtime_get(xe);
- ret = kattr->store(kobj, kattr, buf, count);
- xe_pm_runtime_put(xe);
+ guard(xe_pm_runtime)(xe);
+ return kattr->store(kobj, kattr, buf, count);
}
- return ret;
+ return -EIO;
}
static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = {
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index fa4db5f23342..f69a32c27458 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -9,7 +9,9 @@
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
+#include "xe_gt_stats.h"
#include "xe_hw_engine_group.h"
+#include "xe_sync.h"
#include "xe_vm.h"
static void
@@ -20,7 +22,8 @@ hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
int err;
enum xe_hw_engine_group_execution_mode previous_mode;
- err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
+ err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode,
+ NULL, 0);
if (err)
return;
@@ -188,23 +191,39 @@ void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group
/**
* xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
* @group: The hw engine group
+ * @has_deps: whether the dma-fence job triggering the suspend has dependencies
*
* Return: 0 on success, negative error code on error.
*/
-static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
+static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group,
+ bool has_deps)
{
int err;
struct xe_exec_queue *q;
+ struct xe_gt *gt = NULL;
bool need_resume = false;
+ ktime_t start = xe_gt_stats_ktime_get();
lockdep_assert_held_write(&group->mode_sem);
list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
+ bool idle_skip_suspend;
+
if (!xe_vm_in_fault_mode(q->vm))
continue;
- need_resume = true;
+ idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q);
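+ /*
+ * If the queue cannot skip suspend and the incoming job still has
+ * dependencies, bail out with -EAGAIN so the caller can wait on the
+ * syncs and retry.
+ */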
+ if (!idle_skip_suspend && has_deps)
+ return -EAGAIN;
+
+ xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, 1);
+ if (idle_skip_suspend)
+ xe_gt_stats_incr(q->gt,
+ XE_GT_STATS_ID_HW_ENGINE_GROUP_SKIP_LR_QUEUE_COUNT, 1);
+
+ need_resume |= !idle_skip_suspend;
q->ops->suspend(q);
+ gt = q->gt;
}
list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
@@ -216,6 +235,12 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group
return err;
}
+ if (gt) {
+ xe_gt_stats_incr(gt,
+ XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_US,
+ xe_gt_stats_ktime_us_delta(start));
+ }
+
if (need_resume)
xe_hw_engine_group_resume_faulting_lr_jobs(group);
@@ -236,7 +261,9 @@ static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group
{
long timeout;
struct xe_exec_queue *q;
+ struct xe_gt *gt = NULL;
struct dma_fence *fence;
+ ktime_t start = xe_gt_stats_ktime_get();
lockdep_assert_held_write(&group->mode_sem);
@@ -244,18 +271,26 @@ static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group
if (xe_vm_in_lr_mode(q->vm))
continue;
+ xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_COUNT, 1);
fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
timeout = dma_fence_wait(fence, false);
dma_fence_put(fence);
+ gt = q->gt;
if (timeout < 0)
return -ETIME;
}
+ if (gt) {
+ xe_gt_stats_incr(gt,
+ XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_US,
+ xe_gt_stats_ktime_us_delta(start));
+ }
+
return 0;
}
-static int switch_mode(struct xe_hw_engine_group *group)
+static int switch_mode(struct xe_hw_engine_group *group, bool has_deps)
{
int err = 0;
enum xe_hw_engine_group_execution_mode new_mode;
@@ -265,7 +300,8 @@ static int switch_mode(struct xe_hw_engine_group *group)
switch (group->cur_mode) {
case EXEC_MODE_LR:
new_mode = EXEC_MODE_DMA_FENCE;
- err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
+ err = xe_hw_engine_group_suspend_faulting_lr_jobs(group,
+ has_deps);
break;
case EXEC_MODE_DMA_FENCE:
new_mode = EXEC_MODE_LR;
@@ -281,19 +317,36 @@ static int switch_mode(struct xe_hw_engine_group *group)
return 0;
}
+static int wait_syncs(struct xe_sync_entry *syncs, int num_syncs)
+{
+ int err, i;
+
+ for (i = 0; i < num_syncs; ++i) {
+ err = xe_sync_entry_wait(syncs + i);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
/**
* xe_hw_engine_group_get_mode() - Get the group to execute in the new mode
* @group: The hw engine group
* @new_mode: The new execution mode
* @previous_mode: Pointer to the previous mode provided for use by caller
+ * @syncs: Syncs from exec IOCTL
+ * @num_syncs: Number of syncs from exec IOCTL
*
* Return: 0 if successful, -EINTR if locking failed.
*/
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
enum xe_hw_engine_group_execution_mode new_mode,
- enum xe_hw_engine_group_execution_mode *previous_mode)
+ enum xe_hw_engine_group_execution_mode *previous_mode,
+ struct xe_sync_entry *syncs, int num_syncs)
__acquires(&group->mode_sem)
{
+ bool has_deps = !!num_syncs;
int err = down_read_interruptible(&group->mode_sem);
if (err)
@@ -303,15 +356,25 @@ __acquires(&group->mode_sem)
if (new_mode != group->cur_mode) {
up_read(&group->mode_sem);
+retry:
err = down_write_killable(&group->mode_sem);
if (err)
return err;
if (new_mode != group->cur_mode) {
- err = switch_mode(group);
+ err = switch_mode(group, has_deps);
if (err) {
up_write(&group->mode_sem);
- return err;
+
+ if (err != -EAGAIN)
+ return err;
+
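+ /*
+ * The switch was refused while the job still had dependencies;
+ * wait for the syncs on the CPU and retry without them.
+ */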
+ err = wait_syncs(syncs, num_syncs);
+ if (err)
+ return err;
+
+ has_deps = false;
+ goto retry;
}
}
downgrade_write(&group->mode_sem);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h
index 797ee81acbf2..8b17ccd30b70 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
@@ -11,6 +11,7 @@
struct drm_device;
struct xe_exec_queue;
struct xe_gt;
+struct xe_sync_entry;
int xe_hw_engine_setup_groups(struct xe_gt *gt);
@@ -19,7 +20,8 @@ void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
enum xe_hw_engine_group_execution_mode new_mode,
- enum xe_hw_engine_group_execution_mode *previous_mode);
+ enum xe_hw_engine_group_execution_mode *previous_mode,
+ struct xe_sync_entry *syncs, int num_syncs);
void xe_hw_engine_group_put(struct xe_hw_engine_group *group);
enum xe_hw_engine_group_execution_mode
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index 97879daeefc1..ff2aea52ef75 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -502,7 +502,7 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
int ret = 0;
- xe_pm_runtime_get(hwmon->xe);
+ guard(xe_pm_runtime)(hwmon->xe);
mutex_lock(&hwmon->hwmon_lock);
@@ -521,8 +521,6 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
mutex_unlock(&hwmon->hwmon_lock);
- xe_pm_runtime_put(hwmon->xe);
-
x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val);
y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val);
@@ -604,7 +602,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
rxy = REG_FIELD_PREP(PWR_LIM_TIME_X, x) |
REG_FIELD_PREP(PWR_LIM_TIME_Y, y);
- xe_pm_runtime_get(hwmon->xe);
+ guard(xe_pm_runtime)(hwmon->xe);
mutex_lock(&hwmon->hwmon_lock);
@@ -616,8 +614,6 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
mutex_unlock(&hwmon->hwmon_lock);
- xe_pm_runtime_put(hwmon->xe);
-
return count;
}
@@ -1124,37 +1120,25 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
int channel, long *val)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
- int ret;
- xe_pm_runtime_get(hwmon->xe);
+ guard(xe_pm_runtime)(hwmon->xe);
switch (type) {
case hwmon_temp:
- ret = xe_hwmon_temp_read(hwmon, attr, channel, val);
- break;
+ return xe_hwmon_temp_read(hwmon, attr, channel, val);
case hwmon_power:
- ret = xe_hwmon_power_read(hwmon, attr, channel, val);
- break;
+ return xe_hwmon_power_read(hwmon, attr, channel, val);
case hwmon_curr:
- ret = xe_hwmon_curr_read(hwmon, attr, channel, val);
- break;
+ return xe_hwmon_curr_read(hwmon, attr, channel, val);
case hwmon_in:
- ret = xe_hwmon_in_read(hwmon, attr, channel, val);
- break;
+ return xe_hwmon_in_read(hwmon, attr, channel, val);
case hwmon_energy:
- ret = xe_hwmon_energy_read(hwmon, attr, channel, val);
- break;
+ return xe_hwmon_energy_read(hwmon, attr, channel, val);
case hwmon_fan:
- ret = xe_hwmon_fan_read(hwmon, attr, channel, val);
- break;
+ return xe_hwmon_fan_read(hwmon, attr, channel, val);
default:
- ret = -EOPNOTSUPP;
- break;
+ return -EOPNOTSUPP;
}
-
- xe_pm_runtime_put(hwmon->xe);
-
- return ret;
}
static int
@@ -1162,25 +1146,17 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
int channel, long val)
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
- int ret;
- xe_pm_runtime_get(hwmon->xe);
+ guard(xe_pm_runtime)(hwmon->xe);
switch (type) {
case hwmon_power:
- ret = xe_hwmon_power_write(hwmon, attr, channel, val);
- break;
+ return xe_hwmon_power_write(hwmon, attr, channel, val);
case hwmon_curr:
- ret = xe_hwmon_curr_write(hwmon, attr, channel, val);
- break;
+ return xe_hwmon_curr_write(hwmon, attr, channel, val);
default:
- ret = -EOPNOTSUPP;
- break;
+ return -EOPNOTSUPP;
}
-
- xe_pm_runtime_put(hwmon->xe);
-
- return ret;
}
static int xe_hwmon_read_label(struct device *dev,
diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c
index 0b5452be0c87..8eccbae05705 100644
--- a/drivers/gpu/drm/xe/xe_i2c.c
+++ b/drivers/gpu/drm/xe/xe_i2c.c
@@ -319,7 +319,7 @@ int xe_i2c_probe(struct xe_device *xe)
struct xe_i2c *i2c;
int ret;
- if (xe->info.platform != XE_BATTLEMAGE)
+ if (!xe->info.has_i2c)
return 0;
if (IS_SRIOV_VF(xe))
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 024e13e606ec..baf5d2c6e802 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -21,6 +21,7 @@
#include "xe_hw_error.h"
#include "xe_i2c.h"
#include "xe_memirq.h"
+#include "xe_mert.h"
#include "xe_mmio.h"
#include "xe_pxp.h"
#include "xe_sriov.h"
@@ -525,6 +526,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
xe_heci_csc_irq_handler(xe, master_ctl);
xe_display_irq_handler(xe, master_ctl);
xe_i2c_irq_handler(xe, master_ctl);
+ xe_mert_irq_handler(xe, master_ctl);
gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
}
}
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 4dc1de482eee..3059ea6525bc 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -8,15 +8,18 @@
#include <drm/drm_managed.h>
#include "regs/xe_gt_regs.h"
+#include "regs/xe_mert_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_tlb_inval.h"
#include "xe_lmtt.h"
#include "xe_map.h"
+#include "xe_mert.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
+#include "xe_tile.h"
#include "xe_tile_sriov_printk.h"
/**
@@ -196,16 +199,22 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
struct xe_device *xe = tile_to_xe(tile);
dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE);
struct xe_gt *gt;
+ u32 config;
u8 id;
lmtt_debug(lmtt, "DIR offset %pad\n", &offset);
lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
+ config = LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K);
+
for_each_gt_on_tile(gt, tile, id)
xe_mmio_write32(&gt->mmio,
GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
- LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
+ config);
+
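+ /* Mirror the LMTT directory pointer into MERT's LMEM config on the root tile */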
+ if (xe_device_has_mert(xe) && xe_tile_is_root(tile))
+ xe_mmio_write32(&tile->mmio, MERT_LMEM_CFG, config);
}
/**
@@ -262,19 +271,29 @@ static int lmtt_invalidate_hw(struct xe_lmtt *lmtt)
* @lmtt: the &xe_lmtt to invalidate
*
* Send requests to all GuCs on this tile to invalidate all TLBs.
+ * If the platform has a standalone MERT, also invalidate MERT's TLB.
*
* This function should be called only when running as a PF driver.
*/
void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt)
{
+ struct xe_tile *tile = lmtt_to_tile(lmtt);
+ struct xe_device *xe = lmtt_to_xe(lmtt);
int err;
- lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt)));
+ lmtt_assert(lmtt, IS_SRIOV_PF(xe));
err = lmtt_invalidate_hw(lmtt);
if (err)
- xe_tile_sriov_err(lmtt_to_tile(lmtt), "LMTT invalidation failed (%pe)",
+ xe_tile_sriov_err(tile, "LMTT invalidation failed (%pe)",
ERR_PTR(err));
+
+ if (xe_device_has_mert(xe) && xe_tile_is_root(tile)) {
+ err = xe_mert_invalidate_lmtt(tile);
+ if (err)
+ xe_tile_sriov_err(tile, "MERT LMTT invalidation failed (%pe)",
+ ERR_PTR(err));
+ }
}
static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index b5083c99dd50..70eae7d03a27 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -44,6 +44,11 @@
#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
+#define LRC_PRIORITY GENMASK_ULL(10, 9)
+#define LRC_PRIORITY_LOW 0
+#define LRC_PRIORITY_NORMAL 1
+#define LRC_PRIORITY_HIGH 2
+
/*
* Layout of the LRC and associated data allocated as
* lrc->bo:
@@ -91,13 +96,19 @@ gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
return false;
}
-size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
+/**
+ * xe_gt_lrc_hang_replay_size() - Hang replay size
+ * @gt: The GT
+ * @class: Hardware engine class
+ *
+ * Determine size of GPU hang replay state for a GT and hardware engine class.
+ *
+ * Return: Size of the GPU hang replay state in bytes
+ */
+size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class)
{
struct xe_device *xe = gt_to_xe(gt);
- size_t size;
-
- /* Per-process HW status page (PPHWSP) */
- size = LRC_PPHWSP_SIZE;
+ size_t size = 0;
/* Engine context image */
switch (class) {
@@ -123,11 +134,18 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
size += 1 * SZ_4K;
}
+ return size;
+}
+
+size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
+{
+ size_t size = xe_gt_lrc_hang_replay_size(gt, class);
+
/* Add indirect ring state page */
if (xe_gt_has_indirect_ring_state(gt))
size += LRC_INDIRECT_RING_STATE_SIZE;
- return size;
+ return size + LRC_PPHWSP_SIZE;
}
/*
@@ -1386,8 +1404,33 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
return 0;
}
+static u8 xe_multi_queue_prio_to_lrc(struct xe_lrc *lrc, enum xe_multi_queue_priority priority)
+{
+ struct xe_device *xe = gt_to_xe(lrc->gt);
+
+ xe_assert(xe, (priority >= XE_MULTI_QUEUE_PRIORITY_LOW &&
+ priority <= XE_MULTI_QUEUE_PRIORITY_HIGH));
+
+ /* xe_multi_queue_priority is directly mapped to LRC priority values */
+ return priority;
+}
+
+/**
+ * xe_lrc_set_multi_queue_priority() - Set multi queue priority in LRC
+ * @lrc: Logical Ring Context
+ * @priority: Multi queue priority of the exec queue
+ *
+ * Convert @priority to LRC multi queue priority and update the @lrc descriptor
+ */
+void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority)
+{
+ lrc->desc &= ~LRC_PRIORITY;
+ lrc->desc |= FIELD_PREP(LRC_PRIORITY, xe_multi_queue_prio_to_lrc(lrc, priority));
+}
+
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size, u16 msix_vec,
+ struct xe_vm *vm, void *replay_state, u32 ring_size,
+ u16 msix_vec,
u32 init_flags)
{
struct xe_gt *gt = hwe->gt;
@@ -1402,6 +1445,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
kref_init(&lrc->refcount);
lrc->gt = gt;
+ lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class);
lrc->size = lrc_size;
lrc->flags = 0;
lrc->ring.size = ring_size;
@@ -1438,11 +1482,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
* scratch.
*/
map = __xe_lrc_pphwsp_map(lrc);
- if (gt->default_lrc[hwe->class]) {
+ if (gt->default_lrc[hwe->class] || replay_state) {
xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
lrc_size - LRC_PPHWSP_SIZE);
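+ /*
+ * Overlay the saved hang-replay state on top of the default engine
+ * context; the PPHWSP stays zeroed.
+ */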
+ if (replay_state)
+ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
+ replay_state, lrc->replay_size);
} else {
void *init_data = empty_lrc_data(hwe);
@@ -1550,6 +1597,7 @@ err_lrc_finish:
* xe_lrc_create - Create a LRC
* @hwe: Hardware Engine
* @vm: The VM (address space)
+ * @replay_state: GPU hang replay state
* @ring_size: LRC ring size
* @msix_vec: MSI-X interrupt vector (for platforms that support it)
* @flags: LRC initialization flags
@@ -1560,7 +1608,7 @@ err_lrc_finish:
* upon failure.
*/
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size, u16 msix_vec, u32 flags)
+ void *replay_state, u32 ring_size, u16 msix_vec, u32 flags)
{
struct xe_lrc *lrc;
int err;
@@ -1569,7 +1617,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
if (!lrc)
return ERR_PTR(-ENOMEM);
- err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
+ err = xe_lrc_init(lrc, hwe, vm, replay_state, ring_size, msix_vec, flags);
if (err) {
kfree(lrc);
return ERR_PTR(err);
@@ -2235,6 +2283,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->lrc_bo = xe_bo_get(lrc->bo);
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
snapshot->lrc_size = lrc->size;
+ snapshot->replay_offset = 0;
+ snapshot->replay_size = lrc->replay_size;
snapshot->lrc_snapshot = NULL;
snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
@@ -2305,6 +2355,9 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
}
drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
+ drm_printf(p, "\n\t[HWCTX].replay_offset: 0x%lx\n", snapshot->replay_offset);
+ drm_printf(p, "\n\t[HWCTX].replay_length: 0x%lx\n", snapshot->replay_size);
+
drm_puts(p, "\t[HWCTX].data: ");
for (; i < snapshot->lrc_size; i += sizeof(u32)) {
u32 *val = snapshot->lrc_snapshot + i;
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 2fb628da5c43..8acf85273c1a 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -13,6 +13,7 @@ struct drm_printer;
struct xe_bb;
struct xe_device;
struct xe_exec_queue;
+enum xe_multi_queue_priority;
enum xe_engine_class;
struct xe_gt;
struct xe_hw_engine;
@@ -23,6 +24,7 @@ struct xe_lrc_snapshot {
struct xe_bo *lrc_bo;
void *lrc_snapshot;
unsigned long lrc_size, lrc_offset;
+ unsigned long replay_size, replay_offset;
u32 context_desc;
u32 ring_addr;
@@ -49,7 +51,7 @@ struct xe_lrc_snapshot {
#define XE_LRC_CREATE_USER_CTX BIT(2)
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size, u16 msix_vec, u32 flags);
+ void *replay_state, u32 ring_size, u16 msix_vec, u32 flags);
void xe_lrc_destroy(struct kref *ref);
/**
@@ -86,6 +88,7 @@ static inline size_t xe_lrc_ring_size(void)
return SZ_16K;
}
+size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class);
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class);
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
u32 xe_lrc_regs_offset(struct xe_lrc *lrc);
@@ -133,6 +136,8 @@ void xe_lrc_dump_default(struct drm_printer *p,
u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs);
+void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority);
+
struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc);
void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot);
void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index e9883706e004..a4373d280c39 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -25,6 +25,9 @@ struct xe_lrc {
/** @size: size of the lrc and optional indirect ring state */
u32 size;
+ /** @replay_size: Size of the LRC state needed for replaying a hang */
+ u32 replay_size;
+
/** @gt: gt which this LRC belongs to */
struct xe_gt *gt;
diff --git a/drivers/gpu/drm/xe/xe_mert.c b/drivers/gpu/drm/xe/xe_mert.c
new file mode 100644
index 000000000000..f7689e922953
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mert.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2025, Intel Corporation. All rights reserved.
+ */
+
+#include "regs/xe_irq_regs.h"
+#include "regs/xe_mert_regs.h"
+
+#include "xe_device.h"
+#include "xe_mert.h"
+#include "xe_mmio.h"
+#include "xe_tile.h"
+
+/**
+ * xe_mert_invalidate_lmtt - Invalidate MERT LMTT
+ * @tile: the &xe_tile
+ *
+ * Trigger invalidation of the MERT LMTT and wait for completion.
+ *
+ * Return: 0 on success or -ETIMEDOUT in case of a timeout.
+ */
+int xe_mert_invalidate_lmtt(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_mert *mert = &tile->mert;
+ const long timeout = HZ / 4;
+ unsigned long flags;
+
+ xe_assert(xe, xe_device_has_mert(xe));
+ xe_assert(xe, xe_tile_is_root(tile));
+
+ spin_lock_irqsave(&mert->lock, flags);
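+ /*
+ * Only the first caller triggers the HW invalidation; concurrent
+ * callers wait on the same completion.
+ */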
+ if (!mert->tlb_inv_triggered) {
+ mert->tlb_inv_triggered = true;
+ reinit_completion(&mert->tlb_inv_done);
+ xe_mmio_write32(&tile->mmio, MERT_TLB_INV_DESC_A, MERT_TLB_INV_DESC_A_VALID);
+ }
+ spin_unlock_irqrestore(&mert->lock, flags);
+
+ if (!wait_for_completion_timeout(&mert->tlb_inv_done, timeout))
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+/**
+ * xe_mert_irq_handler - Handler for MERT interrupts
+ * @xe: the &xe_device
+ * @master_ctl: interrupt register
+ *
+ * Handle interrupts generated by MERT.
+ */
+void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+ unsigned long flags;
+ u32 reg_val;
+ u8 err;
+
+ if (!(master_ctl & SOC_H2DMEMINT_IRQ))
+ return;
+
+ reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT);
+ xe_mmio_write32(&tile->mmio, MERT_TLB_CT_INTR_ERR_ID_PORT, 0);
+
+ err = FIELD_GET(MERT_TLB_CT_ERROR_MASK, reg_val);
+ if (err == MERT_TLB_CT_LMTT_FAULT)
+ drm_dbg(&xe->drm, "MERT catastrophic error: LMTT fault (VF%u)\n",
+ FIELD_GET(MERT_TLB_CT_VFID_MASK, reg_val));
+ else if (err)
+ drm_dbg(&xe->drm, "MERT catastrophic error: Unexpected fault (0x%x)\n", err);
+
+ spin_lock_irqsave(&tile->mert.lock, flags);
+ if (tile->mert.tlb_inv_triggered) {
+ reg_val = xe_mmio_read32(&tile->mmio, MERT_TLB_INV_DESC_A);
+ if (!(reg_val & MERT_TLB_INV_DESC_A_VALID)) {
+ tile->mert.tlb_inv_triggered = false;
+ complete_all(&tile->mert.tlb_inv_done);
+ }
+ }
+ spin_unlock_irqrestore(&tile->mert.lock, flags);
+}
diff --git a/drivers/gpu/drm/xe/xe_mert.h b/drivers/gpu/drm/xe/xe_mert.h
new file mode 100644
index 000000000000..2e14c5dec008
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mert.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2025, Intel Corporation. All rights reserved.
+ */
+
+#ifndef __XE_MERT_H__
+#define __XE_MERT_H__
+
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+struct xe_device;
+struct xe_tile;
+
+/**
+ * struct xe_mert - MERT TLB invalidation state
+ */
+struct xe_mert {
+ /** @lock: protects the TLB invalidation status */
+ spinlock_t lock;
+ /** @tlb_inv_triggered: indicates if TLB invalidation was triggered */
+ bool tlb_inv_triggered;
+ /** @tlb_inv_done: completion of TLB invalidation */
+ struct completion tlb_inv_done;
+};
+
+#ifdef CONFIG_PCI_IOV
+int xe_mert_invalidate_lmtt(struct xe_tile *tile);
+void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl);
+#else
+static inline void xe_mert_irq_handler(struct xe_device *xe, u32 master_ctl) { }
+#endif
+
+#endif /* __XE_MERT_H__ */
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 2184af413b91..f3b66b55acfb 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -34,6 +34,7 @@
#include "xe_res_cursor.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
#include "xe_validation.h"
@@ -1103,12 +1104,16 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
u32 batch_size, batch_size_allocated;
struct xe_device *xe = gt_to_xe(gt);
struct xe_res_cursor src_it, ccs_it;
+ struct xe_sriov_vf_ccs_ctx *ctx;
+ struct xe_sa_manager *bb_pool;
u64 size = xe_bo_size(src_bo);
struct xe_bb *bb = NULL;
u64 src_L0, src_L0_ofs;
u32 src_L0_pt;
int err;
+ ctx = &xe->sriov.vf.ccs.contexts[read_write];
+
xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
@@ -1141,11 +1146,15 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
size -= src_L0;
}
+ bb_pool = ctx->mem.ccs_bb_pool;
+ guard(mutex)(xe_sa_bo_swap_guard(bb_pool));
+ xe_sa_bo_swap_shadow(bb_pool);
+
bb = xe_bb_ccs_new(gt, batch_size, read_write);
if (IS_ERR(bb)) {
drm_err(&xe->drm, "BB allocation failed.\n");
err = PTR_ERR(bb);
- goto err_ret;
+ return err;
}
batch_size_allocated = batch_size;
@@ -1194,10 +1203,52 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
xe_assert(xe, (batch_size_allocated == bb->len));
src_bo->bb_ccs[read_write] = bb;
+ xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+ xe_sa_bo_sync_shadow(bb->bo);
return 0;
+}
-err_ret:
- return err;
+/**
+ * xe_migrate_ccs_rw_copy_clear() - Clear the CCS read/write batch buffer
+ * content.
+ * @src_bo: The buffer object whose CCS read/write batch buffer is cleared.
+ * @read_write: Selects the CCS read or write context whose BB is cleared.
+ *
+ * Directly clearing the BB lacks atomicity and can lead to undefined
+ * behavior if the vCPU is halted mid-operation during the clearing
+ * process. To avoid this issue, we use a shadow buffer object approach.
+ *
+ * First swap the SA BO address with the shadow BO, perform the clearing
+ * operation on the BB, update the shadow BO in the ring buffer, then
+ * sync the shadow and the actual buffer to maintain consistency.
+ *
+ * Return: None.
+ */
+void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
+ enum xe_sriov_vf_ccs_rw_ctxs read_write)
+{
+ struct xe_bb *bb = src_bo->bb_ccs[read_write];
+ struct xe_device *xe = xe_bo_device(src_bo);
+ struct xe_sriov_vf_ccs_ctx *ctx;
+ struct xe_sa_manager *bb_pool;
+ u32 *cs;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ ctx = &xe->sriov.vf.ccs.contexts[read_write];
+ bb_pool = ctx->mem.ccs_bb_pool;
+
+ guard(mutex)(xe_sa_bo_swap_guard(bb_pool));
+ xe_sa_bo_swap_shadow(bb_pool);
+
+ cs = xe_sa_bo_cpu_addr(bb->bo);
+ memset(cs, MI_NOOP, bb->len * sizeof(u32));
+ xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+
+ xe_sa_bo_sync_shadow(bb->bo);
+
+ xe_bb_free(bb, NULL);
+ src_bo->bb_ccs[read_write] = NULL;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 260e298e5dd7..464c05dde1ba 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -134,6 +134,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
struct xe_bo *src_bo,
enum xe_sriov_vf_ccs_rw_ctxs read_write);
+void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
+ enum xe_sriov_vf_ccs_rw_ctxs read_write);
+
struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 6613d3b48a84..0b7225bd77e0 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -811,26 +811,20 @@ int xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
struct xe_device *xe = gt_to_xe(gt);
enum xe_force_wake_domains domain;
struct xe_mocs_info table;
- unsigned int fw_ref, flags;
- int err = 0;
+ unsigned int flags;
flags = get_mocs_settings(xe, &table);
domain = flags & HAS_LNCF_MOCS ? XE_FORCEWAKE_ALL : XE_FW_GT;
- xe_pm_runtime_get_noresume(xe);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), domain);
- if (!xe_force_wake_ref_has_domain(fw_ref, domain)) {
- err = -ETIMEDOUT;
- goto err_fw;
- }
+ guard(xe_pm_runtime_noresume)(xe);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), domain);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, domain))
+ return -ETIMEDOUT;
table.ops->dump(&table, flags, gt, p);
-err_fw:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- xe_pm_runtime_put(xe);
- return err;
+ return 0;
}
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c
index 33f4ac82fc80..01510061d4d4 100644
--- a/drivers/gpu/drm/xe/xe_nvm.c
+++ b/drivers/gpu/drm/xe/xe_nvm.c
@@ -10,6 +10,7 @@
#include "xe_device_types.h"
#include "xe_mmio.h"
#include "xe_nvm.h"
+#include "xe_pcode_api.h"
#include "regs/xe_gsc_regs.h"
#include "xe_sriov.h"
@@ -45,39 +46,50 @@ static bool xe_nvm_non_posted_erase(struct xe_device *xe)
{
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
- if (xe->info.platform != XE_BATTLEMAGE)
+ switch (xe->info.platform) {
+ case XE_CRESCENTISLAND:
+ case XE_BATTLEMAGE:
+ return !(xe_mmio_read32(mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) &
+ NVM_NON_POSTED_ERASE_CHICKEN_BIT);
+ default:
return false;
- return !(xe_mmio_read32(mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) &
- NVM_NON_POSTED_ERASE_CHICKEN_BIT);
+ }
}
static bool xe_nvm_writable_override(struct xe_device *xe)
{
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
bool writable_override;
- resource_size_t base;
+ struct xe_reg reg;
+ u32 test_bit;
switch (xe->info.platform) {
+ case XE_CRESCENTISLAND:
+ reg = PCODE_SCRATCH(0);
+ test_bit = FDO_MODE;
+ break;
case XE_BATTLEMAGE:
- base = DG2_GSC_HECI2_BASE;
+ reg = HECI_FWSTS2(DG2_GSC_HECI2_BASE);
+ test_bit = HECI_FW_STATUS_2_NVM_ACCESS_MODE;
break;
case XE_PVC:
- base = PVC_GSC_HECI2_BASE;
+ reg = HECI_FWSTS2(PVC_GSC_HECI2_BASE);
+ test_bit = HECI_FW_STATUS_2_NVM_ACCESS_MODE;
break;
case XE_DG2:
- base = DG2_GSC_HECI2_BASE;
+ reg = HECI_FWSTS2(DG2_GSC_HECI2_BASE);
+ test_bit = HECI_FW_STATUS_2_NVM_ACCESS_MODE;
break;
case XE_DG1:
- base = DG1_GSC_HECI2_BASE;
+ reg = HECI_FWSTS2(DG1_GSC_HECI2_BASE);
+ test_bit = HECI_FW_STATUS_2_NVM_ACCESS_MODE;
break;
default:
drm_err(&xe->drm, "Unknown platform\n");
return true;
}
- writable_override =
- !(xe_mmio_read32(mmio, HECI_FWSTS2(base)) &
- HECI_FW_STATUS_2_NVM_ACCESS_MODE);
+ writable_override = !(xe_mmio_read32(mmio, reg) & test_bit);
if (writable_override)
drm_info(&xe->drm, "NVM access overridden by jumper\n");
return writable_override;
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index f8bb28ab8124..abf87fe0b345 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -1941,6 +1941,7 @@ static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type)
type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC;
case DRM_XE_OA_UNIT_TYPE_OAM:
case DRM_XE_OA_UNIT_TYPE_OAM_SAG:
+ case DRM_XE_OA_UNIT_TYPE_MERT:
return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC;
default:
return false;
@@ -1966,10 +1967,6 @@ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param)
enum xe_hw_engine_id id;
int ret = 0;
- /* If not provided, OA unit defaults to OA unit 0 as per uapi */
- if (!param->oa_unit)
- param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0];
-
/* When we have an exec_q, get hwe from the exec_q */
if (param->exec_q) {
param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class,
@@ -2035,7 +2032,15 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
if (ret)
return ret;
+ /* If not provided, OA unit defaults to OA unit 0 as per uapi */
+ if (!param.oa_unit)
+ param.oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0];
+
if (param.exec_queue_id > 0) {
+ /* An exec_queue is only needed for OAR/OAC functionality on OAG */
+ if (XE_IOCTL_DBG(oa->xe, param.oa_unit->type != DRM_XE_OA_UNIT_TYPE_OAG))
+ return -EINVAL;
+
param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id);
if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
return -ENOENT;
@@ -2224,6 +2229,8 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = {
{ .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */
{ .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */
{ .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */
+ { .start = 0x145194, .end = 0x145194 }, /* SYS_MEM_LAT_MEASURE */
+ { .start = 0x145340, .end = 0x14537C }, /* MERTSS_PES_0 - MERTSS_PES_7 */
{},
};
@@ -2515,7 +2522,12 @@ int xe_oa_register(struct xe_device *xe)
static u32 num_oa_units_per_gt(struct xe_gt *gt)
{
if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20)
- return 1;
+ /*
+ * The MERT OA unit belongs to the SoC, not a GT, so it should be accessed using
+ * xe_root_tile_mmio(). However, for all known platforms this is the same as
+ * accessing via xe_root_mmio_gt()->mmio.
+ */
+ return xe_device_has_mert(gt_to_xe(gt)) ? 2 : 1;
else if (!IS_DGFX(gt_to_xe(gt)))
return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */
else
@@ -2570,40 +2582,57 @@ static u32 __hwe_oa_unit(struct xe_hw_engine *hwe)
static struct xe_oa_regs __oam_regs(u32 base)
{
return (struct xe_oa_regs) {
- base,
- OAM_HEAD_POINTER(base),
- OAM_TAIL_POINTER(base),
- OAM_BUFFER(base),
- OAM_CONTEXT_CONTROL(base),
- OAM_CONTROL(base),
- OAM_DEBUG(base),
- OAM_STATUS(base),
- OAM_CONTROL_COUNTER_SEL_MASK,
+ .base = base,
+ .oa_head_ptr = OAM_HEAD_POINTER(base),
+ .oa_tail_ptr = OAM_TAIL_POINTER(base),
+ .oa_buffer = OAM_BUFFER(base),
+ .oa_ctx_ctrl = OAM_CONTEXT_CONTROL(base),
+ .oa_ctrl = OAM_CONTROL(base),
+ .oa_debug = OAM_DEBUG(base),
+ .oa_status = OAM_STATUS(base),
+ .oa_mmio_trg = OAM_MMIO_TRG(base),
+ .oa_ctrl_counter_select_mask = OAM_CONTROL_COUNTER_SEL_MASK,
};
}
static struct xe_oa_regs __oag_regs(void)
{
return (struct xe_oa_regs) {
- 0,
- OAG_OAHEADPTR,
- OAG_OATAILPTR,
- OAG_OABUFFER,
- OAG_OAGLBCTXCTRL,
- OAG_OACONTROL,
- OAG_OA_DEBUG,
- OAG_OASTATUS,
- OAG_OACONTROL_OA_COUNTER_SEL_MASK,
+ .base = 0,
+ .oa_head_ptr = OAG_OAHEADPTR,
+ .oa_tail_ptr = OAG_OATAILPTR,
+ .oa_buffer = OAG_OABUFFER,
+ .oa_ctx_ctrl = OAG_OAGLBCTXCTRL,
+ .oa_ctrl = OAG_OACONTROL,
+ .oa_debug = OAG_OA_DEBUG,
+ .oa_status = OAG_OASTATUS,
+ .oa_mmio_trg = OAG_MMIOTRIGGER,
+ .oa_ctrl_counter_select_mask = OAG_OACONTROL_OA_COUNTER_SEL_MASK,
+ };
+}
+
+static struct xe_oa_regs __oamert_regs(void)
+{
+ return (struct xe_oa_regs) {
+ .base = 0,
+ .oa_head_ptr = OAMERT_HEAD_POINTER,
+ .oa_tail_ptr = OAMERT_TAIL_POINTER,
+ .oa_buffer = OAMERT_BUFFER,
+ .oa_ctx_ctrl = OAMERT_CONTEXT_CONTROL,
+ .oa_ctrl = OAMERT_CONTROL,
+ .oa_debug = OAMERT_DEBUG,
+ .oa_status = OAMERT_STATUS,
+ .oa_mmio_trg = OAMERT_MMIO_TRG,
+ .oa_ctrl_counter_select_mask = OAM_CONTROL_COUNTER_SEL_MASK,
};
}
static void __xe_oa_init_oa_units(struct xe_gt *gt)
{
- /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */
const u32 oam_base_addr[] = {
- [XE_OAM_UNIT_SAG] = 0x13000,
- [XE_OAM_UNIT_SCMI_0] = 0x14000,
- [XE_OAM_UNIT_SCMI_1] = 0x14800,
+ [XE_OAM_UNIT_SAG] = XE_OAM_SAG_BASE,
+ [XE_OAM_UNIT_SCMI_0] = XE_OAM_SCMI_0_BASE,
+ [XE_OAM_UNIT_SCMI_1] = XE_OAM_SCMI_1_BASE,
};
int i, num_units = gt->oa.num_oa_units;
@@ -2611,8 +2640,15 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt)
struct xe_oa_unit *u = &gt->oa.oa_unit[i];
if (xe_gt_is_main_type(gt)) {
- u->regs = __oag_regs();
- u->type = DRM_XE_OA_UNIT_TYPE_OAG;
+ if (!i) {
+ u->regs = __oag_regs();
+ u->type = DRM_XE_OA_UNIT_TYPE_OAG;
+ } else {
+ xe_gt_assert(gt, xe_device_has_mert(gt_to_xe(gt)));
+ xe_gt_assert(gt, gt == xe_root_mmio_gt(gt_to_xe(gt)));
+ u->regs = __oamert_regs();
+ u->type = DRM_XE_OA_UNIT_TYPE_MERT;
+ }
} else {
xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270);
u->regs = __oam_regs(oam_base_addr[i]);
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index cf080f412189..08cc8d7c2215 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -87,6 +87,7 @@ struct xe_oa_regs {
struct xe_reg oa_ctrl;
struct xe_reg oa_debug;
struct xe_reg oa_status;
+ struct xe_reg oa_mmio_trg;
u32 oa_ctrl_counter_select_mask;
};
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c
new file mode 100644
index 000000000000..fd8c33761127
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "xe_page_reclaim.h"
+
+#include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_pat.h"
+#include "xe_sa.h"
+#include "xe_tlb_inval_types.h"
+#include "xe_vm.h"
+
+/**
+ * xe_page_reclaim_skip() - Decide whether PRL should be skipped for a VMA
+ * @tile: Tile owning the VMA
+ * @vma: VMA under consideration
+ *
+ * PPC flushing may be handled by HW for specific PAT encodings.
+ * Skip PPC flushing/page reclaim in the scenarios below, where the extra
+ * flush would be redundant:
+ * - pat_index maps to transient display (l3_policy = 1)
+ *
+ * Return: true when page reclamation is unnecessary, false otherwise.
+ */
+bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma)
+{
+ u8 l3_policy;
+
+ l3_policy = xe_pat_index_get_l3_policy(tile->xe, vma->attr.pat_index);
+
+ /*
+ * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC
+ * Transient display flushes (l3_policy = 1) are handled by HW.
+ *
+ * HW sequences these transient flushes at various sync points, so any
+ * page reclamation event will hit those sync points before the
+ * reclamation could execute.
+ */
+ return (l3_policy == XE_L3_POLICY_XD);
+}
+
+/**
+ * xe_page_reclaim_create_prl_bo() - Back a PRL with a suballocated GGTT BO
+ * @tlb_inval: TLB invalidation frontend associated with the request
+ * @prl: page reclaim list whose entries are copied into the BO
+ * @fence: TLB invalidation fence that the page reclaim action is paired with
+ *
+ * Suballocates a 4K BO out of the tile reclaim pool, copies the PRL CPU
+ * copy into the BO and queues the buffer for release when @fence signals.
+ *
+ * Return: struct drm_suballoc pointer on success or ERR_PTR on failure.
+ */
+struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval,
+ struct xe_page_reclaim_list *prl,
+ struct xe_tlb_inval_fence *fence)
+{
+ struct xe_gt *gt = container_of(tlb_inval, struct xe_gt, tlb_inval);
+ struct xe_tile *tile = gt_to_tile(gt);
+ /* (+1) for NULL page_reclaim_entry to indicate end of list */
+ int prl_size = min(prl->num_entries + 1, XE_PAGE_RECLAIM_MAX_ENTRIES) *
+ sizeof(struct xe_guc_page_reclaim_entry);
+ struct drm_suballoc *prl_sa;
+
+ /* Maximum size of a PRL is one 4K page */
+ prl_sa = __xe_sa_bo_new(tile->mem.reclaim_pool,
+ prl_size, GFP_ATOMIC);
+ if (IS_ERR(prl_sa))
+ return prl_sa;
+
+ memcpy(xe_sa_bo_cpu_addr(prl_sa), prl->entries,
+ prl_size);
+ xe_sa_bo_flush_write(prl_sa);
+ /* Queue up sa_bo_free on tlb invalidation fence signal */
+ xe_sa_bo_free(prl_sa, &fence->base);
+
+ return prl_sa;
+}
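For illustration, a minimal caller sketch; tlb_inval, prl and fence are assumed to come from the surrounding TLB invalidation path (they are not defined in this hunk), and the GGTT address lookup assumes the existing xe_sa_bo_gpu_addr() helper:

	/* Illustrative sketch only, not part of this patch */
	struct drm_suballoc *prl_sa;
	u64 prl_ggtt_addr;

	prl_sa = xe_page_reclaim_create_prl_bo(tlb_inval, prl, fence);
	if (IS_ERR(prl_sa))
		return PTR_ERR(prl_sa);		/* fall back to a plain invalidation */

	/* GGTT address to program into the invalidation descriptor */
	prl_ggtt_addr = xe_sa_bo_gpu_addr(prl_sa);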
+
+/**
+ * xe_page_reclaim_list_invalidate() - Mark a PRL as invalid
+ * @prl: Page reclaim list to reset
+ *
+ * Drops this list's reference on the backing entries page, clears the
+ * entries pointer and marks the list as invalid so future users know the
+ * PRL is unusable.
+ */
+void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl)
+{
+ xe_page_reclaim_entries_put(prl->entries);
+ prl->entries = NULL;
+ prl->num_entries = XE_PAGE_RECLAIM_INVALID_LIST;
+}
+
+/**
+ * xe_page_reclaim_list_init() - Initialize a page reclaim list
+ * @prl: Page reclaim list to initialize
+ *
+ * Resets both fields so the list starts out empty and unallocated.
+ */
+void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl)
+{
+ prl->entries = NULL;
+ prl->num_entries = 0;
+}
+
+/**
+ * xe_page_reclaim_list_alloc_entries() - Allocate page reclaim list entries
+ * @prl: Page reclaim list to allocate entries for
+ *
+ * Allocates one zeroed 4K page to back the PRL entries and resets the
+ * entry count.
+ *
+ * Return: 0 on success or -ENOMEM if the page allocation fails.
+ */
+int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl)
+{
+ struct page *page;
+
+ if (XE_WARN_ON(prl->entries))
+ return 0;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (page) {
+ prl->entries = page_address(page);
+ prl->num_entries = 0;
+ }
+
+ return page ? 0 : -ENOMEM;
+}
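A minimal lifecycle sketch for a PRL, mirroring the init/alloc/validate/release flow implemented above; the function name and the unnamed consumer are assumptions for illustration only:

static void prl_lifecycle_sketch(void)
{
	struct xe_page_reclaim_list prl;

	xe_page_reclaim_list_init(&prl);		/* empty, unallocated */

	if (xe_page_reclaim_list_alloc_entries(&prl))
		return;					/* no PRL: fall back to a plain invalidation */

	/*
	 * ... fill prl.entries[] and bump prl.num_entries, then, if
	 * xe_page_reclaim_list_valid(&prl), hand the list to its consumer
	 * (e.g. a TLB invalidation job) which takes its own reference ...
	 */

	xe_page_reclaim_entries_put(prl.entries);	/* drop this owner's reference */
}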
diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.h b/drivers/gpu/drm/xe/xe_page_reclaim.h
new file mode 100644
index 000000000000..a4f58e0ce9b4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_page_reclaim.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PAGE_RECLAIM_H_
+#define _XE_PAGE_RECLAIM_H_
+
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/bits.h>
+
+#define XE_PAGE_RECLAIM_MAX_ENTRIES 512
+#define XE_PAGE_RECLAIM_LIST_MAX_SIZE SZ_4K
+
+struct xe_tlb_inval;
+struct xe_tlb_inval_fence;
+struct xe_tile;
+struct xe_vma;
+
+struct xe_guc_page_reclaim_entry {
+ u64 qw;
+/* valid reclaim entry bit */
+#define XE_PAGE_RECLAIM_VALID BIT_ULL(0)
+/*
+ * offset order of page size to be reclaimed
+ * page_size = 1 << (XE_PTE_SHIFT + reclamation_size)
+ */
+#define XE_PAGE_RECLAIM_SIZE GENMASK_ULL(6, 1)
+#define XE_PAGE_RECLAIM_RSVD_0 GENMASK_ULL(11, 7)
+/* lower 20 bits of the physical address */
+#define XE_PAGE_RECLAIM_ADDR_LO GENMASK_ULL(31, 12)
+/* upper 20 bits of the physical address */
+#define XE_PAGE_RECLAIM_ADDR_HI GENMASK_ULL(51, 32)
+#define XE_PAGE_RECLAIM_RSVD_1 GENMASK_ULL(63, 52)
+} __packed;
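As an illustration of the qw layout above, a sketch packing one 2M reclaim entry; the physical address (a 2M page at 1 GiB) and XE_PTE_SHIFT == 12 are assumptions for the example, and FIELD_PREP comes from <linux/bitfield.h>:

	u64 phys_page = 0x40000000ull >> 12;	/* 4K page-frame number of the 2M page */
	u64 qw = FIELD_PREP(XE_PAGE_RECLAIM_VALID, 1) |
		 FIELD_PREP(XE_PAGE_RECLAIM_SIZE, 9) |			/* 1 << (9 + 12) = 2M */
		 FIELD_PREP(XE_PAGE_RECLAIM_ADDR_LO, phys_page) |	/* lower 20 PFN bits */
		 FIELD_PREP(XE_PAGE_RECLAIM_ADDR_HI, phys_page >> 20);	/* upper 20 PFN bits */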
+
+struct xe_page_reclaim_list {
+ /** @entries: array of page reclaim entries, page allocated */
+ struct xe_guc_page_reclaim_entry *entries;
+ /** @num_entries: number of entries */
+ int num_entries;
+#define XE_PAGE_RECLAIM_INVALID_LIST -1
+};
+
+/**
+ * xe_page_reclaim_list_is_new() - Check if PRL is new allocation
+ * @prl: Pointer to page reclaim list
+ *
+ * A freshly initialized PRL has a NULL entries pointer and a zero entry
+ * count.
+ *
+ * Return: true if the PRL has not been allocated yet, false otherwise.
+ */
+static inline bool xe_page_reclaim_list_is_new(struct xe_page_reclaim_list *prl)
+{
+ return !prl->entries && prl->num_entries == 0;
+}
+
+/**
+ * xe_page_reclaim_list_valid() - Check if the page reclaim list is valid
+ * @prl: Pointer to page reclaim list
+ *
+ * A PRL is valid when its entries page has been allocated and it has not
+ * been marked with XE_PAGE_RECLAIM_INVALID_LIST.
+ *
+ * Return: true if the PRL can be used, false otherwise.
+ */
+static inline bool xe_page_reclaim_list_valid(struct xe_page_reclaim_list *prl)
+{
+ return !xe_page_reclaim_list_is_new(prl) &&
+ prl->num_entries != XE_PAGE_RECLAIM_INVALID_LIST;
+}
+
+bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma);
+struct drm_suballoc *xe_page_reclaim_create_prl_bo(struct xe_tlb_inval *tlb_inval,
+ struct xe_page_reclaim_list *prl,
+ struct xe_tlb_inval_fence *fence);
+void xe_page_reclaim_list_invalidate(struct xe_page_reclaim_list *prl);
+void xe_page_reclaim_list_init(struct xe_page_reclaim_list *prl);
+int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl);
+/**
+ * xe_page_reclaim_entries_get() - Increment the reference count of page reclaim entries.
+ * @entries: Pointer to the array of page reclaim entries.
+ *
+ * This function increments the reference count of the backing page.
+ */
+static inline void xe_page_reclaim_entries_get(struct xe_guc_page_reclaim_entry *entries)
+{
+ if (entries)
+ get_page(virt_to_page(entries));
+}
+
+/**
+ * xe_page_reclaim_entries_put() - Decrement the reference count of page reclaim entries.
+ * @entries: Pointer to the array of page reclaim entries.
+ *
+ * This function decrements the reference count of the backing page
+ * and frees it if the count reaches zero.
+ */
+static inline void xe_page_reclaim_entries_put(struct xe_guc_page_reclaim_entry *entries)
+{
+ if (entries)
+ put_page(virt_to_page(entries));
+}
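A short sketch of the intended get/put pairing when a second owner (e.g. a TLB invalidation job, as in the xe_pt.c changes later in this patch) takes over the entries page; the ordering shown is an assumption of typical usage and prl is assumed to be a populated list:

	xe_page_reclaim_entries_get(prl->entries);	/* second owner pins the page */
	xe_page_reclaim_list_invalidate(prl);		/* first owner drops its reference */
	/* the page is freed once the second owner calls xe_page_reclaim_entries_put() */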
+
+#endif /* _XE_PAGE_RECLAIM_H_ */
diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
index afb06598b6e1..6bee53d6ffc3 100644
--- a/drivers/gpu/drm/xe/xe_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_pagefault.c
@@ -223,22 +223,22 @@ static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue,
static void xe_pagefault_print(struct xe_pagefault *pf)
{
- xe_gt_dbg(pf->gt, "\n\tASID: %d\n"
- "\tFaulted Address: 0x%08x%08x\n"
- "\tFaultType: %d\n"
- "\tAccessType: %d\n"
- "\tFaultLevel: %d\n"
- "\tEngineClass: %d %s\n"
- "\tEngineInstance: %d\n",
- pf->consumer.asid,
- upper_32_bits(pf->consumer.page_addr),
- lower_32_bits(pf->consumer.page_addr),
- pf->consumer.fault_type,
- pf->consumer.access_type,
- pf->consumer.fault_level,
- pf->consumer.engine_class,
- xe_hw_engine_class_to_str(pf->consumer.engine_class),
- pf->consumer.engine_instance);
+ xe_gt_info(pf->gt, "\n\tASID: %d\n"
+ "\tFaulted Address: 0x%08x%08x\n"
+ "\tFaultType: %d\n"
+ "\tAccessType: %d\n"
+ "\tFaultLevel: %d\n"
+ "\tEngineClass: %d %s\n"
+ "\tEngineInstance: %d\n",
+ pf->consumer.asid,
+ upper_32_bits(pf->consumer.page_addr),
+ lower_32_bits(pf->consumer.page_addr),
+ pf->consumer.fault_type,
+ pf->consumer.access_type,
+ pf->consumer.fault_level,
+ pf->consumer.engine_class,
+ xe_hw_engine_class_to_str(pf->consumer.engine_class),
+ pf->consumer.engine_instance);
}
static void xe_pagefault_queue_work(struct work_struct *w)
@@ -260,8 +260,8 @@ static void xe_pagefault_queue_work(struct work_struct *w)
err = xe_pagefault_service(&pf);
if (err) {
xe_pagefault_print(&pf);
- xe_gt_dbg(pf.gt, "Fault response: Unsuccessful %pe\n",
- ERR_PTR(err));
+ xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n",
+ ERR_PTR(err));
}
pf.producer.ops->ack_fault(&pf, err);
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 68171cceea18..2c3375e0250b 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -9,6 +9,7 @@
#include <generated/xe_wa_oob.h>
+#include "regs/xe_gt_regs.h"
#include "regs/xe_reg_defs.h"
#include "xe_assert.h"
#include "xe_device.h"
@@ -50,8 +51,37 @@
#define XELP_PAT_WC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 1)
#define XELP_PAT_UC REG_FIELD_PREP(XELP_MEM_TYPE_MASK, 0)
+#define PAT_LABEL_LEN 20
+
static const char *XELP_MEM_TYPE_STR_MAP[] = { "UC", "WC", "WT", "WB" };
+static void xe_pat_index_label(char *label, size_t len, int index)
+{
+ snprintf(label, len, "PAT[%2d] ", index);
+}
+
+static void xelp_pat_entry_dump(struct drm_printer *p, int index, u32 pat)
+{
+ u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
+
+ drm_printf(p, "PAT[%2d] = %s (%#8x)\n", index,
+ XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+}
+
+static void xehpc_pat_entry_dump(struct drm_printer *p, int index, u32 pat)
+{
+ drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", index,
+ REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat),
+ REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat);
+}
+
+static void xelpg_pat_entry_dump(struct drm_printer *p, int index, u32 pat)
+{
+ drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", index,
+ REG_FIELD_GET(XELPG_L4_POLICY_MASK, pat),
+ REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat);
+}
+
struct xe_pat_ops {
void (*program_graphics)(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries);
@@ -196,6 +226,19 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
return xe->pat.table[pat_index].coh_mode;
}
+bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index)
+{
+ WARN_ON(pat_index >= xe->pat.n_entries);
+ return !!(xe->pat.table[pat_index].value & XE2_COMP_EN);
+}
+
+u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index)
+{
+ WARN_ON(pat_index >= xe->pat.n_entries);
+
+ return REG_FIELD_GET(XE2_L3_POLICY, xe->pat.table[pat_index].value);
+}
+
static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries)
{
@@ -233,24 +276,20 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta
static int xelp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
int i;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
drm_printf(p, "PAT table:\n");
for (i = 0; i < xe->pat.n_entries; i++) {
u32 pat = xe_mmio_read32(&gt->mmio, XE_REG(_PAT_INDEX(i)));
- u8 mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
- drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
- XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+ xelp_pat_entry_dump(p, i, pat);
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -262,26 +301,20 @@ static const struct xe_pat_ops xelp_pat_ops = {
static int xehp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
int i;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
drm_printf(p, "PAT table:\n");
for (i = 0; i < xe->pat.n_entries; i++) {
u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
- u8 mem_type;
-
- mem_type = REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat);
- drm_printf(p, "PAT[%2d] = %s (%#8x)\n", i,
- XELP_MEM_TYPE_STR_MAP[mem_type], pat);
+ xelp_pat_entry_dump(p, i, pat);
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -293,11 +326,10 @@ static const struct xe_pat_ops xehp_pat_ops = {
static int xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
int i;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
drm_printf(p, "PAT table:\n");
@@ -305,12 +337,9 @@ static int xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
for (i = 0; i < xe->pat.n_entries; i++) {
u32 pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
- drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i,
- REG_FIELD_GET(XELP_MEM_TYPE_MASK, pat),
- REG_FIELD_GET(XEHPC_CLOS_LEVEL_MASK, pat), pat);
+ xehpc_pat_entry_dump(p, i, pat);
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -322,11 +351,10 @@ static const struct xe_pat_ops xehpc_pat_ops = {
static int xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
int i;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
drm_printf(p, "PAT table:\n");
@@ -339,12 +367,9 @@ static int xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
- drm_printf(p, "PAT[%2d] = [ %u, %u ] (%#8x)\n", i,
- REG_FIELD_GET(XELPG_L4_POLICY_MASK, pat),
- REG_FIELD_GET(XELPG_INDEX_COH_MODE_MASK, pat), pat);
+ xelpg_pat_entry_dump(p, i, pat);
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -358,15 +383,38 @@ static const struct xe_pat_ops xelpg_pat_ops = {
.dump = xelpg_dump,
};
+static void xe2_pat_entry_dump(struct drm_printer *p, const char *label, u32 pat, bool rsvd)
+{
+ drm_printf(p, "%s= [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", label,
+ !!(pat & XE2_NO_PROMOTE),
+ !!(pat & XE2_COMP_EN),
+ REG_FIELD_GET(XE2_L3_CLOS, pat),
+ REG_FIELD_GET(XE2_L3_POLICY, pat),
+ REG_FIELD_GET(XE2_L4_POLICY, pat),
+ REG_FIELD_GET(XE2_COH_MODE, pat),
+ pat, rsvd ? " *" : "");
+}
+
+static void xe3p_xpc_pat_entry_dump(struct drm_printer *p, const char *label, u32 pat, bool rsvd)
+{
+ drm_printf(p, "%s= [ %u, %u, %u, %u, %u ] (%#8x)%s\n", label,
+ !!(pat & XE2_NO_PROMOTE),
+ REG_FIELD_GET(XE2_L3_CLOS, pat),
+ REG_FIELD_GET(XE2_L3_POLICY, pat),
+ REG_FIELD_GET(XE2_L4_POLICY, pat),
+ REG_FIELD_GET(XE2_COH_MODE, pat),
+ pat, rsvd ? " *" : "");
+}
+
static int xe2_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
u32 pat;
int i;
+ char label[PAT_LABEL_LEN];
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
drm_printf(p, "PAT table: (* = reserved entry)\n");
@@ -377,14 +425,8 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p)
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
- drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", i,
- !!(pat & XE2_NO_PROMOTE),
- !!(pat & XE2_COMP_EN),
- REG_FIELD_GET(XE2_L3_CLOS, pat),
- REG_FIELD_GET(XE2_L3_POLICY, pat),
- REG_FIELD_GET(XE2_L4_POLICY, pat),
- REG_FIELD_GET(XE2_COH_MODE, pat),
- pat, xe->pat.table[i].valid ? "" : " *");
+ xe_pat_index_label(label, sizeof(label), i);
+ xe2_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid);
}
/*
@@ -397,16 +439,8 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p)
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
drm_printf(p, "Page Table Access:\n");
- drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u, %u ] (%#8x)\n",
- !!(pat & XE2_NO_PROMOTE),
- !!(pat & XE2_COMP_EN),
- REG_FIELD_GET(XE2_L3_CLOS, pat),
- REG_FIELD_GET(XE2_L3_POLICY, pat),
- REG_FIELD_GET(XE2_L4_POLICY, pat),
- REG_FIELD_GET(XE2_COH_MODE, pat),
- pat);
+ xe2_pat_entry_dump(p, "PTA_MODE", pat, false);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -419,12 +453,12 @@ static const struct xe_pat_ops xe2_pat_ops = {
static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
u32 pat;
int i;
+ char label[PAT_LABEL_LEN];
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
drm_printf(p, "PAT table: (* = reserved entry)\n");
@@ -432,13 +466,8 @@ static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p)
for (i = 0; i < xe->pat.n_entries; i++) {
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
- drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)%s\n", i,
- !!(pat & XE2_NO_PROMOTE),
- REG_FIELD_GET(XE2_L3_CLOS, pat),
- REG_FIELD_GET(XE2_L3_POLICY, pat),
- REG_FIELD_GET(XE2_L4_POLICY, pat),
- REG_FIELD_GET(XE2_COH_MODE, pat),
- pat, xe->pat.table[i].valid ? "" : " *");
+ xe_pat_index_label(label, sizeof(label), i);
+ xe3p_xpc_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid);
}
/*
@@ -448,15 +477,8 @@ static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p)
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
drm_printf(p, "Page Table Access:\n");
- drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u ] (%#8x)\n",
- !!(pat & XE2_NO_PROMOTE),
- REG_FIELD_GET(XE2_L3_CLOS, pat),
- REG_FIELD_GET(XE2_L3_POLICY, pat),
- REG_FIELD_GET(XE2_L4_POLICY, pat),
- REG_FIELD_GET(XE2_COH_MODE, pat),
- pat);
+ xe3p_xpc_pat_entry_dump(p, "PTA_MODE", pat, false);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
return 0;
}
@@ -578,3 +600,65 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
return xe->pat.ops->dump(gt, p);
}
+
+/**
+ * xe_pat_dump_sw_config() - Dump the software-configured GT PAT table into a drm printer.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ char label[PAT_LABEL_LEN];
+
+ if (!xe->pat.table || !xe->pat.n_entries)
+ return -EOPNOTSUPP;
+
+ drm_printf(p, "PAT table:%s\n", GRAPHICS_VER(xe) >= 20 ? " (* = reserved entry)" : "");
+ for (u32 i = 0; i < xe->pat.n_entries; i++) {
+ u32 pat = xe->pat.table[i].value;
+
+ if (GRAPHICS_VERx100(xe) == 3511) {
+ xe_pat_index_label(label, sizeof(label), i);
+ xe3p_xpc_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid);
+ } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) {
+ xe_pat_index_label(label, sizeof(label), i);
+ xe2_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid);
+ } else if (xe->info.platform == XE_METEORLAKE) {
+ xelpg_pat_entry_dump(p, i, pat);
+ } else if (xe->info.platform == XE_PVC) {
+ xehpc_pat_entry_dump(p, i, pat);
+ } else if (xe->info.platform == XE_DG2 || GRAPHICS_VERx100(xe) <= 1210) {
+ xelp_pat_entry_dump(p, i, pat);
+ } else {
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (xe->pat.pat_pta) {
+ u32 pat = xe->pat.pat_pta->value;
+
+ drm_printf(p, "Page Table Access:\n");
+ xe2_pat_entry_dump(p, "PTA_MODE", pat, false);
+ }
+
+ if (xe->pat.pat_ats) {
+ u32 pat = xe->pat.pat_ats->value;
+
+ drm_printf(p, "PCIe ATS/PASID:\n");
+ xe2_pat_entry_dump(p, "PAT_ATS ", pat, false);
+ }
+
+ drm_printf(p, "Cache Level:\n");
+ drm_printf(p, "IDX[XE_CACHE_NONE] = %d\n", xe->pat.idx[XE_CACHE_NONE]);
+ drm_printf(p, "IDX[XE_CACHE_WT] = %d\n", xe->pat.idx[XE_CACHE_WT]);
+ drm_printf(p, "IDX[XE_CACHE_WB] = %d\n", xe->pat.idx[XE_CACHE_WB]);
+ if (GRAPHICS_VER(xe) >= 20) {
+ drm_printf(p, "IDX[XE_CACHE_NONE_COMPRESSION] = %d\n",
+ xe->pat.idx[XE_CACHE_NONE_COMPRESSION]);
+ }
+
+ return 0;
+}
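A minimal sketch of how the new dump could be wired up through a drm_printer, assuming a debugfs-style show callback; the hook and its private-data wiring are not part of this patch:

static int pat_sw_config_show(struct seq_file *m, void *data)
{
	struct xe_gt *gt = m->private;			/* assumed wiring */
	struct drm_printer p = drm_seq_file_printer(m);

	return xe_pat_dump_sw_config(gt, &p);
}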
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
index 05dae03a5f54..d5dadfb7f924 100644
--- a/drivers/gpu/drm/xe/xe_pat.h
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -49,6 +49,7 @@ void xe_pat_init_early(struct xe_device *xe);
void xe_pat_init(struct xe_gt *gt);
int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
+int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p);
/**
* xe_pat_index_get_coh_mode - Extract the coherency mode for the given
@@ -58,4 +59,24 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
*/
u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index);
+/**
+ * xe_pat_index_get_comp_en - Extract the compression enable flag for
+ * the given pat_index.
+ * @xe: xe device
+ * @pat_index: The pat_index to query
+ *
+ * Return: true if compression is enabled for this pat_index, false otherwise.
+ */
+bool xe_pat_index_get_comp_en(struct xe_device *xe, u16 pat_index);
+
+#define XE_L3_POLICY_WB 0 /* Write-back */
+#define XE_L3_POLICY_XD 1 /* WB - Transient Display */
+#define XE_L3_POLICY_UC 3 /* Uncached */
+/**
+ * xe_pat_index_get_l3_policy - Extract the L3 policy for the given pat_index.
+ * @xe: xe device
+ * @pat_index: The pat_index to query
+ *
+ * Return: the XE_L3_POLICY_* value encoded in the PAT entry.
+ */
+u16 xe_pat_index_get_l3_policy(struct xe_device *xe, u16 pat_index);
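For example, a caller can pair the helper with the XE_L3_POLICY_* values above to decide whether an explicit L3 flush is needed; this is a sketch of the pattern used by xe_page_reclaim_skip(), with xe and pat_index assumed:

	u16 l3_policy = xe_pat_index_get_l3_policy(xe, pat_index);
	bool hw_flushes = (l3_policy == XE_L3_POLICY_XD);	/* HW flushes transient display */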
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 9c9ea10d994c..18d4e6b5c319 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -108,6 +108,7 @@ static const struct xe_graphics_desc graphics_xe2 = {
static const struct xe_graphics_desc graphics_xe3p_xpc = {
XE2_GFX_FEATURES,
+ .has_indirect_ring_state = 1,
.hw_engine_mask =
GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1) |
GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0),
@@ -168,6 +169,7 @@ static const struct xe_device_desc tgl_desc = {
.pre_gmdid_media_ip = &media_ip_xem,
PLATFORM(TIGERLAKE),
.dma_mask_size = 39,
+ .has_cached_pt = true,
.has_display = true,
.has_llc = true,
.has_sriov = true,
@@ -182,6 +184,7 @@ static const struct xe_device_desc rkl_desc = {
.pre_gmdid_media_ip = &media_ip_xem,
PLATFORM(ROCKETLAKE),
.dma_mask_size = 39,
+ .has_cached_pt = true,
.has_display = true,
.has_llc = true,
.max_gt_per_tile = 1,
@@ -197,6 +200,7 @@ static const struct xe_device_desc adl_s_desc = {
.pre_gmdid_media_ip = &media_ip_xem,
PLATFORM(ALDERLAKE_S),
.dma_mask_size = 39,
+ .has_cached_pt = true,
.has_display = true,
.has_llc = true,
.has_sriov = true,
@@ -217,6 +221,7 @@ static const struct xe_device_desc adl_p_desc = {
.pre_gmdid_media_ip = &media_ip_xem,
PLATFORM(ALDERLAKE_P),
.dma_mask_size = 39,
+ .has_cached_pt = true,
.has_display = true,
.has_llc = true,
.has_sriov = true,
@@ -235,6 +240,7 @@ static const struct xe_device_desc adl_n_desc = {
.pre_gmdid_media_ip = &media_ip_xem,
PLATFORM(ALDERLAKE_N),
.dma_mask_size = 39,
+ .has_cached_pt = true,
.has_display = true,
.has_llc = true,
.has_sriov = true,
@@ -361,7 +367,9 @@ static const struct xe_device_desc bmg_desc = {
.has_mbx_power_limits = true,
.has_gsc_nvm = 1,
.has_heci_cscfi = 1,
+ .has_i2c = true,
.has_late_bind = true,
+ .has_pre_prod_wa = 1,
.has_sriov = true,
.has_mem_copy_instr = true,
.max_gt_per_tile = 2,
@@ -381,6 +389,7 @@ static const struct xe_device_desc ptl_desc = {
.has_flat_ccs = 1,
.has_sriov = true,
.has_mem_copy_instr = true,
+ .has_pre_prod_wa = 1,
.max_gt_per_tile = 2,
.needs_scratch = true,
.needs_shared_vf_gt_wq = true,
@@ -394,6 +403,7 @@ static const struct xe_device_desc nvls_desc = {
.has_display = true,
.has_flat_ccs = 1,
.has_mem_copy_instr = true,
+ .has_pre_prod_wa = 1,
.max_gt_per_tile = 2,
.require_force_probe = true,
.va_bits = 48,
@@ -406,7 +416,11 @@ static const struct xe_device_desc cri_desc = {
.dma_mask_size = 52,
.has_display = false,
.has_flat_ccs = false,
+ .has_gsc_nvm = 1,
+ .has_i2c = true,
.has_mbx_power_limits = true,
+ .has_mert = true,
+ .has_pre_prod_wa = 1,
.has_sriov = true,
.max_gt_per_tile = 2,
.require_force_probe = true,
@@ -663,6 +677,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.vram_flags = desc->vram_flags;
xe->info.is_dgfx = desc->is_dgfx;
+ xe->info.has_cached_pt = desc->has_cached_pt;
xe->info.has_fan_control = desc->has_fan_control;
/* runtime fusing may force flat_ccs to disabled later */
xe->info.has_flat_ccs = desc->has_flat_ccs;
@@ -670,8 +685,12 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.has_gsc_nvm = desc->has_gsc_nvm;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
+ xe->info.has_i2c = desc->has_i2c;
xe->info.has_late_bind = desc->has_late_bind;
xe->info.has_llc = desc->has_llc;
+ xe->info.has_mert = desc->has_mert;
+ xe->info.has_page_reclaim_hw_assist = desc->has_page_reclaim_hw_assist;
+ xe->info.has_pre_prod_wa = desc->has_pre_prod_wa;
xe->info.has_pxp = desc->has_pxp;
xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) &&
desc->has_sriov;
@@ -755,6 +774,7 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile,
gt->info.type = XE_GT_TYPE_MAIN;
gt->info.id = tile->id * xe->info.max_gt_per_tile;
gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
+ gt->info.multi_queue_engine_class_mask = graphics_desc->multi_queue_engine_class_mask;
gt->info.engine_mask = graphics_desc->hw_engine_mask;
/*
@@ -1153,6 +1173,15 @@ static int xe_pci_runtime_suspend(struct device *dev)
struct xe_device *xe = pdev_to_xe_device(pdev);
int err;
+ /*
+ * We hold an additional reference to the runtime PM to keep PF in D0
+ * during VFs lifetime, as our VFs do not implement the PM capability.
+ * This means we should never be runtime suspending as long as VFs are
+ * enabled.
+ */
+ xe_assert(xe, !IS_SRIOV_VF(xe));
+ xe_assert(xe, !pci_num_vf(pdev));
+
err = xe_pm_runtime_suspend(xe);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c
index 9ff69c4843b0..3fd22034f03e 100644
--- a/drivers/gpu/drm/xe/xe_pci_sriov.c
+++ b/drivers/gpu/drm/xe/xe_pci_sriov.c
@@ -219,7 +219,6 @@ static int pf_disable_vfs(struct xe_device *xe)
int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
struct xe_device *xe = pdev_to_xe_device(pdev);
- int ret;
if (!IS_SRIOV_PF(xe))
return -ENODEV;
@@ -233,14 +232,11 @@ int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (num_vfs && pci_num_vf(pdev))
return -EBUSY;
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
if (num_vfs > 0)
- ret = pf_enable_vfs(xe, num_vfs);
+ return pf_enable_vfs(xe, num_vfs);
else
- ret = pf_disable_vfs(xe);
- xe_pm_runtime_put(xe);
-
- return ret;
+ return pf_disable_vfs(xe);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index 9892c063a9c5..3bb51d155951 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -37,16 +37,21 @@ struct xe_device_desc {
u8 require_force_probe:1;
u8 is_dgfx:1;
+ u8 has_cached_pt:1;
u8 has_display:1;
u8 has_fan_control:1;
u8 has_flat_ccs:1;
u8 has_gsc_nvm:1;
u8 has_heci_gscfi:1;
u8 has_heci_cscfi:1;
+ u8 has_i2c:1;
u8 has_late_bind:1;
u8 has_llc:1;
u8 has_mbx_power_limits:1;
u8 has_mem_copy_instr:1;
+ u8 has_mert:1;
+ u8 has_pre_prod_wa:1;
+ u8 has_page_reclaim_hw_assist:1;
u8 has_pxp:1;
u8 has_sriov:1;
u8 needs_scratch:1;
@@ -58,6 +63,7 @@ struct xe_device_desc {
struct xe_graphics_desc {
u64 hw_engine_mask; /* hardware engines provided by graphics IP */
+ u16 multi_queue_engine_class_mask; /* bitmask of engine classes which support multi queue */
u8 has_asid:1;
u8 has_atomic_enable_pte_bit:1;
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 70dcd6625680..975892d6b230 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -77,11 +77,13 @@
#define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4))
/* PCODE_SCRATCH0 */
+#define BREADCRUMB_VERSION REG_GENMASK(31, 29)
#define AUXINFO_REG_OFFSET REG_GENMASK(17, 15)
#define OVERFLOW_REG_OFFSET REG_GENMASK(14, 12)
#define HISTORY_TRACKING REG_BIT(11)
#define OVERFLOW_SUPPORT REG_BIT(10)
#define AUXINFO_SUPPORT REG_BIT(9)
+#define FDO_MODE REG_BIT(4)
#define BOOT_STATUS REG_GENMASK(3, 1)
#define CRITICAL_FAILURE 4
#define NON_CRITICAL_FAILURE 7
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 766922530265..4390ba69610d 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -591,7 +591,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
}
for_each_gt(gt, xe, id) {
- err = xe_gt_suspend(gt);
+ err = xe->d3cold.allowed ? xe_gt_suspend(gt) : xe_gt_runtime_suspend(gt);
if (err)
goto out_resume;
}
@@ -633,10 +633,10 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_rpm_lockmap_acquire(xe);
- for_each_gt(gt, xe, id)
- xe_gt_idle_disable_c6(gt);
-
if (xe->d3cold.allowed) {
+ for_each_gt(gt, xe, id)
+ xe_gt_idle_disable_c6(gt);
+
err = xe_pcode_ready(xe, true);
if (err)
goto out;
@@ -657,7 +657,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_irq_resume(xe);
for_each_gt(gt, xe, id)
- xe_gt_resume(gt);
+ xe->d3cold.allowed ? xe_gt_resume(gt) : xe_gt_runtime_resume(gt);
xe_display_pm_runtime_resume(xe);
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index c63335eb69e5..0b20059dd7b3 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -425,7 +425,7 @@ static ssize_t event_attr_show(struct device *dev,
struct perf_pmu_events_attr *pmu_attr =
container_of(attr, struct perf_pmu_events_attr, attr);
- return sprintf(buf, "event=%#04llx\n", pmu_attr->id);
+ return sysfs_emit(buf, "event=%#04llx\n", pmu_attr->id);
}
#define XE_EVENT_ATTR(name_, v_, id_) \
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 884127b4d97d..6cd78bb2b652 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -12,6 +12,7 @@
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_migrate.h"
+#include "xe_page_reclaim.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
@@ -1535,6 +1536,9 @@ struct xe_pt_stage_unbind_walk {
/** @modified_end: Walk range start, modified like @modified_start. */
u64 modified_end;
+ /** @prl: Backing pointer to page reclaim list in pt_update_ops */
+ struct xe_page_reclaim_list *prl;
+
/* Output */
/* @wupd: Structure to track the page-table updates we're building */
struct xe_walk_update wupd;
@@ -1572,6 +1576,68 @@ static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level,
return false;
}
+/* Huge 2MB leaf lives directly in a level-1 table and has no children */
+static bool is_2m_pte(struct xe_pt *pte)
+{
+ return pte->level == 1 && !pte->base.children;
+}
+
+/* page_size = 2^(reclamation_size + XE_PTE_SHIFT) */
+#define COMPUTE_RECLAIM_ADDRESS_MASK(page_size) \
+({ \
+ BUILD_BUG_ON(!__builtin_constant_p(page_size)); \
+ ilog2(page_size) - XE_PTE_SHIFT; \
+})
+
+static int generate_reclaim_entry(struct xe_tile *tile,
+ struct xe_page_reclaim_list *prl,
+ u64 pte, struct xe_pt *xe_child)
+{
+ struct xe_guc_page_reclaim_entry *reclaim_entries = prl->entries;
+ u64 phys_page = (pte & XE_PTE_ADDR_MASK) >> XE_PTE_SHIFT;
+ int num_entries = prl->num_entries;
+ u32 reclamation_size;
+
+ xe_tile_assert(tile, xe_child->level <= MAX_HUGEPTE_LEVEL);
+ xe_tile_assert(tile, reclaim_entries);
+ xe_tile_assert(tile, num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1);
+
+ if (!xe_page_reclaim_list_valid(prl))
+ return -EINVAL;
+
+ /*
+ * reclamation_size indicates the size of the page to be
+ * invalidated and flushed from the non-coherent cache.
+ * Page size is computed as 2^(reclamation_size + XE_PTE_SHIFT) bytes.
+ * Only 4K, 64K (level 0) and 2M pages are supported by hardware for
+ * page reclaim.
+ */
+ if (xe_child->level == 0 && !(pte & XE_PTE_PS64)) {
+ reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_4K); /* reclamation_size = 0 */
+ } else if (xe_child->level == 0) {
+ reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_64K); /* reclamation_size = 4 */
+ } else if (is_2m_pte(xe_child)) {
+ reclamation_size = COMPUTE_RECLAIM_ADDRESS_MASK(SZ_2M); /* reclamation_size = 9 */
+ } else {
+ xe_page_reclaim_list_invalidate(prl);
+ vm_dbg(&tile_to_xe(tile)->drm,
+ "PRL invalidate: unsupported PTE level=%u pte=%#llx\n",
+ xe_child->level, pte);
+ return -EINVAL;
+ }
+
+ reclaim_entries[num_entries].qw =
+ FIELD_PREP(XE_PAGE_RECLAIM_VALID, 1) |
+ FIELD_PREP(XE_PAGE_RECLAIM_SIZE, reclamation_size) |
+ FIELD_PREP(XE_PAGE_RECLAIM_ADDR_LO, phys_page) |
+ FIELD_PREP(XE_PAGE_RECLAIM_ADDR_HI, phys_page >> 20);
+ prl->num_entries++;
+ vm_dbg(&tile_to_xe(tile)->drm,
+ "PRL add entry: level=%u pte=%#llx reclamation_size=%u prl_idx=%d\n",
+ xe_child->level, pte, reclamation_size, num_entries);
+
+ return 0;
+}
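For reference, a consumer-side sketch of how the encoded size is recovered from an entry, assuming XE_PTE_SHIFT == 12 and an index i into a populated PRL (both assumptions for the example); FIELD_GET is from <linux/bitfield.h>:

	u64 qw = prl->entries[i].qw;
	unsigned long page_size = 1ul << (FIELD_GET(XE_PAGE_RECLAIM_SIZE, qw) + XE_PTE_SHIFT);
	/* reclamation_size 0 -> 4K, 4 -> 64K, 9 -> 2M */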
+
static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
unsigned int level, u64 addr, u64 next,
struct xe_ptw **child,
@@ -1579,11 +1645,48 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
struct xe_pt_walk *walk)
{
struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base);
+ struct xe_pt_stage_unbind_walk *xe_walk =
+ container_of(walk, typeof(*xe_walk), base);
+ struct xe_device *xe = tile_to_xe(xe_walk->tile);
XE_WARN_ON(!*child);
XE_WARN_ON(!level);
+ /* Check for leaf node */
+ if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) &&
+ !xe_child->base.children) {
+ struct iosys_map *leaf_map = &xe_child->bo->vmap;
+ pgoff_t first = xe_pt_offset(addr, 0, walk);
+ pgoff_t count = xe_pt_num_entries(addr, next, 0, walk);
+
+ for (pgoff_t i = 0; i < count; i++) {
+ u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64);
+ int ret;
+
+ /* Account for the NULL terminator entry at the end (-1) */
+ if (xe_walk->prl->num_entries < XE_PAGE_RECLAIM_MAX_ENTRIES - 1) {
+ ret = generate_reclaim_entry(xe_walk->tile, xe_walk->prl,
+ pte, xe_child);
+ if (ret)
+ break;
+ } else {
+ /* overflow, mark as invalid */
+ xe_page_reclaim_list_invalidate(xe_walk->prl);
+ vm_dbg(&xe->drm,
+ "PRL invalidate: overflow while adding pte=%#llx\n",
+ pte);
+ break;
+ }
+ }
+ }
- xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
+ /* If the walk is killed early, invalidate the PRL since PTEs may be dropped without being recorded */
+ if (xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk) &&
+ xe_walk->prl && level > 1 && xe_child->base.children && xe_child->num_live != 0) {
+ xe_page_reclaim_list_invalidate(xe_walk->prl);
+ vm_dbg(&xe->drm,
+ "PRL invalidate: kill at level=%u addr=%#llx next=%#llx num_live=%u\n",
+ level, addr, next, xe_child->num_live);
+ }
return 0;
}
@@ -1654,6 +1757,8 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
{
u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma);
u64 end = range ? xe_svm_range_end(range) : xe_vma_end(vma);
+ struct xe_vm_pgtable_update_op *pt_update_op =
+ container_of(entries, struct xe_vm_pgtable_update_op, entries[0]);
struct xe_pt_stage_unbind_walk xe_walk = {
.base = {
.ops = &xe_pt_stage_unbind_ops,
@@ -1665,6 +1770,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
.modified_start = start,
.modified_end = end,
.wupd.entries = entries,
+ .prl = pt_update_op->prl,
};
struct xe_pt *pt = vm->pt_root[tile->id];
@@ -1897,6 +2003,7 @@ static int unbind_op_prepare(struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
struct xe_vma *vma)
{
+ struct xe_device *xe = tile_to_xe(tile);
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
int err;
@@ -1914,6 +2021,17 @@ static int unbind_op_prepare(struct xe_tile *tile,
pt_op->vma = vma;
pt_op->bind = false;
pt_op->rebind = false;
+ /*
+ * Maintain a single PRL in pt_update_ops that all unbind ops reference.
+ * Ensure the PRL is allocated only once and, once invalidated, stays
+ * invalidated.
+ */
+ if (xe->info.has_page_reclaim_hw_assist &&
+ xe_page_reclaim_list_is_new(&pt_update_ops->prl))
+ xe_page_reclaim_list_alloc_entries(&pt_update_ops->prl);
+
+ /* Page reclaim may not be needed due to other features, so skip the corresponding VMA */
+ pt_op->prl = (xe_page_reclaim_list_valid(&pt_update_ops->prl) &&
+ !xe_page_reclaim_skip(tile, vma)) ? &pt_update_ops->prl : NULL;
err = vma_reserve_fences(tile_to_xe(tile), vma);
if (err)
@@ -1979,6 +2097,7 @@ static int unbind_range_prepare(struct xe_vm *vm,
pt_op->vma = XE_INVALID_VMA;
pt_op->bind = false;
pt_op->rebind = false;
+ pt_op->prl = NULL;
pt_op->num_entries = xe_pt_stage_unbind(tile, vm, NULL, range,
pt_op->entries);
@@ -2096,6 +2215,7 @@ xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops)
init_llist_head(&pt_update_ops->deferred);
pt_update_ops->start = ~0x0ull;
pt_update_ops->last = 0x0ull;
+ xe_page_reclaim_list_init(&pt_update_ops->prl);
}
/**
@@ -2393,6 +2513,17 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
goto kill_vm_tile1;
}
update.ijob = ijob;
+ /*
+ * Only add page reclaim for the primary GT. Media GT does not have
+ * any PPC to flush, so enabling the PPC flush bit for media is
+ * effectively a NOP: it provides no performance benefit and does not
+ * interfere with the primary GT.
+ */
+ if (xe_page_reclaim_list_valid(&pt_update_ops->prl)) {
+ xe_tlb_inval_job_add_page_reclaim(ijob, &pt_update_ops->prl);
+ /* Release ref from alloc, job will now handle it */
+ xe_page_reclaim_list_invalidate(&pt_update_ops->prl);
+ }
if (tile->media_gt) {
dep_scheduler = to_dep_scheduler(q, tile->media_gt);
@@ -2518,6 +2649,8 @@ void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
&vops->pt_update_ops[tile->id];
int i;
+ xe_page_reclaim_entries_put(pt_update_ops->prl.entries);
+
lockdep_assert_held(&vops->vm->lock);
xe_vm_assert_held(vops->vm);
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 881f01e14db8..88fabf8e2655 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
+#include "xe_page_reclaim.h"
#include "xe_pt_walk.h"
struct xe_bo;
@@ -79,6 +80,8 @@ struct xe_vm_pgtable_update_op {
struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
/** @vma: VMA for operation, operation not valid if NULL */
struct xe_vma *vma;
+ /** @prl: Backing pointer to page reclaim list of pt_update_ops */
+ struct xe_page_reclaim_list *prl;
/** @num_entries: number of entries for this update operation */
u32 num_entries;
/** @bind: is a bind */
@@ -95,6 +98,8 @@ struct xe_vm_pgtable_update_ops {
struct llist_head deferred;
/** @q: exec queue for PT operations */
struct xe_exec_queue *q;
+ /** @prl: embedded page reclaim list */
+ struct xe_page_reclaim_list prl;
/** @start: start address of ops */
u64 start;
/** @last: last address of ops */
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
index bdbdbbf6a678..508f4c128a48 100644
--- a/drivers/gpu/drm/xe/xe_pxp.c
+++ b/drivers/gpu/drm/xe/xe_pxp.c
@@ -58,10 +58,9 @@ bool xe_pxp_is_enabled(const struct xe_pxp *pxp)
static bool pxp_prerequisites_done(const struct xe_pxp *pxp)
{
struct xe_gt *gt = pxp->gt;
- unsigned int fw_ref;
bool ready;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
/*
* If force_wake fails we could falsely report the prerequisites as not
@@ -71,14 +70,12 @@ static bool pxp_prerequisites_done(const struct xe_pxp *pxp)
* PXP. Therefore, we can just log the force_wake error and not escalate
* it.
*/
- XE_WARN_ON(!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL));
+ XE_WARN_ON(!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL));
/* PXP requires both HuC authentication via GSC and GSC proxy initialized */
ready = xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC) &&
xe_gsc_proxy_init_done(&gt->uc.gsc);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
return ready;
}
@@ -104,13 +101,12 @@ int xe_pxp_get_readiness_status(struct xe_pxp *pxp)
xe_uc_fw_status_to_error(pxp->gt->uc.gsc.fw.status))
return -EIO;
- xe_pm_runtime_get(pxp->xe);
+ guard(xe_pm_runtime)(pxp->xe);
/* PXP requires both HuC loaded and GSC proxy initialized */
if (pxp_prerequisites_done(pxp))
ret = 1;
- xe_pm_runtime_put(pxp->xe);
return ret;
}
@@ -135,35 +131,28 @@ static void pxp_invalidate_queues(struct xe_pxp *pxp);
static int pxp_terminate_hw(struct xe_pxp *pxp)
{
struct xe_gt *gt = pxp->gt;
- unsigned int fw_ref;
int ret = 0;
drm_dbg(&pxp->xe->drm, "Terminating PXP\n");
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) {
- ret = -EIO;
- goto out;
- }
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT))
+ return -EIO;
/* terminate the hw session */
ret = xe_pxp_submit_session_termination(pxp, ARB_SESSION);
if (ret)
- goto out;
+ return ret;
ret = pxp_wait_for_session_state(pxp, ARB_SESSION, false);
if (ret)
- goto out;
+ return ret;
/* Trigger full HW cleanup */
xe_mmio_write32(&gt->mmio, KCR_GLOBAL_TERMINATE, 1);
/* now we can tell the GSC to clean up its own state */
- ret = xe_pxp_submit_session_invalidation(&pxp->gsc_res, ARB_SESSION);
-
-out:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- return ret;
+ return xe_pxp_submit_session_invalidation(&pxp->gsc_res, ARB_SESSION);
}
static void mark_termination_in_progress(struct xe_pxp *pxp)
@@ -326,14 +315,12 @@ static int kcr_pxp_set_status(const struct xe_pxp *pxp, bool enable)
{
u32 val = enable ? _MASKED_BIT_ENABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES) :
_MASKED_BIT_DISABLE(KCR_INIT_ALLOW_DISPLAY_ME_WRITES);
- unsigned int fw_ref;
- fw_ref = xe_force_wake_get(gt_to_fw(pxp->gt), XE_FW_GT);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT))
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(pxp->gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT))
return -EIO;
xe_mmio_write32(&pxp->gt->mmio, KCR_INIT, val);
- xe_force_wake_put(gt_to_fw(pxp->gt), fw_ref);
return 0;
}
@@ -453,34 +440,28 @@ out:
static int __pxp_start_arb_session(struct xe_pxp *pxp)
{
int ret;
- unsigned int fw_ref;
- fw_ref = xe_force_wake_get(gt_to_fw(pxp->gt), XE_FW_GT);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT))
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(pxp->gt), XE_FW_GT);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT))
return -EIO;
- if (pxp_session_is_in_play(pxp, ARB_SESSION)) {
- ret = -EEXIST;
- goto out_force_wake;
- }
+ if (pxp_session_is_in_play(pxp, ARB_SESSION))
+ return -EEXIST;
ret = xe_pxp_submit_session_init(&pxp->gsc_res, ARB_SESSION);
if (ret) {
drm_err(&pxp->xe->drm, "Failed to init PXP arb session: %pe\n", ERR_PTR(ret));
- goto out_force_wake;
+ return ret;
}
ret = pxp_wait_for_session_state(pxp, ARB_SESSION, true);
if (ret) {
drm_err(&pxp->xe->drm, "PXP ARB session failed to go in play%pe\n", ERR_PTR(ret));
- goto out_force_wake;
+ return ret;
}
drm_dbg(&pxp->xe->drm, "PXP ARB session is active\n");
-
-out_force_wake:
- xe_force_wake_put(gt_to_fw(pxp->gt), fw_ref);
- return ret;
+ return 0;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 1c0915e2cc16..75490683bad2 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -122,7 +122,6 @@ query_engine_cycles(struct xe_device *xe,
__ktime_func_t cpu_clock;
struct xe_hw_engine *hwe;
struct xe_gt *gt;
- unsigned int fw_ref;
if (IS_SRIOV_VF(xe))
return -EOPNOTSUPP;
@@ -158,16 +157,13 @@ query_engine_cycles(struct xe_device *xe,
if (!hwe)
return -EINVAL;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- return -EIO;
- }
-
- hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp,
- &resp.cpu_delta, cpu_clock);
+ xe_with_force_wake(fw_ref, gt_to_fw(gt), XE_FORCEWAKE_ALL) {
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
+ return -EIO;
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp,
+ &resp.cpu_delta, cpu_clock);
+ }
if (GRAPHICS_VER(xe) >= 20)
resp.width = 64;
@@ -342,6 +338,9 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM))
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR;
+ if (GRAPHICS_VER(xe) >= 20)
+ config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
+ DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT;
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY;
config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
@@ -686,7 +685,9 @@ static int query_oa_units(struct xe_device *xe,
du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS |
DRM_XE_OA_CAPS_OA_BUFFER_SIZE |
DRM_XE_OA_CAPS_WAIT_NUM_REPORTS |
- DRM_XE_OA_CAPS_OAM;
+ DRM_XE_OA_CAPS_OAM |
+ DRM_XE_OA_CAPS_OA_UNIT_GT_ID;
+ du->gt_id = u->gt->info.id;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {
if (!xe_hw_engine_is_reserved(hwe) &&
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index fc8447a838c4..1a465385f909 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -168,7 +168,6 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
{
struct xe_reg_sr_entry *entry;
unsigned long reg;
- unsigned int fw_ref;
if (xa_empty(&sr->xa))
return;
@@ -178,20 +177,14 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt)
xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
- goto err_force_wake;
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
+ xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
+ return;
+ }
xa_for_each(&sr->xa, reg, entry)
apply_one_mmio(gt, entry);
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
- return;
-
-err_force_wake:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
}
/**
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 7ca360b2c20d..1391cb6ec9c6 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -9,6 +9,7 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_oa_regs.h"
#include "regs/xe_regs.h"
+#include "xe_device.h"
#include "xe_gt_types.h"
#include "xe_gt_printk.h"
#include "xe_platform_types.h"
@@ -26,6 +27,13 @@ static bool match_not_render(const struct xe_device *xe,
return hwe->class != XE_ENGINE_CLASS_RENDER;
}
+static bool match_has_mert(const struct xe_device *xe,
+ const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ return xe_device_has_mert((struct xe_device *)xe);
+}
+
static const struct xe_rtp_entry_sr register_whitelist[] = {
{ XE_RTP_NAME("WaAllowPMDepthAndInvocationCountAccessFromUMD, 1408556865"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
@@ -67,28 +75,6 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0))
},
- { XE_RTP_NAME("oa_reg_render"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER,
- RING_FORCE_TO_NONPRIV_ACCESS_RW),
- WHITELIST(OAG_OASTATUS,
- RING_FORCE_TO_NONPRIV_ACCESS_RD),
- WHITELIST(OAG_OAHEADPTR,
- RING_FORCE_TO_NONPRIV_ACCESS_RD |
- RING_FORCE_TO_NONPRIV_RANGE_4))
- },
- { XE_RTP_NAME("oa_reg_compute"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
- ENGINE_CLASS(COMPUTE)),
- XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER,
- RING_FORCE_TO_NONPRIV_ACCESS_RW),
- WHITELIST(OAG_OASTATUS,
- RING_FORCE_TO_NONPRIV_ACCESS_RD),
- WHITELIST(OAG_OAHEADPTR,
- RING_FORCE_TO_NONPRIV_ACCESS_RD |
- RING_FORCE_TO_NONPRIV_RANGE_4))
- },
{ XE_RTP_NAME("14024997852"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(WHITELIST(FF_MODE,
@@ -96,6 +82,57 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
WHITELIST(VFLSKPD,
RING_FORCE_TO_NONPRIV_ACCESS_RW))
},
+
+#define WHITELIST_OA_MMIO_TRG(trg, status, head) \
+ WHITELIST(trg, RING_FORCE_TO_NONPRIV_ACCESS_RW), \
+ WHITELIST(status, RING_FORCE_TO_NONPRIV_ACCESS_RD), \
+ WHITELIST(head, RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4)
+
+#define WHITELIST_OAG_MMIO_TRG \
+ WHITELIST_OA_MMIO_TRG(OAG_MMIOTRIGGER, OAG_OASTATUS, OAG_OAHEADPTR)
+
+#define WHITELIST_OAM_MMIO_TRG \
+ WHITELIST_OA_MMIO_TRG(OAM_MMIO_TRG(XE_OAM_SAG_BASE_ADJ), \
+ OAM_STATUS(XE_OAM_SAG_BASE_ADJ), \
+ OAM_HEAD_POINTER(XE_OAM_SAG_BASE_ADJ)), \
+ WHITELIST_OA_MMIO_TRG(OAM_MMIO_TRG(XE_OAM_SCMI_0_BASE_ADJ), \
+ OAM_STATUS(XE_OAM_SCMI_0_BASE_ADJ), \
+ OAM_HEAD_POINTER(XE_OAM_SCMI_0_BASE_ADJ)), \
+ WHITELIST_OA_MMIO_TRG(OAM_MMIO_TRG(XE_OAM_SCMI_1_BASE_ADJ), \
+ OAM_STATUS(XE_OAM_SCMI_1_BASE_ADJ), \
+ OAM_HEAD_POINTER(XE_OAM_SCMI_1_BASE_ADJ))
+
+#define WHITELIST_OA_MERT_MMIO_TRG \
+ WHITELIST_OA_MMIO_TRG(OAMERT_MMIO_TRG, OAMERT_STATUS, OAMERT_HEAD_POINTER)
+
+ { XE_RTP_NAME("oag_mmio_trg_rcs"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
+ ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(WHITELIST_OAG_MMIO_TRG)
+ },
+ { XE_RTP_NAME("oag_mmio_trg_ccs"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
+ ENGINE_CLASS(COMPUTE)),
+ XE_RTP_ACTIONS(WHITELIST_OAG_MMIO_TRG)
+ },
+ { XE_RTP_NAME("oam_mmio_trg_vcs"),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(1300, XE_RTP_END_VERSION_UNDEFINED),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(WHITELIST_OAM_MMIO_TRG)
+ },
+ { XE_RTP_NAME("oam_mmio_trg_vecs"),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(1300, XE_RTP_END_VERSION_UNDEFINED),
+ ENGINE_CLASS(VIDEO_ENHANCE)),
+ XE_RTP_ACTIONS(WHITELIST_OAM_MMIO_TRG)
+ },
+ { XE_RTP_NAME("oa_mert_mmio_trg_ccs"),
+ XE_RTP_RULES(FUNC(match_has_mert), ENGINE_CLASS(COMPUTE)),
+ XE_RTP_ACTIONS(WHITELIST_OA_MERT_MMIO_TRG)
+ },
+ { XE_RTP_NAME("oa_mert_mmio_trg_bcs"),
+ XE_RTP_RULES(FUNC(match_has_mert), ENGINE_CLASS(COPY)),
+ XE_RTP_ACTIONS(WHITELIST_OA_MERT_MMIO_TRG)
+ },
};
static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index ac0c6dcffe15..957b9e2fd138 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -12,7 +12,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
-#include "xe_exec_queue_types.h"
+#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_lrc.h"
#include "xe_macros.h"
@@ -135,12 +135,11 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset,
return i;
}
-static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
- int i)
+static int emit_pipe_invalidate(struct xe_exec_queue *q, u32 mask_flags,
+ bool invalidate_tlb, u32 *dw, int i)
{
u32 flags0 = 0;
- u32 flags1 = PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
+ u32 flags1 = PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
@@ -152,6 +151,11 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
if (invalidate_tlb)
flags1 |= PIPE_CONTROL_TLB_INVALIDATE;
+ if (xe_exec_queue_is_multi_queue(q))
+ flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE;
+ else
+ flags1 |= PIPE_CONTROL_CS_STALL;
+
flags1 &= ~mask_flags;
if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE)
@@ -175,54 +179,52 @@ static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
{
- struct xe_gt *gt = job->q->gt;
+ struct xe_exec_queue *q = job->q;
+ struct xe_gt *gt = q->gt;
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
- u32 flags;
+ u32 flags0, flags1;
if (XE_GT_WA(gt, 14016712196))
i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH,
LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);
- flags = (PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TILE_CACHE_FLUSH |
+ flags0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+ flags1 = (PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
if (XE_GT_WA(gt, 1409600907))
- flags |= PIPE_CONTROL_DEPTH_STALL;
+ flags1 |= PIPE_CONTROL_DEPTH_STALL;
if (lacks_render)
- flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+ flags1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
- flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+ flags1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- return emit_pipe_control(dw, i, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0, 0);
-}
-
-static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i)
-{
- if (hwe->class != XE_ENGINE_CLASS_RENDER)
- return i;
+ if (xe_exec_queue_is_multi_queue(q))
+ flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE;
+ else
+ flags1 |= PIPE_CONTROL_CS_STALL;
- if (XE_GT_WA(hwe->gt, 16020292621))
- i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC,
- RING_NOPID(hwe->mmio_base).addr, 0);
-
- return i;
+ return emit_pipe_control(dw, i, flags0, flags1, 0, 0);
}
-static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
- int i)
+static int emit_pipe_imm_ggtt(struct xe_exec_queue *q, u32 addr, u32 value,
+ bool stall_only, u32 *dw, int i)
{
- u32 flags = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_QW_WRITE;
+ u32 flags0 = 0, flags1 = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
if (!stall_only)
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ flags1 |= PIPE_CONTROL_FLUSH_ENABLE;
- return emit_pipe_control(dw, i, 0, flags, addr, value);
+ if (xe_exec_queue_is_multi_queue(q))
+ flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE;
+ else
+ flags1 |= PIPE_CONTROL_CS_STALL;
+
+ return emit_pipe_control(dw, i, flags0, flags1, addr, value);
}
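The multi-queue selection now appears in emit_pipe_invalidate(), emit_render_cache_flush() and emit_pipe_imm_ggtt(); a hypothetical helper, not part of this patch, that captures the repeated pattern could look like:

static void emit_stall_or_drain(struct xe_exec_queue *q, u32 *flags0, u32 *flags1)
{
	if (xe_exec_queue_is_multi_queue(q))
		*flags0 |= PIPE_CONTROL0_QUEUE_DRAIN_MODE;	/* drain instead of stalling */
	else
		*flags1 |= PIPE_CONTROL_CS_STALL;
}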
static u32 get_ppgtt_flag(struct xe_sched_job *job)
@@ -371,7 +373,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
- i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
+ i = emit_pipe_invalidate(job->q, mask_flags, job->ring_ops_flush_tlb, dw, i);
/* hsdes: 1809175790 */
if (has_aux_ccs(xe))
@@ -391,12 +393,10 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
job->user_fence.value,
dw, i);
- i = emit_pipe_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);
+ i = emit_pipe_imm_ggtt(job->q, xe_lrc_seqno_ggtt_addr(lrc), seqno, lacks_render, dw, i);
i = emit_user_interrupt(dw, i);
- i = emit_pipe_control_to_ring_end(job->q->hwe, dw, i);
-
xe_gt_assert(gt, i <= MAX_JOB_SIZE_DW);
xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 63a5263dcf1b..a87c1436c7c1 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -29,6 +29,7 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
kvfree(sa_manager->cpu_ptr);
sa_manager->bo = NULL;
+ sa_manager->shadow = NULL;
}
/**
@@ -37,12 +38,14 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg)
* @size: number of bytes to allocate
* @guard: number of bytes to exclude from suballocations
* @align: alignment for each suballocated chunk
+ * @flags: flags for suballocator
*
* Prepares the suballocation manager for suballocations.
*
* Return: a pointer to the &xe_sa_manager or an ERR_PTR on failure.
*/
-struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align)
+struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
+ u32 guard, u32 align, u32 flags)
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_sa_manager *sa_manager;
@@ -79,6 +82,26 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
memset(sa_manager->cpu_ptr, 0, bo->ttm.base.size);
}
+ if (flags & XE_SA_BO_MANAGER_FLAG_SHADOW) {
+ struct xe_bo *shadow;
+
+ ret = drmm_mutex_init(&xe->drm, &sa_manager->swap_guard);
+ if (ret)
+ return ERR_PTR(ret);
+
+ shadow = xe_managed_bo_create_pin_map(xe, tile, size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
+ if (IS_ERR(shadow)) {
+ drm_err(&xe->drm, "Failed to prepare %uKiB BO for SA manager (%pe)\n",
+ size / SZ_1K, shadow);
+ return ERR_CAST(shadow);
+ }
+ sa_manager->shadow = shadow;
+ }
+
drm_suballoc_manager_init(&sa_manager->base, managed_size, align);
ret = drmm_add_action_or_reset(&xe->drm, xe_sa_bo_manager_fini,
sa_manager);
@@ -89,6 +112,48 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
}
/**
+ * xe_sa_bo_swap_shadow() - Swap the SA BO with shadow BO.
+ * @sa_manager: the XE sub allocator manager
+ *
+ * Swaps the sub-allocator primary buffer object with the shadow buffer object.
+ *
+ * Return: None.
+ */
+void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager)
+{
+ struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
+
+ xe_assert(xe, sa_manager->shadow);
+ lockdep_assert_held(&sa_manager->swap_guard);
+
+ swap(sa_manager->bo, sa_manager->shadow);
+ if (!sa_manager->bo->vmap.is_iomem)
+ sa_manager->cpu_ptr = sa_manager->bo->vmap.vaddr;
+}
+
+/**
+ * xe_sa_bo_sync_shadow() - Sync the SA Shadow BO with primary BO.
+ * @sa_bo: the sub-allocator buffer object.
+ *
+ * Synchronizes the sub-allocator shadow buffer object with the primary buffer object.
+ *
+ * Return: None.
+ */
+void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo)
+{
+ struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
+ struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
+
+ xe_assert(xe, sa_manager->shadow);
+ lockdep_assert_held(&sa_manager->swap_guard);
+
+ xe_map_memcpy_to(xe, &sa_manager->shadow->vmap,
+ drm_suballoc_soffset(sa_bo),
+ xe_sa_bo_cpu_addr(sa_bo),
+ drm_suballoc_size(sa_bo));
+}
+
+/**
* __xe_sa_bo_new() - Make a suballocation but use custom gfp flags.
* @sa_manager: the &xe_sa_manager
* @size: number of bytes we want to suballocate
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
index 1be744350836..05e9a4e00e78 100644
--- a/drivers/gpu/drm/xe/xe_sa.h
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -14,12 +14,14 @@
struct dma_fence;
struct xe_tile;
-struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 guard, u32 align);
+#define XE_SA_BO_MANAGER_FLAG_SHADOW BIT(0)
+struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size,
+ u32 guard, u32 align, u32 flags);
struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, gfp_t gfp);
static inline struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 align)
{
- return __xe_sa_bo_manager_init(tile, size, SZ_4K, align);
+ return __xe_sa_bo_manager_init(tile, size, SZ_4K, align, 0);
}
/**
@@ -69,4 +71,18 @@ static inline void *xe_sa_bo_cpu_addr(struct drm_suballoc *sa)
drm_suballoc_soffset(sa);
}
+void xe_sa_bo_swap_shadow(struct xe_sa_manager *sa_manager);
+void xe_sa_bo_sync_shadow(struct drm_suballoc *sa_bo);
+
+/**
+ * xe_sa_bo_swap_guard() - Retrieve the SA BO swap guard of the sub-allocator.
+ * @sa_manager: the &xe_sa_manager
+ *
+ * Return: Sub-allocator swap guard mutex.
+ */
+static inline struct mutex *xe_sa_bo_swap_guard(struct xe_sa_manager *sa_manager)
+{
+ return &sa_manager->swap_guard;
+}
+
#endif
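The shadow flavour of the suballocator added above is intended to be driven under the swap guard: copy the live suballocations into the shadow BO, then swap the shadow in as the primary backing store. A minimal sketch of the expected call pattern, assuming a caller that already holds a populated &xe_sa_manager and an array of live suballocations (the helper name and the sa_bos[] array are assumptions for this sketch, not part of the patch):

/*
 * Illustrative only: publish suballocations into the shadow BO and then
 * make the shadow the active backing store.
 */
static void example_publish_and_swap(struct xe_sa_manager *sa_manager,
				     struct drm_suballoc **sa_bos, int count)
{
	int i;

	mutex_lock(xe_sa_bo_swap_guard(sa_manager));

	/* Copy each live suballocation into the shadow BO */
	for (i = 0; i < count; i++)
		xe_sa_bo_sync_shadow(sa_bos[i]);

	/* Make the shadow BO the primary; cpu_ptr is refreshed if !iomem */
	xe_sa_bo_swap_shadow(sa_manager);

	mutex_unlock(xe_sa_bo_swap_guard(sa_manager));
}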
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
index cb7238799dcb..1085c9c37d6b 100644
--- a/drivers/gpu/drm/xe/xe_sa_types.h
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -12,6 +12,9 @@ struct xe_bo;
struct xe_sa_manager {
struct drm_suballoc_manager base;
struct xe_bo *bo;
+ struct xe_bo *shadow;
+ /** @swap_guard: protects swaps and updates of @bo and @shadow */
+ struct mutex swap_guard;
void *cpu_ptr;
bool is_iomem;
};
diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c
index bab994696896..2cefefaed9ba 100644
--- a/drivers/gpu/drm/xe/xe_sriov_packet.c
+++ b/drivers/gpu/drm/xe/xe_sriov_packet.c
@@ -358,7 +358,7 @@ ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid,
#define MIGRATION_DESCRIPTOR_DWORDS (GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_DEVID_LEN + \
GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_REVID_LEN)
-static size_t pf_descriptor_init(struct xe_device *xe, unsigned int vfid)
+static int pf_descriptor_init(struct xe_device *xe, unsigned int vfid)
{
struct xe_sriov_packet **desc = pf_pick_descriptor(xe, vfid);
struct xe_sriov_packet *data;
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c
index 7c779d63179f..72423bb17e6f 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf.c
@@ -90,6 +90,7 @@ bool xe_sriov_pf_readiness(struct xe_device *xe)
*/
int xe_sriov_pf_init_early(struct xe_device *xe)
{
+ struct xe_mert *mert = &xe_device_get_root_tile(xe)->mert;
int err;
xe_assert(xe, IS_SRIOV_PF(xe));
@@ -111,6 +112,9 @@ int xe_sriov_pf_init_early(struct xe_device *xe)
xe_sriov_pf_service_init(xe);
+ spin_lock_init(&mert->lock);
+ init_completion(&mert->tlb_inv_done);
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
index bad751217e1e..e84bdde9bc80 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
@@ -70,9 +70,8 @@ static ssize_t from_file_write_to_xe_call(struct file *file, const char __user *
if (ret < 0)
return ret;
if (yes) {
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
ret = call(xe);
- xe_pm_runtime_put(xe);
}
if (ret < 0)
return ret;
@@ -209,9 +208,8 @@ static ssize_t from_file_write_to_vf_call(struct file *file, const char __user *
if (ret < 0)
return ret;
if (yes) {
- xe_pm_runtime_get(xe);
+ guard(xe_pm_runtime)(xe);
ret = call(xe, vfid);
- xe_pm_runtime_put(xe);
}
if (ret < 0)
return ret;
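The xe_pm_runtime conversions above (and the similar ones later in this series) rely on the kernel's scope-based cleanup guards: guard(xe_pm_runtime)(xe) takes the runtime PM reference and releases it automatically on every exit path of the enclosing scope. As a rough illustration of the mechanism only (this is not the kernel's guard() implementation, just a standalone sketch using the compiler's cleanup attribute):

#include <stdio.h>

/* Toy stand-ins for taking/dropping a runtime PM reference */
static void pm_get(int *dev) { printf("get %d\n", *dev); }
static void pm_put(int **dev) { printf("put %d\n", **dev); }

/*
 * A scope guard: the cleanup attribute runs pm_put() when the variable
 * goes out of scope, regardless of which return path is taken.
 */
#define pm_guard(dev) \
	int *__guard __attribute__((unused, cleanup(pm_put))) = (pm_get(dev), (dev))

static int do_work(int *dev)
{
	pm_guard(dev);		/* reference held for the whole scope */

	if (*dev < 0)
		return -1;	/* pm_put() still runs here */

	printf("work on %d\n", *dev);
	return 0;		/* ...and here */
}

int main(void)
{
	int dev = 7;

	return do_work(&dev);
}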
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c
index c0b767ac735c..3d140506ba36 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c
@@ -389,16 +389,12 @@ static ssize_t xe_sriov_dev_attr_store(struct kobject *kobj, struct attribute *a
struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr);
struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
struct xe_device *xe = vkobj->xe;
- ssize_t ret;
if (!vattr->store)
return -EPERM;
- xe_pm_runtime_get(xe);
- ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count);
- xe_pm_runtime_put(xe);
-
- return ret;
+ guard(xe_pm_runtime)(xe);
+ return xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count);
}
static ssize_t xe_sriov_vf_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
@@ -423,18 +419,14 @@ static ssize_t xe_sriov_vf_attr_store(struct kobject *kobj, struct attribute *at
struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
struct xe_device *xe = vkobj->xe;
unsigned int vfid = vkobj->vfid;
- ssize_t ret;
xe_sriov_pf_assert_vfid(xe, vfid);
if (!vattr->store)
return -EPERM;
- xe_pm_runtime_get(xe);
- ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count);
- xe_pm_runtime_get(xe);
-
- return ret;
+ guard(xe_pm_runtime)(xe);
+ return xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count);
}
static const struct sysfs_ops xe_sriov_dev_sysfs_ops = {
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 284ce37ca92d..1b75405b8d02 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -49,11 +49,13 @@
*
* As soon as Virtual GPU of the VM starts, the VF driver within receives
* the MIGRATED interrupt and schedules post-migration recovery worker.
- * That worker queries GuC for new provisioning (using MMIO communication),
+ * That worker sends the `VF2GUC_RESFIX_START` action along with a non-zero
+ * marker, queries the GuC for new provisioning (using MMIO communication),
* and applies fixups to any non-virtualized resources used by the VF.
*
* When the VF driver is ready to continue operation on the newly connected
- * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to
+ * hardware, it sends the `VF2GUC_RESFIX_DONE` action with the same
+ * marker that was passed to `VF2GUC_RESFIX_START`, which causes it to
* enter the long awaited `VF_RUNNING` state, and therefore start handling
* CTB messages and scheduling workloads from the VF::
*
@@ -102,12 +104,17 @@
* | [ ] new VF provisioning [ ]
* | [ ]---------------------------> [ ]
* | | [ ]
+ * | | VF2GUC_RESFIX_START [ ]
+ * | [ ] <---------------------------[ ]
+ * | [ ] [ ]
+ * | [ ] success [ ]
+ * | [ ]---------------------------> [ ]
* | | VF driver applies post [ ]
* | | migration fixups -------[ ]
* | | | [ ]
* | | -----> [ ]
* | | [ ]
- * | | VF2GUC_NOTIFY_RESFIX_DONE [ ]
+ * | | VF2GUC_RESFIX_DONE [ ]
* | [ ] <---------------------------[ ]
* | [ ] [ ]
* | [ ] GuC sets new VF state to [ ]
@@ -118,6 +125,55 @@
* | [ ]---------------------------> [ ]
* | | |
* | | |
+ *
+ * Handling of VF double migration flow is shown below::
+ *
+ * GuC1 VF
+ * | |
+ * | [ ]<--- start fixups
+ * | VF2GUC_RESFIX_START(marker) [ ]
+ * [ ] <-------------------------------------------[ ]
+ * [ ] [ ]
+ * [ ]---\ [ ]
+ * [ ] store marker [ ]
+ * [ ]<--/ [ ]
+ * [ ] [ ]
+ * [ ] success [ ]
+ * [ ] ------------------------------------------> [ ]
+ * | [ ]
+ * | [ ]---\
+ * | [ ] do fixups
+ * | [ ]<--/
+ * | [ ]
+ * -------------- VF paused / saved ----------------
+ * :
+ *
+ * GuC2
+ * |
+ * ----------------- VF restored ------------------
+ * |
+ * [ ]
+ * [ ]---\
+ * [ ] reset marker
+ * [ ]<--/
+ * [ ]
+ * ----------------- VF resumed ------------------
+ * | [ ]
+ * | [ ]
+ * | VF2GUC_RESFIX_DONE(marker) [ ]
+ * [ ] <-------------------------------------------[ ]
+ * [ ] [ ]
+ * [ ]---\ [ ]
+ * [ ] check marker [ ]
+ * [ ] (mismatch) [ ]
+ * [ ]<--/ [ ]
+ * [ ] [ ]
+ * [ ] RESPONSE_VF_MIGRATED [ ]
+ * [ ] ------------------------------------------> [ ]
+ * | [ ]---\
+ * | [ ] reschedule fixups
+ * | [ ]<--/
+ * | |
*/
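In code terms, the marker is what lets the GuC detect that a second migration happened while fixups were in flight: the same value must come back with `VF2GUC_RESFIX_DONE`, and a freshly restored GuC (which reset its stored marker) answers with a "migrated" response so the VF reschedules recovery. A hedged sketch of the recovery worker's control flow is shown below; the helpers (vf_send_resfix_start/done, vf_apply_fixups, example_reschedule_recovery) are hypothetical stand-ins for the real MMIO-based GuC actions and worker plumbing, not functions from this patch:

/*
 * Illustrative only: marker-based post-migration recovery.
 */
static void example_recovery_worker(struct xe_gt *gt, u32 marker)
{
	int err;

	/* @marker: any non-zero value chosen by the caller; the GuC stores it */
	err = vf_send_resfix_start(gt, marker);
	if (err)
		return;

	vf_apply_fixups(gt);			/* patch GGTT offsets, doorbells, ... */

	err = vf_send_resfix_done(gt, marker);	/* GuC compares the markers */
	if (err == -EAGAIN)			/* mismatch: the VF migrated again */
		example_reschedule_recovery(gt);	/* run fixups once more */
}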
/**
@@ -170,6 +226,26 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
vf_migration_init_early(xe);
}
+static int vf_migration_init_late(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_uc_fw_version guc_version;
+
+ if (!xe_sriov_vf_migration_supported(xe))
+ return 0;
+
+ xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version);
+ if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 27, 0)) {
+ xe_sriov_vf_migration_disable(xe,
+ "requires GuC ABI >= 1.27.0, but only %u.%u.%u found",
+ guc_version.major, guc_version.minor,
+ guc_version.patch);
+ return 0;
+ }
+
+ return xe_sriov_vf_ccs_init(xe);
+}
+
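vf_migration_init_late() gates marker-based recovery on GuC ABI 1.27.0 by packing major/minor/patch into a single integer before comparing. A small self-contained illustration of that packing scheme follows; the exact bit layout here is an assumption for the example, not a copy of the driver's MAKE_GUC_VER definition:

#include <stdint.h>
#include <stdio.h>

/* Assumed packing: 8 bits per component, major in the most significant byte */
#define PACK_VER(maj, min, pat) \
	(((uint32_t)(maj) << 16) | ((uint32_t)(min) << 8) | (uint32_t)(pat))

int main(void)
{
	uint32_t found = PACK_VER(1, 26, 4);
	uint32_t need = PACK_VER(1, 27, 0);

	/* 1.26.4 < 1.27.0, so migration support would be disabled */
	printf("found=0x%06x need=0x%06x supported=%d\n",
	       (unsigned)found, (unsigned)need, found >= need);
	return 0;
}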
/**
* xe_sriov_vf_init_late() - SR-IOV VF late initialization functions.
* @xe: the &xe_device to initialize
@@ -180,7 +256,7 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
*/
int xe_sriov_vf_init_late(struct xe_device *xe)
{
- return xe_sriov_vf_ccs_init(xe);
+ return vf_migration_init_late(xe);
}
static int sa_info_vf_ccs(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index 797a4b866226..052a5071e69f 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -150,7 +150,8 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
- sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16);
+ sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
+ XE_SA_BO_MANAGER_FLAG_SHADOW);
if (IS_ERR(sa_manager)) {
xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
@@ -162,9 +163,12 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
offset = 0;
xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
bb_pool_size);
+ xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
+ bb_pool_size);
offset = bb_pool_size - sizeof(u32);
xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
+ xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
ctx->mem.ccs_bb_pool = sa_manager;
@@ -381,6 +385,18 @@ err_ret:
return err;
}
+#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
+void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
+{
+ u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+ struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
+ struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
+
+ xe_device_wmb(xe);
+ xe_map_wr(xe, &lrc->bo->vmap, XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET, u32, addr);
+ xe_device_wmb(xe);
+}
+
/**
* xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
* @bo: the &buffer object to which batch buffer commands will be added.
@@ -441,9 +457,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
if (!bb)
continue;
- memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
- xe_bb_free(bb, NULL);
- bo->bb_ccs[ctx_id] = NULL;
+ xe_migrate_ccs_rw_copy_clear(bo, ctx_id);
}
return 0;
}
@@ -463,8 +477,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
if (!IS_VF_CCS_READY(xe))
return;
- xe_pm_runtime_get(xe);
-
+ guard(xe_pm_runtime)(xe);
for_each_ccs_rw_ctx(ctx_id) {
bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
if (!bb_pool)
@@ -475,6 +488,4 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
drm_puts(p, "\n");
}
-
- xe_pm_runtime_put(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
index f8ca6efce9ee..00e58b36c510 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
@@ -20,6 +20,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
void xe_sriov_vf_ccs_rebase(struct xe_device *xe);
void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p);
+void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx);
static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe)
{
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 1662bfddd4bc..4c716182ad3b 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -16,11 +16,10 @@
#include "xe_heci_gsc.h"
#include "xe_i2c.h"
#include "xe_mmio.h"
+#include "xe_nvm.h"
#include "xe_pcode_api.h"
#include "xe_vsec.h"
-#define MAX_SCRATCH_MMIO 8
-
/**
* DOC: Survivability Mode
*
@@ -48,19 +47,43 @@
*
* Refer :ref:`xe_configfs` for more details on how to use configfs
*
- * Survivability mode is indicated by the below admin-only readable sysfs which provides additional
- * debug information::
+ * Survivability mode is indicated by the admin-only readable sysfs entry below,
+ * which reports the type of survivability mode (Boot/Runtime).
+ *
+ * .. code-block:: shell
+ *
+ * # cat /sys/bus/pci/devices/<device>/survivability_mode
+ * Boot
+ *
+ *
+ * Any additional debug information, if present, is exposed under the directory
+ * ``survivability_info``::
+ *
+ * /sys/bus/pci/devices/<device>/survivability_info/
+ * ├── aux_info0
+ * ├── aux_info1
+ * ├── aux_info2
+ * ├── aux_info3
+ * ├── aux_info4
+ * ├── capability_info
+ * ├── fdo_mode
+ * ├── postcode_trace
+ * └── postcode_trace_overflow
+ *
+ * This directory contains the following attributes:
+ *
+ * - ``capability_info`` : Indicates boot status and whether additional debug information is available
*
- * /sys/bus/pci/devices/<device>/survivability_mode
+ * - ``postcode_trace``, ``postcode_trace_overflow`` : Each postcode is an 8-bit value that
+ * represents a boot failure event. When a new failure event is logged by PCODE, the
+ * existing postcodes are shifted left. These entries provide a history of 8 postcodes.
*
- * Capability Information:
- * Provides boot status
- * Postcode Information:
- * Provides information about the failure
- * Overflow Information
- * Provides history of previous failures
- * Auxiliary Information
- * Certain failures may have information in addition to postcode information
+ * - ``aux_info<n>`` : Additional debug information reported by certain failures
+ *
+ * - ``fdo_mode`` : To allow recovery when MEI itself fails, an SPI Flash
+ * Descriptor Override (FDO) mode was added in v2 of the survivability breadcrumbs. This mode is
+ * enabled by PCODE and allows the firmware to be updated directly via the SPI driver without
+ * any dependency on MEI. The Xe KMD initializes the NVM auxiliary driver if FDO mode is enabled.
*
* Runtime Survivability
* =====================
@@ -68,61 +91,77 @@
* Certain runtime firmware errors can cause the device to enter a wedged state
* (:ref:`xe-device-wedging`) requiring a firmware flash to restore normal operation.
* Runtime Survivability Mode indicates that a firmware flash is necessary to recover the device and
- * is indicated by the presence of survivability mode sysfs::
+ * is indicated by the presence of the survivability mode sysfs entry,
+ * which reports the type of survivability mode.
*
- * /sys/bus/pci/devices/<device>/survivability_mode
+ * .. code-block:: shell
*
- * Survivability mode sysfs provides information about the type of survivability mode.
+ * # cat /sys/bus/pci/devices/<device>/survivability_mode
+ * Runtime
*
* When such errors occur, userspace is notified with the drm device wedged uevent and runtime
* survivability mode. User can then initiate a firmware flash using userspace tools like fwupd
* to restore device to normal operation.
*/
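From userspace the interface reduces to reading a few small sysfs files. A minimal C example of reading the mode and some of the optional debug attributes; the paths follow the layout documented above, the PCI address is an example value, and error handling is trimmed for brevity:

#include <stdio.h>

/* Substitute the real PCI address of the device */
#define SYSFS_DEV "/sys/bus/pci/devices/0000:03:00.0"

static void dump(const char *path)
{
	char buf[64] = "";
	FILE *f = fopen(path, "r");

	if (!f)
		return;		/* attribute hidden or no survivability mode */
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	fclose(f);
}

int main(void)
{
	dump(SYSFS_DEV "/survivability_mode");			/* "Boot" or "Runtime" */
	dump(SYSFS_DEV "/survivability_info/capability_info");
	dump(SYSFS_DEV "/survivability_info/postcode_trace");
	dump(SYSFS_DEV "/survivability_info/fdo_mode");		/* v2 only */
	return 0;
}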
-static u32 aux_history_offset(u32 reg_value)
+static const char * const reg_map[] = {
+ [CAPABILITY_INFO] = "Capability Info",
+ [POSTCODE_TRACE] = "Postcode trace",
+ [POSTCODE_TRACE_OVERFLOW] = "Postcode trace overflow",
+ [AUX_INFO0] = "Auxiliary Info 0",
+ [AUX_INFO1] = "Auxiliary Info 1",
+ [AUX_INFO2] = "Auxiliary Info 2",
+ [AUX_INFO3] = "Auxiliary Info 3",
+ [AUX_INFO4] = "Auxiliary Info 4",
+};
+
+#define FDO_INFO (MAX_SCRATCH_REG + 1)
+
+struct xe_survivability_attribute {
+ struct device_attribute attr;
+ u8 index;
+};
+
+static struct
+xe_survivability_attribute *dev_attr_to_survivability_attr(struct device_attribute *attr)
{
- return REG_FIELD_GET(AUXINFO_HISTORY_OFFSET, reg_value);
+ return container_of(attr, struct xe_survivability_attribute, attr);
}
-static void set_survivability_info(struct xe_mmio *mmio, struct xe_survivability_info *info,
- int id, char *name)
+static void set_survivability_info(struct xe_mmio *mmio, u32 *info, int id)
{
- strscpy(info[id].name, name, sizeof(info[id].name));
- info[id].reg = PCODE_SCRATCH(id).raw;
- info[id].value = xe_mmio_read32(mmio, PCODE_SCRATCH(id));
+ info[id] = xe_mmio_read32(mmio, PCODE_SCRATCH(id));
}
static void populate_survivability_info(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info = survivability->info;
+ u32 *info = survivability->info;
struct xe_mmio *mmio;
u32 id = 0, reg_value;
- char name[NAME_MAX];
- int index;
mmio = xe_root_tile_mmio(xe);
- set_survivability_info(mmio, info, id, "Capability Info");
- reg_value = info[id].value;
+ set_survivability_info(mmio, info, CAPABILITY_INFO);
+ reg_value = info[CAPABILITY_INFO];
+
+ survivability->version = REG_FIELD_GET(BREADCRUMB_VERSION, reg_value);
+ /* FDO mode is exposed only from version 2 */
+ if (survivability->version >= 2)
+ survivability->fdo_mode = REG_FIELD_GET(FDO_MODE, reg_value);
if (reg_value & HISTORY_TRACKING) {
- id++;
- set_survivability_info(mmio, info, id, "Postcode Info");
+ set_survivability_info(mmio, info, POSTCODE_TRACE);
- if (reg_value & OVERFLOW_SUPPORT) {
- id = REG_FIELD_GET(OVERFLOW_REG_OFFSET, reg_value);
- set_survivability_info(mmio, info, id, "Overflow Info");
- }
+ if (reg_value & OVERFLOW_SUPPORT)
+ set_survivability_info(mmio, info, POSTCODE_TRACE_OVERFLOW);
}
+ /* Traverse the linked list of aux info registers */
if (reg_value & AUXINFO_SUPPORT) {
- id = REG_FIELD_GET(AUXINFO_REG_OFFSET, reg_value);
-
- for (index = 0; id && reg_value; index++, reg_value = info[id].value,
- id = aux_history_offset(reg_value)) {
- snprintf(name, NAME_MAX, "Auxiliary Info %d", index);
- set_survivability_info(mmio, info, id, name);
- }
+ for (id = REG_FIELD_GET(AUXINFO_REG_OFFSET, reg_value);
+ id >= AUX_INFO0 && id < MAX_SCRATCH_REG;
+ id = REG_FIELD_GET(AUXINFO_HISTORY_OFFSET, info[id]))
+ set_survivability_info(mmio, info, id);
}
}
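The aux-info registers effectively form a singly linked list: the capability register points at the first AUX_INFO slot, each slot's AUXINFO_HISTORY_OFFSET field points at the next, and an index outside the aux range terminates the walk. A small standalone model of that traversal is shown below; the register contents and field layout are invented for the example and do not match the real PCODE encoding:

#include <stdint.h>
#include <stdio.h>

enum { CAP, POST, POST_OVF, AUX0, AUX1, AUX2, AUX3, AUX4, MAX_REG };

/* Assume the low 4 bits of each aux register hold the next index */
#define NEXT_IDX(v)	((v) & 0xf)

int main(void)
{
	/* Fake scratch registers: CAP points at AUX0, AUX0 at AUX2, AUX2 ends */
	uint32_t reg[MAX_REG] = {
		[CAP]  = AUX0,
		[AUX0] = 0xa0 | AUX2,
		[AUX2] = 0xc0,		/* next index 0 is out of the aux range */
	};
	uint32_t info[MAX_REG] = { 0 };

	for (int id = NEXT_IDX(reg[CAP]);
	     id >= AUX0 && id < MAX_REG;
	     id = NEXT_IDX(info[id])) {
		info[id] = reg[id];	/* "read" the scratch register */
		printf("aux slot %d = 0x%x\n", id, (unsigned)info[id]);
	}
	return 0;
}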
@@ -130,15 +169,14 @@ static void log_survivability_info(struct pci_dev *pdev)
{
struct xe_device *xe = pdev_to_xe_device(pdev);
struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info = survivability->info;
+ u32 *info = survivability->info;
int id;
dev_info(&pdev->dev, "Survivability Boot Status : Critical Failure (%d)\n",
survivability->boot_status);
- for (id = 0; id < MAX_SCRATCH_MMIO; id++) {
- if (info[id].reg)
- dev_info(&pdev->dev, "%s: 0x%x - 0x%x\n", info[id].name,
- info[id].reg, info[id].value);
+ for (id = 0; id < MAX_SCRATCH_REG; id++) {
+ if (info[id])
+ dev_info(&pdev->dev, "%s: 0x%x\n", reg_map[id], info[id]);
}
}
@@ -156,43 +194,103 @@ static ssize_t survivability_mode_show(struct device *dev,
struct pci_dev *pdev = to_pci_dev(dev);
struct xe_device *xe = pdev_to_xe_device(pdev);
struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info = survivability->info;
- int index = 0, count = 0;
- count += sysfs_emit_at(buff, count, "Survivability mode type: %s\n",
- survivability->type ? "Runtime" : "Boot");
+ return sysfs_emit(buff, "%s\n", survivability->type ? "Runtime" : "Boot");
+}
+
+static DEVICE_ATTR_ADMIN_RO(survivability_mode);
- if (!check_boot_failure(xe))
- return count;
+static ssize_t survivability_info_show(struct device *dev,
+ struct device_attribute *attr, char *buff)
+{
+ struct xe_survivability_attribute *sa = dev_attr_to_survivability_attr(attr);
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+ struct xe_survivability *survivability = &xe->survivability;
+ u32 *info = survivability->info;
- for (index = 0; index < MAX_SCRATCH_MMIO; index++) {
- if (info[index].reg)
- count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name,
- info[index].reg, info[index].value);
- }
+ if (sa->index == FDO_INFO)
+ return sysfs_emit(buff, "%s\n", str_enabled_disabled(survivability->fdo_mode));
- return count;
+ return sysfs_emit(buff, "0x%x\n", info[sa->index]);
}
-static DEVICE_ATTR_ADMIN_RO(survivability_mode);
+#define SURVIVABILITY_ATTR_RO(name, _index) \
+ struct xe_survivability_attribute attr_##name = { \
+ .attr = __ATTR(name, 0400, survivability_info_show, NULL), \
+ .index = _index, \
+ }
+
+static SURVIVABILITY_ATTR_RO(capability_info, CAPABILITY_INFO);
+static SURVIVABILITY_ATTR_RO(postcode_trace, POSTCODE_TRACE);
+static SURVIVABILITY_ATTR_RO(postcode_trace_overflow, POSTCODE_TRACE_OVERFLOW);
+static SURVIVABILITY_ATTR_RO(aux_info0, AUX_INFO0);
+static SURVIVABILITY_ATTR_RO(aux_info1, AUX_INFO1);
+static SURVIVABILITY_ATTR_RO(aux_info2, AUX_INFO2);
+static SURVIVABILITY_ATTR_RO(aux_info3, AUX_INFO3);
+static SURVIVABILITY_ATTR_RO(aux_info4, AUX_INFO4);
+static SURVIVABILITY_ATTR_RO(fdo_mode, FDO_INFO);
static void xe_survivability_mode_fini(void *arg)
{
struct xe_device *xe = arg;
+ struct xe_survivability *survivability = &xe->survivability;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;
- sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
+ if (survivability->fdo_mode)
+ xe_nvm_fini(xe);
+
+ device_remove_file(dev, &dev_attr_survivability_mode);
+}
+
+static umode_t survivability_info_attrs_visible(struct kobject *kobj, struct attribute *attr,
+ int idx)
+{
+ struct xe_device *xe = kdev_to_xe_device(kobj_to_dev(kobj));
+ struct xe_survivability *survivability = &xe->survivability;
+ u32 *info = survivability->info;
+
+ /*
+ * The last entry in survivability_info_attrs is fdo_mode, which is only
+ * applicable from version 2 of survivability mode onwards
+ */
+ if (idx == MAX_SCRATCH_REG && survivability->version >= 2)
+ return 0400;
+
+ if (idx < MAX_SCRATCH_REG && info[idx])
+ return 0400;
+
+ return 0;
}
+/* Attributes are ordered according to enum scratch_reg */
+static struct attribute *survivability_info_attrs[] = {
+ &attr_capability_info.attr.attr,
+ &attr_postcode_trace.attr.attr,
+ &attr_postcode_trace_overflow.attr.attr,
+ &attr_aux_info0.attr.attr,
+ &attr_aux_info1.attr.attr,
+ &attr_aux_info2.attr.attr,
+ &attr_aux_info3.attr.attr,
+ &attr_aux_info4.attr.attr,
+ &attr_fdo_mode.attr.attr,
+ NULL,
+};
+
+static const struct attribute_group survivability_info_group = {
+ .name = "survivability_info",
+ .attrs = survivability_info_attrs,
+ .is_visible = survivability_info_attrs_visible,
+};
+
static int create_survivability_sysfs(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct xe_device *xe = pdev_to_xe_device(pdev);
int ret;
- /* create survivability mode sysfs */
- ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
+ ret = device_create_file(dev, &dev_attr_survivability_mode);
if (ret) {
dev_warn(dev, "Failed to create survivability sysfs files\n");
return ret;
@@ -203,6 +301,12 @@ static int create_survivability_sysfs(struct pci_dev *pdev)
if (ret)
return ret;
+ if (check_boot_failure(xe)) {
+ ret = devm_device_add_group(dev, &survivability_info_group);
+ if (ret)
+ return ret;
+ }
+
return 0;
}
@@ -220,12 +324,16 @@ static int enable_boot_survivability_mode(struct pci_dev *pdev)
/* Make sure xe_heci_gsc_init() knows about survivability mode */
survivability->mode = true;
- ret = xe_heci_gsc_init(xe);
- if (ret)
- goto err;
+ xe_heci_gsc_init(xe);
xe_vsec_init(xe);
+ if (survivability->fdo_mode) {
+ ret = xe_nvm_init(xe);
+ if (ret)
+ goto err;
+ }
+
ret = xe_i2c_probe(xe);
if (ret)
goto err;
@@ -235,29 +343,11 @@ static int enable_boot_survivability_mode(struct pci_dev *pdev)
return 0;
err:
+ dev_err(dev, "Failed to enable Survivability Mode\n");
survivability->mode = false;
return ret;
}
-static int init_survivability_mode(struct xe_device *xe)
-{
- struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info;
-
- survivability->size = MAX_SCRATCH_MMIO;
-
- info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
- GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- survivability->info = info;
-
- populate_survivability_info(xe);
-
- return 0;
-}
-
/**
* xe_survivability_mode_is_boot_enabled- check if boot survivability mode is enabled
* @xe: xe device instance
@@ -325,9 +415,7 @@ int xe_survivability_mode_runtime_enable(struct xe_device *xe)
return -EINVAL;
}
- ret = init_survivability_mode(xe);
- if (ret)
- return ret;
+ populate_survivability_info(xe);
ret = create_survivability_sysfs(pdev);
if (ret)
@@ -356,17 +444,16 @@ int xe_survivability_mode_boot_enable(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- int ret;
if (!xe_survivability_mode_is_requested(xe))
return 0;
- ret = init_survivability_mode(xe);
- if (ret)
- return ret;
+ populate_survivability_info(xe);
- /* Log breadcrumbs but do not enter survivability mode for Critical boot errors */
- if (survivability->boot_status == CRITICAL_FAILURE) {
+ /*
+ * Breadcrumbs v2 supports entering survivability mode even for critical boot errors
+ */
+ if (survivability->version < 2 && survivability->boot_status == CRITICAL_FAILURE) {
log_survivability_info(pdev);
return -ENXIO;
}
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
index cd65a5d167c9..bd5dc1c955ff 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
@@ -9,23 +9,29 @@
#include <linux/limits.h>
#include <linux/types.h>
+enum scratch_reg {
+ CAPABILITY_INFO,
+ POSTCODE_TRACE,
+ POSTCODE_TRACE_OVERFLOW,
+ AUX_INFO0,
+ AUX_INFO1,
+ AUX_INFO2,
+ AUX_INFO3,
+ AUX_INFO4,
+ MAX_SCRATCH_REG,
+};
+
enum xe_survivability_type {
XE_SURVIVABILITY_TYPE_BOOT,
XE_SURVIVABILITY_TYPE_RUNTIME,
};
-struct xe_survivability_info {
- char name[NAME_MAX];
- u32 reg;
- u32 value;
-};
-
/**
* struct xe_survivability: Contains survivability mode information
*/
struct xe_survivability {
- /** @info: struct that holds survivability info from scratch registers */
- struct xe_survivability_info *info;
+ /** @info: survivability debug info */
+ u32 info[MAX_SCRATCH_REG];
/** @size: number of scratch registers */
u32 size;
@@ -38,6 +44,12 @@ struct xe_survivability {
/** @type: survivability type */
enum xe_survivability_type type;
+
+ /** @fdo_mode: indicates if FDO mode is enabled */
+ bool fdo_mode;
+
+ /** @version: breadcrumb version of survivability mode */
+ u8 version;
};
#endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 55c5a0eb82e1..93550c7c84ac 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -176,24 +176,13 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
mmu_range);
}
-static s64 xe_svm_stats_ktime_us_delta(ktime_t start)
-{
- return IS_ENABLED(CONFIG_DEBUG_FS) ?
- ktime_us_delta(ktime_get(), start) : 0;
-}
-
static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start)
{
- s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+ s64 us_delta = xe_gt_stats_ktime_us_delta(start);
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta);
}
-static ktime_t xe_svm_stats_ktime_get(void)
-{
- return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0;
-}
-
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_notifier *notifier,
const struct mmu_notifier_range *mmu_range)
@@ -202,7 +191,7 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
struct xe_device *xe = vm->xe;
struct drm_gpusvm_range *r, *first;
struct xe_tile *tile;
- ktime_t start = xe_svm_stats_ktime_get();
+ ktime_t start = xe_gt_stats_ktime_get();
u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
u8 tile_mask = 0, id;
long err;
@@ -285,19 +274,21 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm,
return 0;
}
-static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end)
+static void xe_vma_set_default_attributes(struct xe_vma *vma)
+{
+ vma->attr.preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE;
+ vma->attr.preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES;
+ vma->attr.pat_index = vma->attr.default_pat_index;
+ vma->attr.atomic_access = DRM_XE_ATOMIC_UNDEFINED;
+}
+
+static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 start, u64 end)
{
struct xe_vma *vma;
- struct xe_vma_mem_attr default_attr = {
- .preferred_loc = {
- .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
- .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
- },
- .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
- };
- int err = 0;
+ bool has_default_attr;
+ int err;
- vma = xe_vm_find_vma_by_addr(vm, range_start);
+ vma = xe_vm_find_vma_by_addr(vm, start);
if (!vma)
return -EINVAL;
@@ -306,25 +297,30 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64
return 0;
}
- if (xe_vma_has_default_mem_attrs(vma))
- return 0;
-
vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx",
xe_vma_start(vma), xe_vma_end(vma));
- if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) {
- default_attr.pat_index = vma->attr.default_pat_index;
- default_attr.default_pat_index = vma->attr.default_pat_index;
- vma->attr = default_attr;
- } else {
- vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx",
- range_start, range_end);
- err = xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start);
- if (err) {
- drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err));
- xe_vm_kill(vm, true);
- return err;
- }
+ has_default_attr = xe_vma_has_default_mem_attrs(vma);
+
+ if (has_default_attr) {
+ start = xe_vma_start(vma);
+ end = xe_vma_end(vma);
+ } else if (xe_vma_start(vma) == start && xe_vma_end(vma) == end) {
+ xe_vma_set_default_attributes(vma);
+ }
+
+ xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end);
+
+ if (xe_vma_start(vma) == start && xe_vma_end(vma) == end && has_default_attr)
+ return 0;
+
+ vm_dbg(&vm->xe->drm, "New VMA start=0x%016llx, vma_end=0x%016llx", start, end);
+
+ err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start);
+ if (err) {
+ drm_warn(&vm->xe->drm, "New VMA MAP failed: %pe\n", ERR_PTR(err));
+ xe_vm_kill(vm, true);
+ return err;
}
/*
@@ -435,7 +431,7 @@ static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
unsigned long npages,
ktime_t start)
{
- s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+ s64 us_delta = xe_gt_stats_ktime_us_delta(start);
if (dir == XE_SVM_COPY_TO_VRAM) {
switch (npages) {
@@ -487,7 +483,7 @@ static int xe_svm_copy(struct page **pages,
u64 vram_addr = XE_VRAM_ADDR_INVALID;
int err = 0, pos = 0;
bool sram = dir == XE_SVM_COPY_TO_SRAM;
- ktime_t start = xe_svm_stats_ktime_get();
+ ktime_t start = xe_gt_stats_ktime_get();
/*
* This flow is complex: it locates physically contiguous device pages,
@@ -979,7 +975,7 @@ static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \
struct xe_svm_range *range, \
ktime_t start) \
{ \
- s64 us_delta = xe_svm_stats_ktime_us_delta(start); \
+ s64 us_delta = xe_gt_stats_ktime_us_delta(start); \
\
switch (xe_svm_range_size(range)) { \
case SZ_4K: \
@@ -1024,7 +1020,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
struct drm_pagemap *dpagemap;
struct xe_tile *tile = gt_to_tile(gt);
int migrate_try_count = ctx.devmem_only ? 3 : 1;
- ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start;
+ ktime_t start = xe_gt_stats_ktime_get(), bind_start, get_pages_start;
int err;
lockdep_assert_held_write(&vm->lock);
@@ -1063,7 +1059,7 @@ retry:
if (--migrate_try_count >= 0 &&
xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
- ktime_t migrate_start = xe_svm_stats_ktime_get();
+ ktime_t migrate_start = xe_gt_stats_ktime_get();
/* TODO : For multi-device dpagemap will be used to find the
* remote tile and remote device. Will need to modify
@@ -1100,7 +1096,7 @@ retry:
}
get_pages:
- get_pages_start = xe_svm_stats_ktime_get();
+ get_pages_start = xe_gt_stats_ktime_get();
range_debug(range, "GET PAGES");
err = xe_svm_range_get_pages(vm, range, &ctx);
@@ -1127,7 +1123,7 @@ get_pages:
xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start);
range_debug(range, "PAGE FAULT - BIND");
- bind_start = xe_svm_stats_ktime_get();
+ bind_start = xe_gt_stats_ktime_get();
xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) {
err = xe_vm_drm_exec_lock(vm, &exec);
drm_exec_retry_on_contention(&exec);
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index ff74528ca0c6..c8fdcdbd6ae7 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -228,6 +228,32 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
return 0;
}
+/**
+ * xe_sync_entry_wait() - Wait on an input sync
+ * @sync: Sync object
+ *
+ * If the sync is an input sync with an unsignalled fence, wait for it to signal.
+ *
+ * Return: 0 on success, -ERESTARTSYS on failure (interruption)
+ */
+int xe_sync_entry_wait(struct xe_sync_entry *sync)
+{
+ return xe_sync_needs_wait(sync) ?
+ dma_fence_wait(sync->fence, true) : 0;
+}
+
+/**
+ * xe_sync_needs_wait() - Sync needs a wait (input dma-fence not signaled)
+ * @sync: Sync object
+ *
+ * Return: True if sync needs a wait, False otherwise
+ */
+bool xe_sync_needs_wait(struct xe_sync_entry *sync)
+{
+ return sync->fence &&
+ !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &sync->fence->flags);
+}
+
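Together these helpers let a caller either block on unsignalled input fences up front or merely test whether a wait would be needed. A hedged sketch of a caller looping over a parsed sync array; the array/num_syncs shape is an assumption for the example:

/*
 * Illustrative only: wait on every unsignalled in-fence before touching
 * state that those fences protect.
 */
static int example_wait_on_in_syncs(struct xe_sync_entry *syncs, int num_syncs)
{
	int i, err;

	for (i = 0; i < num_syncs; i++) {
		if (!xe_sync_needs_wait(&syncs[i]))
			continue;

		err = xe_sync_entry_wait(&syncs[i]);
		if (err)	/* -ERESTARTSYS: interrupted, bail out */
			return err;
	}

	return 0;
}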
void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence)
{
if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
@@ -311,8 +337,11 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
struct xe_tile *tile;
u8 id;
- for_each_tile(tile, vm->xe, id)
- num_fence += (1 + XE_MAX_GT_PER_TILE);
+ for_each_tile(tile, vm->xe, id) {
+ num_fence++;
+ for_each_tlb_inval(i)
+ num_fence++;
+ }
fences = kmalloc_array(num_fence, sizeof(*fences),
GFP_KERNEL);
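The sizing change means the composite fence array now holds one fence per tile plus one per TLB-invalidation client on that tile, rather than the fixed 1 + XE_MAX_GT_PER_TILE per tile. A tiny model of the count, with made-up tile and client numbers:

#include <stdio.h>

int main(void)
{
	int num_tiles = 2, tlb_inval_per_tile = 3;	/* example values */
	int num_fence = 0;

	for (int t = 0; t < num_tiles; t++) {
		num_fence++;				/* bind/exec fence */
		num_fence += tlb_inval_per_tile;	/* TLB inval fences */
	}

	printf("fences to allocate: %d\n", num_fence);	/* prints 8 */
	return 0;
}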
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 51f2d803e977..6b949194acff 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -29,6 +29,8 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
struct xe_sched_job *job);
void xe_sync_entry_signal(struct xe_sync_entry *sync,
struct dma_fence *fence);
+int xe_sync_entry_wait(struct xe_sync_entry *sync);
+bool xe_sync_needs_wait(struct xe_sync_entry *sync);
void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
struct dma_fence *
xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 4f4f9a5c43af..63c060c2ea5c 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -209,6 +209,11 @@ int xe_tile_init(struct xe_tile *tile)
if (IS_ERR(tile->mem.kernel_bb_pool))
return PTR_ERR(tile->mem.kernel_bb_pool);
+ /* Optimistically anticipate at most 256 TLB fences with PRL */
+ tile->mem.reclaim_pool = xe_sa_bo_manager_init(tile, SZ_1M, XE_PAGE_RECLAIM_LIST_MAX_SIZE);
+ if (IS_ERR(tile->mem.reclaim_pool))
+ return PTR_ERR(tile->mem.reclaim_pool);
+
return 0;
}
void xe_tile_migrate_wait(struct xe_tile *tile)
diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c
index fff242a5ae56..5df2f461b7b7 100644
--- a/drivers/gpu/drm/xe/xe_tile_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c
@@ -82,13 +82,9 @@ int xe_tile_debugfs_show_with_rpm(struct seq_file *m, void *data)
struct drm_info_node *node = m->private;
struct xe_tile *tile = node_to_tile(node);
struct xe_device *xe = tile_to_xe(tile);
- int ret;
- xe_pm_runtime_get(xe);
- ret = xe_tile_debugfs_simple_show(m, data);
- xe_pm_runtime_put(xe);
-
- return ret;
+ guard(xe_pm_runtime)(xe);
+ return xe_tile_debugfs_simple_show(m, data);
}
static int ggtt(struct xe_tile *tile, struct drm_printer *p)
@@ -110,6 +106,13 @@ static const struct drm_info_list vf_safe_debugfs_list[] = {
{ "sa_info", .show = xe_tile_debugfs_show_with_rpm, .data = sa_info },
};
+static void tile_debugfs_create_vram_mm(struct xe_tile *tile)
+{
+ if (tile->mem.vram)
+ ttm_resource_manager_create_debugfs(&tile->mem.vram->ttm.manager, tile->debugfs,
+ "vram_mm");
+}
+
/**
* xe_tile_debugfs_register - Register tile's debugfs attributes
* @tile: the &xe_tile to register
@@ -139,4 +142,6 @@ void xe_tile_debugfs_register(struct xe_tile *tile)
drm_debugfs_create_files(vf_safe_debugfs_list,
ARRAY_SIZE(vf_safe_debugfs_list),
tile->debugfs, minor);
+
+ tile_debugfs_create_vram_mm(tile);
}
diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c
index f3f478f14ff5..7f97db2f89bb 100644
--- a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c
@@ -141,12 +141,11 @@ static int NAME##_set(void *data, u64 val) \
if (val > (TYPE)~0ull) \
return -EOVERFLOW; \
\
- xe_pm_runtime_get(xe); \
+ guard(xe_pm_runtime)(xe); \
err = xe_sriov_pf_wait_ready(xe) ?: \
xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \
if (!err) \
xe_sriov_pf_provision_set_custom_mode(xe); \
- xe_pm_runtime_put(xe); \
\
return err; \
} \
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
index 918a59e686ea..dec042248164 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
@@ -199,6 +199,20 @@ void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
mutex_unlock(&tlb_inval->seqno_lock);
}
+/**
+ * xe_tlb_inval_reset_timeout() - Reset TLB inval fence timeout
+ * @tlb_inval: TLB invalidation client
+ *
+ * Reset the TLB invalidation timeout timer.
+ */
+static void xe_tlb_inval_reset_timeout(struct xe_tlb_inval *tlb_inval)
+{
+ lockdep_assert_held(&tlb_inval->pending_lock);
+
+ mod_delayed_work(system_wq, &tlb_inval->fence_tdr,
+ tlb_inval->ops->timeout_delay(tlb_inval));
+}
+
static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno)
{
int seqno_recv = READ_ONCE(tlb_inval->seqno_recv);
@@ -299,6 +313,7 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
* @start: start address
* @end: end address
* @asid: address space id
+ * @prl_sa: suballocation holding the page reclaim list if used; NULL indicates a PPC flush
*
* Issue a range based TLB invalidation if supported, if not fallback to a full
* TLB invalidation. Completion of TLB is asynchronous and caller can use
@@ -308,10 +323,10 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
*/
int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence, u64 start, u64 end,
- u32 asid)
+ u32 asid, struct drm_suballoc *prl_sa)
{
return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt,
- start, end, asid);
+ start, end, asid, prl_sa);
}
/**
@@ -327,7 +342,7 @@ void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
u64 range = 1ull << vm->xe->info.va_bits;
xe_tlb_inval_fence_init(tlb_inval, &fence, true);
- xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid);
+ xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid, NULL);
xe_tlb_inval_fence_wait(&fence);
}
@@ -360,6 +375,12 @@ void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno)
* process_g2h_msg().
*/
spin_lock_irqsave(&tlb_inval->pending_lock, flags);
+ if (seqno == TLB_INVALIDATION_SEQNO_INVALID) {
+ xe_tlb_inval_reset_timeout(tlb_inval);
+ spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
+ return;
+ }
+
if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) {
spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
return;
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h
index 05614915463a..858d0690f995 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.h
@@ -23,7 +23,7 @@ int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval);
void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm);
int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence,
- u64 start, u64 end, u32 asid);
+ u64 start, u64 end, u32 asid, struct drm_suballoc *prl_sa);
void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
struct xe_tlb_inval_fence *fence,
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
index 1ae0dec2cf31..6a7bd6315797 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -7,7 +7,9 @@
#include "xe_dep_job_types.h"
#include "xe_dep_scheduler.h"
#include "xe_exec_queue.h"
+#include "xe_gt_printk.h"
#include "xe_gt_types.h"
+#include "xe_page_reclaim.h"
#include "xe_tlb_inval.h"
#include "xe_tlb_inval_job.h"
#include "xe_migrate.h"
@@ -24,6 +26,8 @@ struct xe_tlb_inval_job {
struct xe_exec_queue *q;
/** @vm: VM which TLB invalidation is being issued for */
struct xe_vm *vm;
+ /** @prl: Embedded copy of page reclaim list */
+ struct xe_page_reclaim_list prl;
/** @refcount: ref count of this job */
struct kref refcount;
/**
@@ -47,9 +51,16 @@ static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job)
container_of(dep_job, typeof(*job), dep);
struct xe_tlb_inval_fence *ifence =
container_of(job->fence, typeof(*ifence), base);
+ struct drm_suballoc *prl_sa = NULL;
+
+ if (xe_page_reclaim_list_valid(&job->prl)) {
+ prl_sa = xe_page_reclaim_create_prl_bo(job->tlb_inval, &job->prl, ifence);
+ if (IS_ERR(prl_sa))
+ prl_sa = NULL; /* NULL indicates falling back to a PPC flush */
+ }
xe_tlb_inval_range(job->tlb_inval, ifence, job->start,
- job->end, job->vm->usm.asid);
+ job->end, job->vm->usm.asid, prl_sa);
return job->fence;
}
@@ -107,6 +118,7 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
job->start = start;
job->end = end;
job->fence_armed = false;
+ xe_page_reclaim_list_init(&job->prl);
job->dep.ops = &dep_job_ops;
job->type = type;
kref_init(&job->refcount);
@@ -140,6 +152,25 @@ err_job:
return ERR_PTR(err);
}
+/**
+ * xe_tlb_inval_job_add_page_reclaim() - Embed PRL into a TLB job
+ * @job: TLB invalidation job that may trigger reclamation
+ * @prl: Page reclaim list populated during unbind
+ *
+ * Copies @prl into the job and takes an extra reference to the entry page so
+ * ownership can transfer to the TLB fence when the job is pushed.
+ */
+void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
+ struct xe_page_reclaim_list *prl)
+{
+ struct xe_device *xe = gt_to_xe(job->q->gt);
+
+ xe_gt_WARN_ON(job->q->gt, !xe->info.has_page_reclaim_hw_assist);
+ job->prl = *prl;
+ /* Pair with put in job_destroy */
+ xe_page_reclaim_entries_get(job->prl.entries);
+}
+
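The ownership rule here is reference counting across the hand-off: the job copies the list descriptor and grabs a reference on the entries, job run turns the list into a suballocated BO for the invalidation (falling back to a plain flush on failure), and job destroy drops the job's reference. A standalone sketch of that copy-and-get / put-on-destroy pattern; the types and helpers below are illustrative, not the driver's:

#include <stdio.h>

struct entries { int refcount; };		/* toy refcount, not a kref */
struct list_desc { struct entries *entries; };
struct job { struct list_desc prl; };		/* embedded copy of the list */

static void entries_get(struct entries *e) { if (e) e->refcount++; }
static void entries_put(struct entries *e)
{
	if (e && --e->refcount == 0)
		printf("entries freed\n");
}

/* Copy the descriptor and pin the entries for the job's lifetime */
static void job_add_list(struct job *job, struct list_desc *prl)
{
	job->prl = *prl;
	entries_get(job->prl.entries);
}

static void job_destroy(struct job *job)
{
	entries_put(job->prl.entries);		/* pairs with job_add_list() */
}

int main(void)
{
	struct entries e = { .refcount = 1 };
	struct list_desc prl = { .entries = &e };
	struct job job = { 0 };

	job_add_list(&job, &prl);
	entries_put(prl.entries);		/* caller drops its own reference */
	job_destroy(&job);			/* last put frees the entries */
	return 0;
}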
static void xe_tlb_inval_job_destroy(struct kref *ref)
{
struct xe_tlb_inval_job *job = container_of(ref, typeof(*job),
@@ -150,6 +181,9 @@ static void xe_tlb_inval_job_destroy(struct kref *ref)
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_vm *vm = job->vm;
+ /* BO creation retains a copy (if used), so no longer needed */
+ xe_page_reclaim_entries_put(job->prl.entries);
+
if (!job->fence_armed)
kfree(ifence);
else
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
index 4d6df1a6c6ca..03d6e21cd611 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_job.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
@@ -12,6 +12,7 @@ struct dma_fence;
struct xe_dep_scheduler;
struct xe_exec_queue;
struct xe_migrate;
+struct xe_page_reclaim_list;
struct xe_tlb_inval;
struct xe_tlb_inval_job;
struct xe_vm;
@@ -21,6 +22,9 @@ xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
struct xe_dep_scheduler *dep_scheduler,
struct xe_vm *vm, u64 start, u64 end, int type);
+void xe_tlb_inval_job_add_page_reclaim(struct xe_tlb_inval_job *job,
+ struct xe_page_reclaim_list *prl);
+
int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
index 8f8b060e9005..48d1503e8460 100644
--- a/drivers/gpu/drm/xe/xe_tlb_inval_types.h
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
@@ -9,6 +9,7 @@
#include <linux/workqueue.h>
#include <linux/dma-fence.h>
+struct drm_suballoc;
struct xe_tlb_inval;
/** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */
@@ -40,12 +41,13 @@ struct xe_tlb_inval_ops {
* @start: Start address
* @end: End address
* @asid: Address space ID
+ * @prl_sa: Suballocation for page reclaim list
*
* Return 0 on success, -ECANCELED if backend is mid-reset, error on
* failure
*/
int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start,
- u64 end, u32 asid);
+ u64 end, u32 asid, struct drm_suballoc *prl_sa);
/**
* @initialized: Backend is initialized
@@ -80,6 +82,7 @@ struct xe_tlb_inval {
const struct xe_tlb_inval_ops *ops;
/** @tlb_inval.seqno: TLB invalidation seqno, protected by CT lock */
#define TLB_INVALIDATION_SEQNO_MAX 0x100000
+#define TLB_INVALIDATION_SEQNO_INVALID TLB_INVALIDATION_SEQNO_MAX
int seqno;
/** @tlb_invalidation.seqno_lock: protects @tlb_invalidation.seqno */
struct mutex seqno_lock;
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 79a97b086cb2..6d12fcc13f43 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include "xe_exec_queue_types.h"
+#include "xe_exec_queue.h"
#include "xe_gpu_scheduler_types.h"
#include "xe_gt_types.h"
#include "xe_guc_exec_queue_types.h"
@@ -97,11 +98,51 @@ DECLARE_EVENT_CLASS(xe_exec_queue,
__entry->guc_state, __entry->flags)
);
+DECLARE_EVENT_CLASS(xe_exec_queue_multi_queue,
+ TP_PROTO(struct xe_exec_queue *q),
+ TP_ARGS(q),
+
+ TP_STRUCT__entry(
+ __string(dev, __dev_name_eq(q))
+ __field(enum xe_engine_class, class)
+ __field(u32, logical_mask)
+ __field(u8, gt_id)
+ __field(u16, width)
+ __field(u32, guc_id)
+ __field(u32, guc_state)
+ __field(u32, flags)
+ __field(u32, primary)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev);
+ __entry->class = q->class;
+ __entry->logical_mask = q->logical_mask;
+ __entry->gt_id = q->gt->info.id;
+ __entry->width = q->width;
+ __entry->guc_id = q->guc->id;
+ __entry->guc_state = atomic_read(&q->guc->state);
+ __entry->flags = q->flags;
+ __entry->primary = xe_exec_queue_multi_queue_primary(q)->guc->id;
+ ),
+
+ TP_printk("dev=%s, %d:0x%x, gt=%d, width=%d guc_id=%d, guc_state=0x%x, flags=0x%x, primary=%d",
+ __get_str(dev), __entry->class, __entry->logical_mask,
+ __entry->gt_id, __entry->width, __entry->guc_id,
+ __entry->guc_state, __entry->flags,
+ __entry->primary)
+);
+
DEFINE_EVENT(xe_exec_queue, xe_exec_queue_create,
TP_PROTO(struct xe_exec_queue *q),
TP_ARGS(q)
);
+DEFINE_EVENT(xe_exec_queue_multi_queue, xe_exec_queue_create_multi_queue,
+ TP_PROTO(struct xe_exec_queue *q),
+ TP_ARGS(q)
+);
+
DEFINE_EVENT(xe_exec_queue, xe_exec_queue_supress_resume,
TP_PROTO(struct xe_exec_queue *q),
TP_ARGS(q)
@@ -172,6 +213,11 @@ DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error,
TP_ARGS(q)
);
+DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cgp_context_error,
+ TP_PROTO(struct xe_exec_queue *q),
+ TP_ARGS(q)
+);
+
DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop,
TP_PROTO(struct xe_exec_queue *q),
TP_ARGS(q)
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 465bda355443..157520ea1783 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -218,9 +218,12 @@ int xe_uc_load_hw(struct xe_uc *uc)
xe_guc_engine_activity_enable_stats(&uc->guc);
- /* We don't fail the driver load if HuC fails to auth, but let's warn */
+ /* We don't fail the driver load if HuC fails to auth */
ret = xe_huc_auth(&uc->huc, XE_HUC_AUTH_VIA_GUC);
- xe_gt_assert(uc_to_gt(uc), !ret);
+ if (ret)
+ xe_gt_err(uc_to_gt(uc),
+ "HuC authentication failed (%pe), continuing with no HuC\n",
+ ERR_PTR(ret));
/* GSC load is async */
xe_gsc_load_start(&uc->gsc);
@@ -302,6 +305,34 @@ int xe_uc_suspend(struct xe_uc *uc)
}
/**
+ * xe_uc_runtime_suspend() - UC runtime suspend
+ * @uc: the UC object
+ *
+ * Runtime suspend all UCs.
+ */
+void xe_uc_runtime_suspend(struct xe_uc *uc)
+{
+ if (!xe_device_uc_enabled(uc_to_xe(uc)))
+ return;
+
+ xe_guc_runtime_suspend(&uc->guc);
+}
+
+/**
+ * xe_uc_runtime_resume() - UC runtime resume
+ * @uc: the UC object
+ *
+ * Runtime resume all UCs.
+ */
+void xe_uc_runtime_resume(struct xe_uc *uc)
+{
+ if (!xe_device_uc_enabled(uc_to_xe(uc)))
+ return;
+
+ xe_guc_runtime_resume(&uc->guc);
+}
+
+/**
* xe_uc_declare_wedged() - Declare UC wedged
* @uc: the UC object
*
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 21c9306098cf..5398da1a8097 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -14,6 +14,8 @@ int xe_uc_init_post_hwconfig(struct xe_uc *uc);
int xe_uc_load_hw(struct xe_uc *uc);
void xe_uc_gucrc_disable(struct xe_uc *uc);
int xe_uc_reset_prepare(struct xe_uc *uc);
+void xe_uc_runtime_resume(struct xe_uc *uc);
+void xe_uc_runtime_suspend(struct xe_uc *uc);
void xe_uc_stop_prepare(struct xe_uc *uc);
void xe_uc_stop(struct xe_uc *uc);
int xe_uc_start(struct xe_uc *uc);
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 622b76078567..dcb4a32e7a64 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -115,11 +115,11 @@ struct fw_blobs_by_type {
#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \
- fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \
- fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \
- fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
- fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \
+ fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 54, 0)) \
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 54, 0)) \
+ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 53, 0)) \
+ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 53, 0)) \
+ fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 53, 0)) \
fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \
fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 79ab6c512d3e..95e22ff95ea8 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1509,9 +1509,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
INIT_LIST_HEAD(&vm->preempt.exec_queues);
if (flags & XE_VM_FLAG_FAULT_MODE)
- vm->preempt.min_run_period_ms = 0;
+ vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms;
else
- vm->preempt.min_run_period_ms = 5;
+ vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms;
for_each_tile(tile, xe, id)
xe_range_fence_tree_init(&vm->rftree[id]);
@@ -2236,6 +2236,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
struct drm_gpuva_ops *ops;
struct drm_gpuva_op *__op;
struct drm_gpuvm_bo *vm_bo;
+ u64 range_start = addr;
u64 range_end = addr + range;
int err;
@@ -2248,10 +2249,16 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
switch (operation) {
case DRM_XE_VM_BIND_OP_MAP:
+ if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) {
+ xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end);
+ vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
+ }
+
+ fallthrough;
case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
struct drm_gpuvm_map_req map_req = {
- .map.va.addr = addr,
- .map.va.range = range,
+ .map.va.addr = range_start,
+ .map.va.range = range_end - range_start,
.map.gem.obj = obj,
.map.gem.offset = bo_offset_or_userptr,
};
@@ -2451,8 +2458,17 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
if (IS_ERR(vma))
return vma;
- if (xe_vma_is_userptr(vma))
+ if (xe_vma_is_userptr(vma)) {
err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+ /*
+ * -EBUSY has a dedicated meaning here (a user fence
+ * attached to the VMA is busy); in practice
+ * xe_vma_userptr_pin_pages can only fail with -EBUSY if
+ * we are low on memory, so convert this to -ENOMEM.
+ */
+ if (err == -EBUSY)
+ err = -ENOMEM;
+ }
}
if (err) {
prep_vma_destroy(vm, vma, false);
@@ -2727,7 +2743,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
if (xe_vma_is_cpu_addr_mirror(vma) &&
xe_svm_has_mapping(vm, xe_vma_start(vma),
- xe_vma_end(vma)))
+ xe_vma_end(vma)) &&
+ !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP))
return -EBUSY;
if (!xe_vma_is_cpu_addr_mirror(vma))
@@ -3107,19 +3124,19 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
struct dma_fence *fence = NULL;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
- int number_tiles = 0, current_fence = 0, n_fence = 0, err;
+ int number_tiles = 0, current_fence = 0, n_fence = 0, err, i;
u8 id;
number_tiles = vm_ops_setup_tile_args(vm, vops);
if (number_tiles == 0)
return ERR_PTR(-ENODATA);
- if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) {
- for_each_tile(tile, vm->xe, id)
- ++n_fence;
- } else {
- for_each_tile(tile, vm->xe, id)
- n_fence += (1 + XE_MAX_GT_PER_TILE);
+ for_each_tile(tile, vm->xe, id) {
+ ++n_fence;
+
+ if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT))
+ for_each_tlb_inval(i)
+ ++n_fence;
}
fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL);
@@ -3149,7 +3166,6 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
for_each_tile(tile, vm->xe, id) {
struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
- int i;
fence = NULL;
if (!vops->pt_update_ops[id].num_ops)
@@ -3214,7 +3230,8 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
{
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
- vma_add_ufence(op->map.vma, ufence);
+ if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
+ vma_add_ufence(op->map.vma, ufence);
break;
case DRM_GPUVA_OP_REMAP:
if (op->remap.prev)
@@ -3490,6 +3507,10 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
{
u16 coh_mode;
+ if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
+ xe_pat_index_get_comp_en(xe, pat_index)))
+ return -EINVAL;
+
if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
XE_IOCTL_DBG(xe, obj_offset >
xe_bo_size(bo) - range)) {
@@ -3913,7 +3934,7 @@ int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
&fence[fence_id], start, end,
- vm->usm.asid);
+ vm->usm.asid, NULL);
if (err)
goto wait;
++fence_id;
@@ -3926,7 +3947,7 @@ int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
&fence[fence_id], start, end,
- vm->usm.asid);
+ vm->usm.asid, NULL);
if (err)
goto wait;
++fence_id;
@@ -4032,10 +4053,18 @@ int xe_vm_validate_protected(struct xe_vm *vm)
}
struct xe_vm_snapshot {
+ int uapi_flags;
unsigned long num_snaps;
struct {
u64 ofs, bo_ofs;
unsigned long len;
+#define XE_VM_SNAP_FLAG_USERPTR BIT(0)
+#define XE_VM_SNAP_FLAG_READ_ONLY BIT(1)
+#define XE_VM_SNAP_FLAG_IS_NULL BIT(2)
+ unsigned long flags;
+ int uapi_mem_region;
+ int pat_index;
+ int cpu_caching;
struct xe_bo *bo;
void *data;
struct mm_struct *mm;
@@ -4064,6 +4093,13 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
goto out_unlock;
}
+ if (vm->flags & XE_VM_FLAG_FAULT_MODE)
+ snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
+ if (vm->flags & XE_VM_FLAG_LR_MODE)
+ snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
+ if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
+ snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
+
snap->num_snaps = num_snaps;
i = 0;
drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
@@ -4076,9 +4112,25 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
snap->snap[i].ofs = xe_vma_start(vma);
snap->snap[i].len = xe_vma_size(vma);
+ snap->snap[i].flags = xe_vma_read_only(vma) ?
+ XE_VM_SNAP_FLAG_READ_ONLY : 0;
+ snap->snap[i].pat_index = vma->attr.pat_index;
if (bo) {
+ snap->snap[i].cpu_caching = bo->cpu_caching;
snap->snap[i].bo = xe_bo_get(bo);
snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
+ switch (bo->ttm.resource->mem_type) {
+ case XE_PL_SYSTEM:
+ case XE_PL_TT:
+ snap->snap[i].uapi_mem_region = 0;
+ break;
+ case XE_PL_VRAM0:
+ snap->snap[i].uapi_mem_region = 1;
+ break;
+ case XE_PL_VRAM1:
+ snap->snap[i].uapi_mem_region = 2;
+ break;
+ }
} else if (xe_vma_is_userptr(vma)) {
struct mm_struct *mm =
to_userptr_vma(vma)->userptr.notifier.mm;
@@ -4089,8 +4141,14 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
snap->snap[i].data = ERR_PTR(-EFAULT);
snap->snap[i].bo_ofs = xe_vma_userptr(vma);
+ snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
+ snap->snap[i].uapi_mem_region = 0;
+ } else if (xe_vma_is_null(vma)) {
+ snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
+ snap->snap[i].uapi_mem_region = -1;
} else {
snap->snap[i].data = ERR_PTR(-ENOENT);
+ snap->snap[i].uapi_mem_region = -1;
}
i++;
}
@@ -4109,7 +4167,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
struct xe_bo *bo = snap->snap[i].bo;
int err;
- if (IS_ERR(snap->snap[i].data))
+ if (IS_ERR(snap->snap[i].data) ||
+ snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
continue;
snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
@@ -4155,15 +4214,32 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
return;
}
+ drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
for (i = 0; i < snap->num_snaps; i++) {
drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
+ drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
+ snap->snap[i].ofs,
+ snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
+ "read_only" : "read_write",
+ snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
+ "null_sparse" :
+ snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
+ "userptr" : "bo",
+ snap->snap[i].uapi_mem_region == -1 ? 0 :
+ BIT(snap->snap[i].uapi_mem_region),
+ snap->snap[i].pat_index,
+ snap->snap[i].cpu_caching);
+
if (IS_ERR(snap->snap[i].data)) {
drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
PTR_ERR(snap->snap[i].data));
continue;
}
+ if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
+ continue;
+
drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
@@ -4317,6 +4393,8 @@ static int xe_vm_alloc_vma(struct xe_vm *vm,
if (is_madvise)
vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
+ else
+ vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
if (err)
@@ -4390,6 +4468,46 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
return xe_vm_alloc_vma(vm, &map_req, true);
}
+static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
+{
+ return vma && xe_vma_is_cpu_addr_mirror(vma) &&
+ xe_vma_has_default_mem_attrs(vma);
+}
+
+/**
+ * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
+ * @vm: VM to search within
+ * @start: Input/output pointer to the starting address of the range
+ * @end: Input/output pointer to the end address of the range
+ *
+ * Given a range defined by @start and @end, this function checks the VMAs
+ * immediately before and after the range. If those neighboring VMAs are
+ * CPU-address-mirrored and have default memory attributes, the function
+ * updates @start and @end to include them. This extended range can then
+ * be used for merging or other operations that require a unified VMA.
+ *
+ * The function does not perform the merge itself; it only computes the
+ * mergeable boundaries.
+ */
+void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end)
+{
+ struct xe_vma *prev, *next;
+
+ lockdep_assert_held(&vm->lock);
+
+ if (*start >= SZ_4K) {
+ prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K);
+ if (is_cpu_addr_vma_with_default_attr(prev))
+ *start = xe_vma_start(prev);
+ }
+
+ if (*end < vm->size) {
+ next = xe_vm_find_vma_by_addr(vm, *end + 1);
+ if (is_cpu_addr_vma_with_default_attr(next))
+ *end = xe_vma_end(next);
+ }
+}
+
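A hedged usage sketch for the helper above: the wrapper name, the write-lock, and the caller context are illustrative assumptions; only xe_vm_find_cpu_addr_mirror_vma_range() is taken from this patch.

/*
 * Illustrative only: grow a caller-supplied range so that a later
 * (re)allocation can cover any mergeable CPU-address-mirror neighbours.
 * The helper only computes boundaries; it never merges VMAs itself.
 */
static void example_extend_mirror_range(struct xe_vm *vm, u64 *start, u64 *end)
{
	down_write(&vm->lock);	/* assumed caller locking; helper asserts vm->lock */
	xe_vm_find_cpu_addr_mirror_vma_range(vm, start, end);
	up_write(&vm->lock);

	/* *start and *end now bound the widest range that could be remapped
	 * as a single CPU-address-mirror VMA with default attributes. */
}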
/**
* xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
* @vm: Pointer to the xe_vm structure
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index ef8a5019574e..361f10b3c453 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -68,6 +68,9 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
bool xe_vma_has_default_mem_attrs(struct xe_vma *vma);
+void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm,
+ u64 *start,
+ u64 *end);
/**
* xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs
* @vm: The vm
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 2168ef052499..18bad1dd08e6 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -467,6 +467,7 @@ struct xe_vma_ops {
#define XE_VMA_OPS_FLAG_MADVISE BIT(1)
#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2)
#define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3)
+#define XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP BIT(4)
u32 flags;
#ifdef TEST_VM_OPS_ERROR
/** @inject_error: inject error to test error handling */
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index d50baefcd124..1b9e9b028975 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -156,12 +156,11 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *
static int get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size, u64 *poffset)
{
struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
u64 offset;
u32 reg;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
+ CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref.domains)
return -ETIMEDOUT;
if (GRAPHICS_VER(xe) >= 20) {
@@ -193,7 +192,6 @@ static int get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size, u64 *poffset)
offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K;
}
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
*poffset = offset;
return 0;
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index e32dd2fde6f1..a93717e77da0 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -15,6 +15,7 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
#include "regs/xe_regs.h"
#include "xe_device_types.h"
#include "xe_force_wake.h"
@@ -216,20 +217,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR))
},
- /* Xe2_LPG */
-
- { XE_RTP_NAME("16020975621"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS))
- },
- { XE_RTP_NAME("14018157293"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0),
- SET(XEHPC_L3CLOS_MASK(1), ~0),
- SET(XEHPC_L3CLOS_MASK(2), ~0),
- SET(XEHPC_L3CLOS_MASK(3), ~0))
- },
-
/* Xe2_LPM */
{ XE_RTP_NAME("14017421178"),
@@ -315,6 +302,10 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)),
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
+ { XE_RTP_NAME("16028005424"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005)),
+ XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES))
+ },
};
static const struct xe_rtp_entry_sr engine_was[] = {
@@ -504,11 +495,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
},
- { XE_RTP_NAME("14018957109"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
- },
{ XE_RTP_NAME("14020338487"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS))
@@ -518,11 +504,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
},
- { XE_RTP_NAME("14019322943"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE))
- },
{ XE_RTP_NAME("14018471104"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
@@ -693,7 +674,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
{ XE_RTP_NAME("18041344222"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000),
FUNC(xe_rtp_match_first_render_or_compute),
FUNC(xe_rtp_match_not_sriov_vf),
FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
@@ -799,17 +780,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
/* Xe2_LPG */
- { XE_RTP_NAME("16020518922"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(FF_MODE,
- DIS_TE_AUTOSTRIP |
- DIS_MESH_PARTIAL_AUTOSTRIP |
- DIS_MESH_AUTOSTRIP),
- SET(VFLSKPD,
- DIS_PARTIAL_AUTOSTRIP |
- DIS_AUTOSTRIP))
- },
{ XE_RTP_NAME("14019386621"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
@@ -818,20 +788,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
},
- { XE_RTP_NAME("14020013138"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
- },
{ XE_RTP_NAME("14019988906"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
},
- { XE_RTP_NAME("16020183090"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT))
- },
{ XE_RTP_NAME("18033852989"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST))
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 7ca7258eb5d8..5cd7fa6d2a5c 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -16,15 +16,11 @@
16017236439 PLATFORM(PVC)
14019821291 MEDIA_VERSION_RANGE(1300, 2000)
14015076503 MEDIA_VERSION(1300)
-16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
-14018913170 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
- MEDIA_VERSION(2000), GRAPHICS_STEP(A0, A1)
- GRAPHICS_VERSION_RANGE(1270, 1274)
+14018913170 GRAPHICS_VERSION_RANGE(1270, 1274)
MEDIA_VERSION(1300)
PLATFORM(DG2)
14018094691 GRAPHICS_VERSION_RANGE(2001, 2002)
GRAPHICS_VERSION(2004)
-14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
18024947630 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
MEDIA_VERSION(2000)
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index f64dc0eff0e6..726e481574fe 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -106,6 +106,7 @@ extern "C" {
#define DRM_XE_OBSERVATION 0x0b
#define DRM_XE_MADVISE 0x0c
#define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e
/* Must be kept compact -- no holes */
@@ -123,6 +124,7 @@ extern "C" {
#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param)
#define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise)
#define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr)
+#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
/**
* DOC: Xe IOCTL Extensions
@@ -210,8 +212,12 @@ struct drm_xe_ext_set_property {
/** @pad: MBZ */
__u32 pad;
- /** @value: property value */
- __u64 value;
+ union {
+ /** @value: property value */
+ __u64 value;
+ /** @ptr: pointer to user value */
+ __u64 ptr;
+ };
/** @reserved: Reserved */
__u64 reserved[2];
@@ -403,6 +409,9 @@ struct drm_xe_query_mem_regions {
* has low latency hint support
* - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the
* device has CPU address mirroring support
+ * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT - Flag is set if the
+ * device supports the userspace hint %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION.
+ * This is exposed only on Xe2+.
* - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment
* required by this device, typically SZ_4K or SZ_64K
* - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address
@@ -421,6 +430,7 @@ struct drm_xe_query_config {
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0)
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1)
#define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2)
+ #define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3)
#define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2
#define DRM_XE_QUERY_CONFIG_VA_BITS 3
#define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4
@@ -791,6 +801,17 @@ struct drm_xe_device_query {
* need to use VRAM for display surfaces, therefore the kernel requires
* setting this flag for such objects, otherwise an error is thrown on
* small-bar systems.
+ * - %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION - Allows userspace to
+ * hint that compression (CCS) should be disabled for the buffer being
+ * created. This can avoid unnecessary memory operations and CCS state
+ * management.
+ * On pre-Xe2 platforms, this flag is currently rejected as compression
+ * control is not supported via PAT index. On Xe2+ platforms, compression
+ * is controlled via PAT entries. If this flag is set, the driver will reject
+ * any VM bind that requests a PAT index enabling compression for this BO.
+ * Note: On dGPU platforms, there is currently no change in behavior with
+ * this flag, but future improvements may leverage it. The current benefit is
+ * primarily applicable to iGPU platforms.
*
* @cpu_caching supports the following values:
* - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back
@@ -837,6 +858,7 @@ struct drm_xe_gem_create {
#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (1 << 0)
#define DRM_XE_GEM_CREATE_FLAG_SCANOUT (1 << 1)
#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (1 << 2)
+#define DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION (1 << 3)
/**
* @flags: Flags, currently a mask of memory instances of where BO can
* be placed
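The %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION bullet above is easiest to read next to a userspace call. A minimal sketch, assuming a valid DRM fd and a system-memory placement mask; the wrapper name and the simplified error handling are illustrative, not part of the uAPI.

#include <sys/ioctl.h>
#include "drm/xe_drm.h"

/* Hedged sketch: create a BO with the compression-disable hint set. */
static int create_uncompressed_bo(int fd, __u64 size, __u32 placement_mask)
{
	struct drm_xe_gem_create create = {
		.size = size,
		.placement = placement_mask,	/* e.g. a system memory instance */
		.flags = DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION,
		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
	};

	if (ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
		return -1;

	/*
	 * Any later VM bind that selects a compression-enabling PAT index for
	 * this BO is rejected (see the xe_vm_bind_ioctl_validate_bo hunk
	 * earlier in this diff).
	 */
	return create.handle;	/* __u32 handle returned as int for brevity */
}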
@@ -1252,6 +1274,20 @@ struct drm_xe_vm_bind {
* Given that going into a power-saving state kills PXP HWDRM sessions,
* runtime PM will be blocked while queues of this type are alive.
* All PXP queues will be killed if a PXP invalidation event occurs.
+ * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP - Create a multi-queue group
+ * or add secondary queues to a multi-queue group.
+ * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_CREATE flag set,
+ * then a new multi-queue group is created with this queue as the primary queue
+ * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary
+ * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field.
+ * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag
+ * set, then the multi-queue group is kept active after the primary queue is
+ * destroyed.
+ * All other bits of the extension's 'value' field must be set to 0 when adding
+ * either the primary or the secondary queues of the group.
+ * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue
+ * priority within the multi-queue group. Current valid priority values are 0–2
+ * (default is 1), with higher values indicating higher priority.
*
* The example below shows how to use @drm_xe_exec_queue_create to create
* a simple exec_queue (no parallel submission) of class
@@ -1292,6 +1328,11 @@ struct drm_xe_exec_queue_create {
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2
+#define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4
+#define DRM_XE_MULTI_GROUP_CREATE (1ull << 63)
+#define DRM_XE_MULTI_GROUP_KEEP_ACTIVE (1ull << 62)
+#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5
/** @extensions: Pointer to the first extension struct, if any */
__u64 extensions;
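A hedged sketch of how the %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 'value' encoding described above could be built when chaining a drm_xe_ext_set_property onto queue creation; the helper name and parameters are illustrative only.

#include <string.h>
#include "drm/xe_drm.h"

/* Hedged sketch: fill the set_property extension for multi-queue groups. */
static void fill_multi_group_ext(struct drm_xe_ext_set_property *ext,
				 int create_group, int keep_active,
				 __u32 primary_exec_queue_id)
{
	memset(ext, 0, sizeof(*ext));
	ext->base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY;
	ext->property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP;

	if (create_group) {
		/* This queue becomes the primary queue (Q0) of a new group. */
		ext->value = DRM_XE_MULTI_GROUP_CREATE;
		if (keep_active)
			ext->value |= DRM_XE_MULTI_GROUP_KEEP_ACTIVE;
	} else {
		/*
		 * Join the group whose primary queue id sits in the low
		 * 32 bits; all other bits must stay zero.
		 */
		ext->value = primary_exec_queue_id;
	}
}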
@@ -1655,6 +1696,9 @@ enum drm_xe_oa_unit_type {
/** @DRM_XE_OA_UNIT_TYPE_OAM_SAG: OAM_SAG OA unit */
DRM_XE_OA_UNIT_TYPE_OAM_SAG,
+
+ /** @DRM_XE_OA_UNIT_TYPE_MERT: MERT OA unit */
+ DRM_XE_OA_UNIT_TYPE_MERT,
};
/**
@@ -1677,12 +1721,19 @@ struct drm_xe_oa_unit {
#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2)
#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3)
#define DRM_XE_OA_CAPS_OAM (1 << 4)
+#define DRM_XE_OA_CAPS_OA_UNIT_GT_ID (1 << 5)
/** @oa_timestamp_freq: OA timestamp freq */
__u64 oa_timestamp_freq;
+ /** @gt_id: gt id for this OA unit */
+ __u16 gt_id;
+
+ /** @reserved1: MBZ */
+ __u16 reserved1[3];
+
/** @reserved: MBZ */
- __u64 reserved[4];
+ __u64 reserved[3];
/** @num_engines: number of engines in @eci array */
__u64 num_engines;
@@ -2274,6 +2325,30 @@ struct drm_xe_vm_query_mem_range_attr {
};
+/**
+ * struct drm_xe_exec_queue_set_property - exec queue set property
+ *
+ * Sets execution queue properties dynamically. Currently only the
+ * %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property can be
+ * set this way.
+ */
+struct drm_xe_exec_queue_set_property {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @exec_queue_id: Exec queue ID */
+ __u32 exec_queue_id;
+
+ /** @property: property to set */
+ __u32 property;
+
+ /** @value: property value */
+ __u64 value;
+
+ /** @reserved: Reserved */
+ __u64 reserved[2];
+};
+
#if defined(__cplusplus)
}
#endif
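Tying the new ioctl number and struct together, a minimal userspace sketch, assuming an already-created exec queue that belongs to a multi-queue group; the priority value passed is just one point in the documented 0-2 range.

#include <sys/ioctl.h>
#include "drm/xe_drm.h"

/* Hedged sketch: change a grouped queue's priority after creation. */
static int set_multi_queue_priority(int fd, __u32 exec_queue_id, __u64 prio)
{
	struct drm_xe_exec_queue_set_property args = {
		.exec_queue_id = exec_queue_id,
		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY,
		.value = prio,	/* documented range 0-2, default 1 */
	};

	return ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY, &args);
}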