From 9f49e185ee31d5d5fc8ea1a23aaaba5a4374a949 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 12 Mar 2026 09:29:23 -0700 Subject: drm/xe/wa: Drop redundant entries for Wa_16021867713 & Wa_14019449301 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Xe2_HPM-specific RTP table entries for Wa_16021867713 and Wa_14019449301 were removed by commit 941f538b0af8 ("drm/xe: Consolidate workaround entries for Wa_16021867713") and commit aa0f0a678370 ("drm/xe: Consolidate workaround entries for Wa_14019449301") in favor of alternate entries earlier in the table that cover a wider range of IP versions. However these Xe2_HPM-specific entries were accidentally resurrected during a backmerge, which causes the Xe driver to complain on probe about two entries trying to program the same registers+bits: <3> [48.491155] xe 0000:03:00.0: [drm] *ERROR* Tile0: GT1: discarding save-restore reg 1c3f1c (clear: 00000008, set: 00000008, masked: no, mcr: no): ret=-22 <3> [48.491211] xe 0000:03:00.0: [drm] *ERROR* Tile0: GT1: discarding save-restore reg 1d3f1c (clear: 00000008, set: 00000008, masked: no, mcr: no): ret=-22 <3> [48.491225] xe 0000:03:00.0: [drm] *ERROR* Tile0: GT1: discarding save-restore reg 1c3f08 (clear: 00000020, set: 00000020, masked: no, mcr: no): ret=-22 <3> [48.491238] xe 0000:03:00.0: [drm] *ERROR* Tile0: GT1: discarding save-restore reg 1d3f08 (clear: 00000020, set: 00000020, masked: no, mcr: no): ret=-22 Re-drop the redundant Xe2_HPM-specific entries to eliminate the dmesg errors. 
Fixes: 58351f46de26 ("Merge v7.0-rc3 into drm-next") Cc: Simona Vetter Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Thomas Hellström Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7608 Reviewed-by: Simona Vetter Link: https://patch.msgid.link/20260312-wa_merge_fix-v1-1-2ec6607f1e0c@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 0eb96abc27df..d2fc1f50c508 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -263,20 +263,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { LSN_DIM_Z_WGT(1))) }, - /* Xe2_HPM */ - - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(1301), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, - { XE_RTP_NAME("14019449301"), - XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, - /* Xe3_LPG */ { XE_RTP_NAME("14021871409"), -- cgit v1.2.3 From 341a2c99c87ce6f62c6f4423fa641a39f0966bff Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Thu, 12 Mar 2026 21:32:45 +0530 Subject: drm/xe/uapi: Fix kernel-doc for DRM_XE_VM_BIND_FLAG_DECOMPRESS There is kernel-doc warning for DRM_XE_VM_BIND_FLAG_DECOMPRESS: ./include/uapi/drm/xe_drm.h:1060: WARNING: Block quote ends without a blank line; unexpected unindent. Fix the warning by adding the missing '%' prefix to DRM_XE_VM_BIND_FLAG_DECOMPRESS in the kernel-doc list entry for struct drm_xe_vm_bind_op. 
Fixes: 2270bd7124f4 ("drm/xe: add VM_BIND DECOMPRESS uapi flag") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202603121515.gEMrFlTL-lkp@intel.com/ Cc: Matthew Auld Signed-off-by: Nitin Gote Reviewed-by: Matthew Auld Link: https://patch.msgid.link/20260312160244.809849-2-nitin.r.gote@intel.com Signed-off-by: Tejas Upadhyay --- include/uapi/drm/xe_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 0497b85fa12a..f8b2afb20540 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1057,7 +1057,7 @@ struct drm_xe_vm_destroy { * not invoke autoreset. Neither will stack variables going out of scope. * Therefore it's recommended to always explicitly reset the madvises when * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call. - * - DRM_XE_VM_BIND_FLAG_DECOMPRESS - Request on-device decompression for a MAP. + * - %DRM_XE_VM_BIND_FLAG_DECOMPRESS - Request on-device decompression for a MAP. * When set on a MAP bind operation, request the driver schedule an on-device * in-place decompression (via the migrate/resolve path) for the GPU mapping * created by this bind. Only valid for DRM_XE_VM_BIND_OP_MAP; usage on -- cgit v1.2.3 From c85ec5c5753a46b5c2aea1292536487be9470ffe Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 2 Mar 2026 16:17:33 -0800 Subject: drm/xe/guc: Fail immediately on GuC load error By using the same variable for both the return of poll_timeout_us and the return of the polled function guc_wait_ucode, the return value of the latter is overwritten and lost after exiting the polling loop. Since guc_wait_ucode returns -1 on GuC load failure, we lose that information and always continue as if the GuC had been loaded correctly. This is fixed by simply using 2 separate variables. 
Fixes: a4916b4da448 ("drm/xe/guc: Refactor GuC load to use poll_timeout_us()") Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matthew Brost Signed-off-by: Vinay Belgaumkar Link: https://patch.msgid.link/20260303001732.2540493-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index e75653a5e797..ad18be4688c5 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1176,14 +1176,14 @@ static int guc_wait_ucode(struct xe_guc *guc) struct xe_guc_pc *guc_pc = >->uc.guc.pc; u32 before_freq, act_freq, cur_freq; u32 status = 0, tries = 0; + int load_result, ret; ktime_t before; u64 delta_ms; - int ret; before_freq = xe_guc_pc_get_act_freq(guc_pc); before = ktime_get(); - ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret, + ret = poll_timeout_us(load_result = guc_load_done(gt, &status, &tries), load_result, 10 * USEC_PER_MSEC, GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false); @@ -1191,7 +1191,7 @@ static int guc_wait_ucode(struct xe_guc *guc) act_freq = xe_guc_pc_get_act_freq(guc_pc); cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc); - if (ret) { + if (ret || load_result <= 0) { xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n", status, delta_ms, xe_guc_pc_get_act_freq(guc_pc), xe_guc_pc_get_cur_freq_fw(guc_pc)); -- cgit v1.2.3 From 78f3bf00be4f15daead02ba32d4737129419c902 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 10 Mar 2026 18:50:33 -0400 Subject: drm/xe: Always kill exec queues in xe_guc_submit_pause_abort xe_guc_submit_pause_abort is intended to be called after something disastrous occurs (e.g., VF migration fails, device wedging, or driver unload) and should immediately trigger the teardown of remaining submission state. With that, kill any remaining queues in this function. 
Fixes: 7c4b7e34c83b ("drm/xe/vf: Abort VF post migration recovery on failure") Cc: stable@vger.kernel.org Signed-off-by: Zhanjun Dong Reviewed-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260310225039.1320161-2-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ca7aa4f358d0..b31e0e0af5cb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -2763,8 +2763,7 @@ void xe_guc_submit_pause_abort(struct xe_guc *guc) continue; xe_sched_submission_start(sched); - if (exec_queue_killed_or_banned_or_wedged(q)) - xe_guc_exec_queue_trigger_cleanup(q); + guc_exec_queue_kill(q); } mutex_unlock(&guc->submission_state.lock); } -- cgit v1.2.3 From a6ab444a111a59924bd9d0c1e0613a75a0a40b89 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 10 Mar 2026 18:50:34 -0400 Subject: drm/xe: Forcefully tear down exec queues in GuC submit fini In GuC submit fini, forcefully tear down any exec queues by disabling CTs, stopping the scheduler (which cleans up lost G2H), killing all remaining queues, and resuming scheduling to allow any remaining cleanup actions to complete and signal any remaining fences. Split guc_submit_fini into device related and software only part. Using device-managed and drm-managed action guarantees the correct ordering of cleanup. 
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Reviewed-by: Zhanjun Dong Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260310225039.1320161-3-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 26 +++++++++++++++++++-- drivers/gpu/drm/xe/xe_guc.h | 1 + drivers/gpu/drm/xe/xe_guc_submit.c | 48 ++++++++++++++++++++++++++++++-------- 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index ad18be4688c5..576f3d500390 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1399,15 +1399,37 @@ int xe_guc_enable_communication(struct xe_guc *guc) return 0; } -int xe_guc_suspend(struct xe_guc *guc) +/** + * xe_guc_softreset() - Soft reset GuC + * @guc: The GuC object + * + * Send soft reset command to GuC through mmio send. + * + * Return: 0 if success, otherwise error code + */ +int xe_guc_softreset(struct xe_guc *guc) { - struct xe_gt *gt = guc_to_gt(guc); u32 action[] = { XE_GUC_ACTION_CLIENT_SOFT_RESET, }; int ret; + if (!xe_uc_fw_is_running(&guc->fw)) + return 0; + ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); + if (ret) + return ret; + + return 0; +} + +int xe_guc_suspend(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = xe_guc_softreset(guc); if (ret) { xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret)); return ret; diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 66e7edc70ed9..02514914f404 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -44,6 +44,7 @@ int xe_guc_opt_in_features_enable(struct xe_guc *guc); void xe_guc_runtime_suspend(struct xe_guc *guc); void xe_guc_runtime_resume(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); +int xe_guc_softreset(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); int 
xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b31e0e0af5cb..8afd424b27fb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -47,6 +47,8 @@ #define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 +static int guc_submit_reset_prepare(struct xe_guc *guc); + static struct xe_guc * exec_queue_to_guc(struct xe_exec_queue *q) { @@ -238,7 +240,7 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) EXEC_QUEUE_STATE_BANNED)); } -static void guc_submit_fini(struct drm_device *drm, void *arg) +static void guc_submit_sw_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; struct xe_device *xe = guc_to_xe(guc); @@ -256,6 +258,19 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) xa_destroy(&guc->submission_state.exec_queue_lookup); } +static void guc_submit_fini(void *arg) +{ + struct xe_guc *guc = arg; + + /* Forcefully kill any remaining exec queues */ + xe_guc_ct_stop(&guc->ct); + guc_submit_reset_prepare(guc); + xe_guc_softreset(guc); + xe_guc_submit_stop(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); +} + static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; @@ -325,7 +340,11 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) guc->submission_state.initialized = true; - return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); + err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc); + if (err) + return err; + + return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc); } /* @@ -2298,6 +2317,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; + bool do_destroy = false; /* Stop scheduling + flush any DRM scheduler operations */ 
xe_sched_submission_stop(sched); @@ -2305,7 +2325,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) /* Clean up lost G2H + reset engine state */ if (exec_queue_registered(q)) { if (exec_queue_destroyed(q)) - __guc_exec_queue_destroy(guc, q); + do_destroy = true; } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -2341,18 +2361,15 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) xe_guc_exec_queue_trigger_cleanup(q); } } + + if (do_destroy) + __guc_exec_queue_destroy(guc, q); } -int xe_guc_submit_reset_prepare(struct xe_guc *guc) +static int guc_submit_reset_prepare(struct xe_guc *guc) { int ret; - if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) - return 0; - - if (!guc->submission_state.initialized) - return 0; - /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset @@ -2367,6 +2384,17 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) return ret; } +int xe_guc_submit_reset_prepare(struct xe_guc *guc) +{ + if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) + return 0; + + if (!guc->submission_state.initialized) + return 0; + + return guc_submit_reset_prepare(guc); +} + void xe_guc_submit_reset_wait(struct xe_guc *guc) { wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || -- cgit v1.2.3 From e25ba41c8227c5393c16e4aab398076014bd345f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 10 Mar 2026 18:50:35 -0400 Subject: drm/xe: Trigger queue cleanup if not in wedged mode 2 The intent of wedging a device is to allow queues to continue running only in wedged mode 2. In other modes, queues should initiate cleanup and signal all remaining fences. Fix xe_guc_submit_wedge to correctly clean up queues when wedge mode != 2. 
Fixes: 7dbe8af13c18 ("drm/xe: Wedge the entire device") Cc: stable@vger.kernel.org Reviewed-by: Zhanjun Dong Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260310225039.1320161-4-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8afd424b27fb..cb32053d57ec 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1319,6 +1319,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc, */ void xe_guc_submit_wedge(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; @@ -1333,20 +1334,28 @@ void xe_guc_submit_wedge(struct xe_guc *guc) if (!guc->submission_state.initialized) return; - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; " - "Although device is wedged.\n", - xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)); - return; - } + if (xe->wedged.mode == 2) { + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, + guc_submit_wedged_fini, guc); + if (err) { + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " + "Although device is wedged.\n"); + return; + } - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (xe_exec_queue_get_unless_zero(q)) - set_exec_queue_wedged(q); - mutex_unlock(&guc->submission_state.lock); + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (xe_exec_queue_get_unless_zero(q)) + set_exec_queue_wedged(q); + mutex_unlock(&guc->submission_state.lock); + } else { + /* Forcefully kill any remaining exec queues, signal fences */ + 
guc_submit_reset_prepare(guc); + xe_guc_submit_stop(guc); + xe_guc_softreset(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); + } } static bool guc_submit_hint_wedged(struct xe_guc *guc) -- cgit v1.2.3 From a7f607610da721f77db358b09be8091e60bd8e89 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Tue, 10 Mar 2026 18:50:36 -0400 Subject: drm/xe: Use XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET enum instead of magic number Replace the magic number 2 with the proper enum value XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET for better code readability and maintainability. Signed-off-by: Zhanjun Dong Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260310225039.1320161-5-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index cb32053d57ec..a145234f662b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1334,12 +1334,13 @@ void xe_guc_submit_wedge(struct xe_guc *guc) if (!guc->submission_state.initialized) return; - if (xe->wedged.mode == 2) { + if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) { err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { - xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " - "Although device is wedged.\n"); + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; " + "Although device is wedged.\n", + xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)); return; } -- cgit v1.2.3 From dace8cb0032f57ea67c87b3b92ad73c89dd2db44 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Tue, 10 Mar 2026 18:50:37 -0400 Subject: drm/xe/guc: Ensure CT state transitions via STOP before DISABLED The GuC CT state transition requires moving to the STOP state before entering the DISABLED state. 
Update the driver teardown sequence to make the proper state machine transitions. Fixes: ee4b32220a6b ("drm/xe/guc: Add devm release action to safely tear down CT") Cc: stable@vger.kernel.org Signed-off-by: Zhanjun Dong Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260310225039.1320161-6-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 496c6c77bee6..3b1c03743f83 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -352,6 +352,7 @@ static void guc_action_disable_ct(void *arg) { struct xe_guc_ct *ct = arg; + xe_guc_ct_stop(ct); guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); } -- cgit v1.2.3 From 9a9d960dce05b45b2a7dacd2c98d602480812a13 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Tue, 10 Mar 2026 18:50:38 -0400 Subject: drm/xe/uc: Drop xe_guc_sanitize in favor of managed cleanup If the firmware fails to load in GT resets the device is wedged also initiating a GuC state cleanup. 
Signed-off-by: Zhanjun Dong Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260310225039.1320161-7-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_uc.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index d9aa845a308d..75091bde0d50 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -157,23 +157,19 @@ static int vf_uc_load_hw(struct xe_uc *uc) err = xe_gt_sriov_vf_connect(uc_to_gt(uc)); if (err) - goto err_out; + return err; uc->guc.submission_state.enabled = true; err = xe_guc_opt_in_features_enable(&uc->guc); if (err) - goto err_out; + return err; err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) - goto err_out; + return err; return 0; - -err_out: - xe_guc_sanitize(&uc->guc); - return err; } /* @@ -205,19 +201,19 @@ int xe_uc_load_hw(struct xe_uc *uc) ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (ret) - goto err_out; + return ret; ret = xe_guc_post_load_init(&uc->guc); if (ret) - goto err_out; + return ret; ret = xe_guc_pc_start(&uc->guc.pc); if (ret) - goto err_out; + return ret; ret = xe_guc_rc_enable(&uc->guc); if (ret) - goto err_out; + return ret; xe_guc_engine_activity_enable_stats(&uc->guc); @@ -232,10 +228,6 @@ int xe_uc_load_hw(struct xe_uc *uc) xe_gsc_load_start(&uc->gsc); return 0; - -err_out: - xe_guc_sanitize(&uc->guc); - return ret; } int xe_uc_reset_prepare(struct xe_uc *uc) -- cgit v1.2.3 From 4f3a998a173b4325c2efd90bdadc6ccd3ad9a431 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 10 Mar 2026 18:50:39 -0400 Subject: drm/xe: Open-code GGTT MMIO access protection GGTT MMIO access is currently protected by hotplug (drm_dev_enter), which works correctly when the driver loads successfully and is later unbound or unloaded. However, if driver load fails, this protection is insufficient because drm_dev_unplug() is never called. 
Additionally, devm release functions cannot guarantee that all BOs with GGTT mappings are destroyed before the GGTT MMIO region is removed, as some BOs may be freed asynchronously by worker threads. To address this, introduce an open-coded flag, protected by the GGTT lock, that guards GGTT MMIO access. The flag is cleared during the dev_fini_ggtt devm release function to ensure MMIO access is disabled once teardown begins. Cc: stable@vger.kernel.org Fixes: 919bb54e989c ("drm/xe: Fix missing runtime outer protection for ggtt_remove_node") Reviewed-by: Zhanjun Dong Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260310225039.1320161-8-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0f2e3af49912..21071b64b09d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -66,6 +66,9 @@ * give us the correct placement for free. */ +#define XE_GGTT_FLAGS_64K BIT(0) +#define XE_GGTT_FLAGS_ONLINE BIT(1) + /** * struct xe_ggtt_node - A node in GGTT. * @@ -117,6 +120,8 @@ struct xe_ggtt { * @flags: Flags for this GGTT * Acceptable flags: * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. 
+ * - %XE_GGTT_FLAGS_ONLINE - is GGTT online, protected by ggtt->lock + * after init */ unsigned int flags; /** @scratch: Internal object allocation used as a scratch page */ @@ -367,6 +372,8 @@ static void dev_fini_ggtt(void *arg) { struct xe_ggtt *ggtt = arg; + scoped_guard(mutex, &ggtt->lock) + ggtt->flags &= ~XE_GGTT_FLAGS_ONLINE; drain_workqueue(ggtt->wq); } @@ -437,6 +444,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + ggtt->flags |= XE_GGTT_FLAGS_ONLINE; return devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); } ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */ @@ -465,13 +473,10 @@ static void ggtt_node_fini(struct xe_ggtt_node *node) static void ggtt_node_remove(struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = node->ggtt; - struct xe_device *xe = tile_to_xe(ggtt->tile); bool bound; - int idx; - - bound = drm_dev_enter(&xe->drm, &idx); mutex_lock(&ggtt->lock); + bound = ggtt->flags & XE_GGTT_FLAGS_ONLINE; if (bound) xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node)); drm_mm_remove_node(&node->base); @@ -484,8 +489,6 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) if (node->invalidate_on_remove) xe_ggtt_invalidate(ggtt); - drm_dev_exit(idx); - free_node: ggtt_node_fini(node); } -- cgit v1.2.3 From 440ec190c2fc06c368096df4862213f3f795db37 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 12 Mar 2026 20:20:11 +0100 Subject: drm/pagemap: Unlock and put folios when possible If the page is part of a folio, unlock and put the whole folio at once instead of individual pages one after the other. This will reduce the amount of operations once device THP are in use. Cc: Andrew Morton Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Liam R. 
Howlett Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Zi Yan Cc: Alistair Popple Cc: Balbir Singh Cc: linux-mm@kvack.org Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Francois Dugast Reviewed-by: Balbir Singh Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260312192126.2024853-2-francois.dugast@intel.com --- drivers/gpu/drm/drm_pagemap.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 862675ac5bb2..f453a12b6a8e 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -154,15 +154,15 @@ static void drm_pagemap_zdd_put(struct drm_pagemap_zdd *zdd) } /** - * drm_pagemap_migration_unlock_put_page() - Put a migration page - * @page: Pointer to the page to put + * drm_pagemap_migration_unlock_put_folio() - Put a migration folio + * @folio: Pointer to the folio to put * - * This function unlocks and puts a page. + * This function unlocks and puts a folio. 
*/ -static void drm_pagemap_migration_unlock_put_page(struct page *page) +static void drm_pagemap_migration_unlock_put_folio(struct folio *folio) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } /** @@ -177,15 +177,23 @@ static void drm_pagemap_migration_unlock_put_pages(unsigned long npages, { unsigned long i; - for (i = 0; i < npages; ++i) { + for (i = 0; i < npages;) { struct page *page; + struct folio *folio; + unsigned int order = 0; if (!migrate_pfn[i]) - continue; + goto next; page = migrate_pfn_to_page(migrate_pfn[i]); - drm_pagemap_migration_unlock_put_page(page); + folio = page_folio(page); + order = folio_order(folio); + + drm_pagemap_migration_unlock_put_folio(folio); migrate_pfn[i] = 0; + +next: + i += NR_PAGES(order); } } -- cgit v1.2.3 From 2e03c0c5c59a086df534e15ddde03cb33bc475c4 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 12 Mar 2026 20:20:12 +0100 Subject: drm/pagemap: Add helper to access zone_device_data This new helper helps ensure all accesses to zone_device_data use the correct API whether the page is part of a folio or not. v2: - Move to drm_pagemap.h, stick to folio_zone_device_data (Matthew Brost) - Return struct drm_pagemap_zdd * (Matthew Brost) v3: - Add stub for !CONFIG_ZONE_DEVICE (CI) Cc: Andrew Morton Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Liam R. 
Howlett Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Zi Yan Cc: Alistair Popple Cc: Balbir Singh Cc: linux-mm@kvack.org Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Francois Dugast Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260312192126.2024853-3-francois.dugast@intel.com --- drivers/gpu/drm/drm_gpusvm.c | 7 +++++-- drivers/gpu/drm/drm_pagemap.c | 21 ++++++++++++--------- include/drm/drm_pagemap.h | 21 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 35dd07297dd0..4b928fda5b12 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1488,12 +1488,15 @@ map_pages: order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); if (is_device_private_page(page) || is_device_coherent_page(page)) { + struct drm_pagemap_zdd *__zdd = + drm_pagemap_page_zone_device_data(page); + if (!ctx->allow_mixed && - zdd != page->zone_device_data && i > 0) { + zdd != __zdd && i > 0) { err = -EOPNOTSUPP; goto err_unmap; } - zdd = page->zone_device_data; + zdd = __zdd; if (pagemap != page_pgmap(page)) { if (pagemap) { err = -EOPNOTSUPP; diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index f453a12b6a8e..733a3857947c 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -252,7 +252,7 @@ static int drm_pagemap_migrate_map_pages(struct device *dev, order = folio_order(folio); if (is_device_private_page(page)) { - struct drm_pagemap_zdd *zdd = page->zone_device_data; + struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(page); struct drm_pagemap *dpagemap = zdd->dpagemap; struct drm_pagemap_addr addr; @@ -323,7 +323,7 @@ static void drm_pagemap_migrate_unmap_pages(struct device *dev, goto next; if (is_zone_device_page(page)) { - struct drm_pagemap_zdd *zdd = page->zone_device_data; + struct drm_pagemap_zdd *zdd = 
drm_pagemap_page_zone_device_data(page); struct drm_pagemap *dpagemap = zdd->dpagemap; dpagemap->ops->device_unmap(dpagemap, dev, &pagemap_addr[i]); @@ -601,7 +601,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, pages[i] = NULL; if (src_page && is_device_private_page(src_page)) { - struct drm_pagemap_zdd *src_zdd = src_page->zone_device_data; + struct drm_pagemap_zdd *src_zdd = + drm_pagemap_page_zone_device_data(src_page); if (page_pgmap(src_page) == pagemap && !mdetails->can_migrate_same_pagemap) { @@ -723,8 +724,8 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas, goto next; if (fault_page) { - if (src_page->zone_device_data != - fault_page->zone_device_data) + if (drm_pagemap_page_zone_device_data(src_page) != + drm_pagemap_page_zone_device_data(fault_page)) goto next; } @@ -1065,7 +1066,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, void *buf; int i, err = 0; - zdd = page->zone_device_data; + zdd = drm_pagemap_page_zone_device_data(page); if (time_before64(get_jiffies_64(), zdd->devmem_allocation->timeslice_expiration)) return 0; @@ -1148,7 +1149,9 @@ err_out: */ static void drm_pagemap_folio_free(struct folio *folio) { - drm_pagemap_zdd_put(folio->page.zone_device_data); + struct page *page = folio_page(folio, 0); + + drm_pagemap_zdd_put(drm_pagemap_page_zone_device_data(page)); } /** @@ -1164,7 +1167,7 @@ static void drm_pagemap_folio_free(struct folio *folio) */ static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf) { - struct drm_pagemap_zdd *zdd = vmf->page->zone_device_data; + struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(vmf->page); int err; err = __drm_pagemap_migrate_to_ram(vmf->vma, @@ -1230,7 +1233,7 @@ EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init); */ struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page) { - struct drm_pagemap_zdd *zdd = page->zone_device_data; + struct drm_pagemap_zdd *zdd = 
drm_pagemap_page_zone_device_data(page); return zdd->devmem_allocation->dpagemap; } diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index c848f578e3da..75e6ca58922d 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -4,6 +4,7 @@ #include #include +#include #include #define NR_PAGES(order) (1U << (order)) @@ -367,6 +368,26 @@ void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim int drm_pagemap_reinit(struct drm_pagemap *dpagemap); +/** + * drm_pagemap_page_zone_device_data() - Page to zone_device_data + * @page: Pointer to the page + * + * Return: Page's zone_device_data + */ +static inline struct drm_pagemap_zdd *drm_pagemap_page_zone_device_data(struct page *page) +{ + struct folio *folio = page_folio(page); + + return folio_zone_device_data(folio); +} + +#else + +static inline struct drm_pagemap_zdd *drm_pagemap_page_zone_device_data(struct page *page) +{ + return NULL; +} + #endif /* IS_ENABLED(CONFIG_ZONE_DEVICE) */ #endif -- cgit v1.2.3 From 139ab31aea8a9436460568d556b432bb7e9311f7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Mar 2026 20:20:13 +0100 Subject: drm/pagemap: Correct cpages calculation for migrate_vma_setup cpages returned from migrate_vma_setup represents the total number of individual pages found, not the number of 4K pages. The math in drm_pagemap_migrate_to_devmem for npages is based on the number of 4K pages, so cpages != npages can fail even if the entire memory range is found in migrate_vma_setup (e.g., when a single 2M page is found). Add drm_pagemap_cpages, which converts cpages to the number of 4K pages found. Cc: Andrew Morton Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Liam R. 
Howlett Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Zi Yan Cc: Alistair Popple Cc: Balbir Singh Cc: linux-mm@kvack.org Reviewed-by: Francois Dugast Signed-off-by: Francois Dugast Reviewed-by: Balbir Singh Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260312192126.2024853-4-francois.dugast@intel.com --- drivers/gpu/drm/drm_pagemap.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 733a3857947c..837b881883f9 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -452,6 +452,41 @@ out: return ret; } +/** + * drm_pagemap_cpages() - Count collected pages + * @migrate_pfn: Array of migrate_pfn entries to account + * @npages: Number of entries in @migrate_pfn + * + * Compute the total number of minimum-sized pages represented by the + * collected entries in @migrate_pfn. The total is derived from the + * order encoded in each entry. + * + * Return: Total number of minimum-sized pages. + */ +static int drm_pagemap_cpages(unsigned long *migrate_pfn, unsigned long npages) +{ + unsigned long i, cpages = 0; + + for (i = 0; i < npages;) { + struct page *page = migrate_pfn_to_page(migrate_pfn[i]); + struct folio *folio; + unsigned int order = 0; + + if (page) { + folio = page_folio(page); + order = folio_order(folio); + cpages += NR_PAGES(order); + } else if (migrate_pfn[i] & MIGRATE_PFN_COMPOUND) { + order = HPAGE_PMD_ORDER; + cpages += NR_PAGES(order); + } + + i += NR_PAGES(order); + } + + return cpages; +} + /** * drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory * @devmem_allocation: The device memory allocation to migrate to. 
@@ -554,7 +589,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, goto err_free; } - if (migrate.cpages != npages) { + if (migrate.cpages != npages && + drm_pagemap_cpages(migrate.src, npages) != npages) { /* * Some pages to migrate. But we want to migrate all or * nothing. Raced or unknown device pages. -- cgit v1.2.3 From 192cb1f5cb1694c45b7cac14519d7bad65ba22f6 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 12 Mar 2026 20:20:14 +0100 Subject: drm/pagemap: Enable THP support for GPU memory migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables support for Transparent Huge Pages (THP) for device pages by using MIGRATE_VMA_SELECT_COMPOUND during migration. It removes the need to split folios and loop multiple times over all pages to perform required operations at page level. Instead, we rely on newly introduced support for higher orders in drm_pagemap and folio-level API. In Xe, this drastically improves performance when using SVM. 
The GT stats below collected after a 2MB page fault show overall servicing is more than 7 times faster, and thanks to reduced CPU overhead the time spent on the actual copy goes from 23% without THP to 80% with THP: Without THP: svm_2M_pagefault_us: 966 svm_2M_migrate_us: 942 svm_2M_device_copy_us: 223 svm_2M_get_pages_us: 9 svm_2M_bind_us: 10 With THP: svm_2M_pagefault_us: 132 svm_2M_migrate_us: 128 svm_2M_device_copy_us: 106 svm_2M_get_pages_us: 1 svm_2M_bind_us: 2 v2: - Fix one occurrence of drm_pagemap_get_devmem_page() (Matthew Brost) v3: - Remove migrate_device_split_page() and folio_split_lock, instead rely on free_zone_device_folio() to split folios before freeing (Matthew Brost) - Assert folio order is HPAGE_PMD_ORDER (Matthew Brost) - Always use folio_set_zone_device_data() in split (Matthew Brost) v4: - Warn on compound device page, s/continue/goto next/ (Matthew Brost) v5: - Revert warn on compound device page - s/zone_device_page_init()/zone_device_folio_init() (Matthew Brost) Cc: Matthew Brost Cc: Thomas Hellström Cc: Michal Mrozek Cc: Andrew Morton Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Liam R. 
Howlett Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Zi Yan Cc: Alistair Popple Cc: Balbir Singh Cc: linux-mm@kvack.org Reviewed-by: Matthew Brost Signed-off-by: Francois Dugast Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260312192126.2024853-5-francois.dugast@intel.com --- drivers/gpu/drm/drm_pagemap.c | 72 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 837b881883f9..5002049e0198 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -200,16 +200,19 @@ next: /** * drm_pagemap_get_devmem_page() - Get a reference to a device memory page * @page: Pointer to the page + * @order: Order * @zdd: Pointer to the GPU SVM zone device data * * This function associates the given page with the specified GPU SVM zone * device data and initializes it for zone device usage. */ static void drm_pagemap_get_devmem_page(struct page *page, + unsigned int order, struct drm_pagemap_zdd *zdd) { - page->zone_device_data = drm_pagemap_zdd_get(zdd); - zone_device_page_init(page, page_pgmap(page), 0); + zone_device_folio_init((struct folio *)page, zdd->dpagemap->pagemap, + order); + folio_set_zone_device_data(page_folio(page), drm_pagemap_zdd_get(zdd)); } /** @@ -524,7 +527,7 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, .end = end, .pgmap_owner = pagemap->owner, .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT | - MIGRATE_VMA_SELECT_DEVICE_PRIVATE, + MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_SELECT_COMPOUND, }; unsigned long i, npages = npages_in_range(start, end); unsigned long own_pages = 0, migrated_pages = 0; @@ -630,11 +633,13 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, own_pages = 0; - for (i = 0; i < npages; ++i) { + for (i = 0; i < npages;) { + unsigned long j; struct page 
*page = pfn_to_page(migrate.dst[i]); struct page *src_page = migrate_pfn_to_page(migrate.src[i]); - cur.start = i; + unsigned int order = 0; + cur.start = i; pages[i] = NULL; if (src_page && is_device_private_page(src_page)) { struct drm_pagemap_zdd *src_zdd = @@ -644,7 +649,7 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, !mdetails->can_migrate_same_pagemap) { migrate.dst[i] = 0; own_pages++; - continue; + goto next; } if (mdetails->source_peer_migrates) { cur.dpagemap = src_zdd->dpagemap; @@ -660,7 +665,20 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, pages[i] = page; } migrate.dst[i] = migrate_pfn(migrate.dst[i]); - drm_pagemap_get_devmem_page(page, zdd); + + if (migrate.src[i] & MIGRATE_PFN_COMPOUND) { + drm_WARN_ONCE(dpagemap->drm, src_page && + folio_order(page_folio(src_page)) != HPAGE_PMD_ORDER, + "Unexpected folio order\n"); + + order = HPAGE_PMD_ORDER; + migrate.dst[i] |= MIGRATE_PFN_COMPOUND; + + for (j = 1; j < NR_PAGES(order) && i + j < npages; j++) + migrate.dst[i + j] = 0; + } + + drm_pagemap_get_devmem_page(page, order, zdd); /* If we switched the migrating drm_pagemap, migrate previous pages now */ err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst, @@ -670,7 +688,11 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, npages = i + 1; goto err_finalize; } + +next: + i += NR_PAGES(order); } + cur.start = npages; cur.ops = NULL; /* Force migration */ err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst, @@ -779,6 +801,8 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas, page = folio_page(folio, 0); mpfn[i] = migrate_pfn(page_to_pfn(page)); + if (order) + mpfn[i] |= MIGRATE_PFN_COMPOUND; next: if (page) addr += page_size(page); @@ -1034,8 +1058,15 @@ retry: if (err) goto err_finalize; - for (i = 0; i < npages; ++i) + for (i = 0; i < npages;) { + unsigned int order = 0; + 
pages[i] = migrate_pfn_to_page(src[i]); + if (pages[i]) + order = folio_order(page_folio(pages[i])); + + i += NR_PAGES(order); + } err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL); if (err) @@ -1088,7 +1119,8 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, .vma = vas, .pgmap_owner = page_pgmap(page)->owner, .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | - MIGRATE_VMA_SELECT_DEVICE_COHERENT, + MIGRATE_VMA_SELECT_DEVICE_COHERENT | + MIGRATE_VMA_SELECT_COMPOUND, .fault_page = page, }; struct drm_pagemap_migrate_details mdetails = {}; @@ -1154,8 +1186,15 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, if (err) goto err_finalize; - for (i = 0; i < npages; ++i) + for (i = 0; i < npages;) { + unsigned int order = 0; + pages[i] = migrate_pfn_to_page(migrate.src[i]); + if (pages[i]) + order = folio_order(page_folio(pages[i])); + + i += NR_PAGES(order); + } err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL); if (err) @@ -1213,9 +1252,22 @@ static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf) return err ? VM_FAULT_SIGBUS : 0; } +static void drm_pagemap_folio_split(struct folio *orig_folio, struct folio *new_folio) +{ + struct drm_pagemap_zdd *zdd; + + if (!new_folio) + return; + + new_folio->pgmap = orig_folio->pgmap; + zdd = folio_zone_device_data(orig_folio); + folio_set_zone_device_data(new_folio, drm_pagemap_zdd_get(zdd)); +} + static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = { .folio_free = drm_pagemap_folio_free, .migrate_to_ram = drm_pagemap_migrate_to_ram, + .folio_split = drm_pagemap_folio_split, }; /** -- cgit v1.2.3 From 27a5e78a23dd36e487678aee92364fc9ef6d6871 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Fri, 13 Mar 2026 13:34:38 +0530 Subject: drm/xe/i2c: Assert/Deassert I2C IRQ I2C IRQ is triggered using virtual wire. Assert/Deassert it in IRQ handler to allow subsequent interrupt generation. 
Signed-off-by: Raag Jadav Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/20260313080438.4166251-1-raag.jadav@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_i2c.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 1deb812fe01d..706783863d07 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -176,11 +176,18 @@ static bool xe_i2c_irq_present(struct xe_device *xe) */ void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { - if (!xe_i2c_irq_present(xe)) + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!(master_ctl & I2C_IRQ) || !xe_i2c_irq_present(xe)) return; - if (master_ctl & I2C_IRQ) - generic_handle_irq_safe(xe->i2c->adapter_irq); + /* Forward interrupt to I2C adapter */ + generic_handle_irq_safe(xe->i2c->adapter_irq); + + /* Deassert after I2C adapter clears the interrupt */ + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_INTX_DISABLE); + /* Reassert to allow subsequent interrupt generation */ + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, PCI_COMMAND_INTX_DISABLE, 0); } void xe_i2c_irq_reset(struct xe_device *xe) @@ -190,6 +197,7 @@ void xe_i2c_irq_reset(struct xe_device *xe) if (!xe_i2c_irq_present(xe)) return; + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_INTX_DISABLE); xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, ACPI_INTR_EN, 0); } @@ -201,6 +209,7 @@ void xe_i2c_irq_postinstall(struct xe_device *xe) return; xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, 0, ACPI_INTR_EN); + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, PCI_COMMAND_INTX_DISABLE, 0); } static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, -- cgit v1.2.3 From 65fcf19cb36bf43c893c3444d5bd120006cb843d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 11 Mar 2026 16:04:58 -0700 Subject: drm/xe: Include running dword offset in default_lrc dumps Printing a running dword offset in the default_lrc_* debugfs entries makes it easier for 
developers to find the right offsets to use in regs/xe_lrc_layout.h and/or compare the default LRC contents against the bspec-documented LRC layout. Reviewed-by: Dnyaneshwar Bhadane Link: https://patch.msgid.link/20260311-default_lrc_offsets-v1-1-58d8ed3aa081@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_lrc.c | 65 +++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 73a503d88217..7fb59386d1c5 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1902,6 +1902,7 @@ static int instr_dw(u32 cmd_header) static int dump_mi_command(struct drm_printer *p, struct xe_gt *gt, + u32 *start, u32 *dw, int remaining_dw) { @@ -1917,15 +1918,18 @@ static int dump_mi_command(struct drm_printer *p, while (num_noop < remaining_dw && (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) num_noop++; - drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); + drm_printf(p, "LRC[%#5lx] = [%#010x] MI_NOOP (%d dwords)\n", + dw - num_noop - start, inst_header, num_noop); return num_noop; case MI_TOPOLOGY_FILTER: - drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); + drm_printf(p, "LRC[%#5lx] = [%#010x] MI_TOPOLOGY_FILTER\n", + dw - start, inst_header); return 1; case MI_BATCH_BUFFER_END: - drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); + drm_printf(p, "LRC[%#5lx] = [%#010x] MI_BATCH_BUFFER_END\n", + dw - start, inst_header); /* Return 'remaining_dw' to consume the rest of the LRC */ return remaining_dw; } @@ -1939,39 +1943,43 @@ static int dump_mi_command(struct drm_printer *p, switch (inst_header & MI_OPCODE) { case MI_LOAD_REGISTER_IMM: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", - inst_header, (numdw - 1) / 2); + drm_printf(p, "LRC[%#5lx] = [%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", + dw - start, inst_header, (numdw - 1) / 2); for (int i = 1; i < numdw; i += 2) - drm_printf(p, " - 
%#6x = %#010x\n", dw[i], dw[i + 1]); + drm_printf(p, "LRC[%#5lx] = - %#6x = %#010x\n", + &dw[i] - start, dw[i], dw[i + 1]); return numdw; case MI_LOAD_REGISTER_MEM & MI_OPCODE: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", - inst_header, + drm_printf(p, "LRC[%#5lx] = [%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", + dw - start, inst_header, dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); if (numdw == 4) - drm_printf(p, " - %#6x = %#010llx\n", + drm_printf(p, "LRC[%#5lx] = - %#6x = %#010llx\n", + dw - start, dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); else - drm_printf(p, " - %*ph (%s)\n", - (int)sizeof(u32) * (numdw - 1), dw + 1, - numdw < 4 ? "truncated" : "malformed"); + drm_printf(p, "LRC[%#5lx] = - %*ph (%s)\n", + dw - start, (int)sizeof(u32) * (numdw - 1), + dw + 1, numdw < 4 ? "truncated" : "malformed"); return numdw; case MI_FORCE_WAKEUP: - drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); + drm_printf(p, "LRC[%#5lx] = [%#010x] MI_FORCE_WAKEUP\n", + dw - start, inst_header); return numdw; default: - drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", - inst_header, opcode, numdw); + drm_printf(p, "LRC[%#5lx] = [%#010x] unknown MI opcode %#x, likely %d dwords\n", + dw - start, inst_header, opcode, numdw); return numdw; } } static int dump_gfxpipe_command(struct drm_printer *p, struct xe_gt *gt, + u32 *start, u32 *dw, int remaining_dw) { @@ -1990,11 +1998,13 @@ static int dump_gfxpipe_command(struct drm_printer *p, switch (*dw & GFXPIPE_MATCH_MASK) { #define MATCH(cmd) \ case cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ + drm_printf(p, "LRC[%#5lx] = [%#010x] " #cmd " (%d dwords)\n", \ + dw - start, *dw, numdw); \ return numdw #define MATCH3D(cmd) \ case CMD_##cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ + drm_printf(p, "LRC[%#5lx] = [%#010x] " #cmd " (%d dwords)\n", \ + dw - start, *dw, numdw); \ return numdw MATCH(STATE_BASE_ADDRESS); 
@@ -2126,14 +2136,15 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTER_2); default: - drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", - *dw, pipeline, opcode, subopcode, numdw); + drm_printf(p, "LRC[%#5lx] = [%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", + dw - start, *dw, pipeline, opcode, subopcode, numdw); return numdw; } } static int dump_gfx_state_command(struct drm_printer *p, struct xe_gt *gt, + u32 *start, u32 *dw, int remaining_dw) { @@ -2151,8 +2162,8 @@ static int dump_gfx_state_command(struct drm_printer *p, MATCH(STATE_WRITE_INLINE); default: - drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", - *dw, opcode, numdw); + drm_printf(p, "LRC[%#5lx] = [%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", + dw - start, *dw, opcode, numdw); return numdw; } } @@ -2161,7 +2172,7 @@ void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class hwe_class) { - u32 *dw; + u32 *dw, *start; int remaining_dw, num_dw; if (!gt->default_lrc[hwe_class]) { @@ -2174,18 +2185,20 @@ void xe_lrc_dump_default(struct drm_printer *p, * hardware status page. 
*/ dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; + start = dw; remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; while (remaining_dw > 0) { if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { - num_dw = dump_mi_command(p, gt, dw, remaining_dw); + num_dw = dump_mi_command(p, gt, start, dw, remaining_dw); } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { - num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); + num_dw = dump_gfxpipe_command(p, gt, start, dw, remaining_dw); } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { - num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); + num_dw = dump_gfx_state_command(p, gt, start, dw, remaining_dw); } else { num_dw = min(instr_dw(*dw), remaining_dw); - drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", + drm_printf(p, "LRC[%#5lx] = [%#10x] Unknown instruction of type %#x, likely %d dwords\n", + dw - start, *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), num_dw); } -- cgit v1.2.3 From 1d123587525db86cc8f0d2beb35d9e33ca3ade83 Mon Sep 17 00:00:00 2001 From: Brian Nguyen Date: Thu, 5 Mar 2026 17:15:48 +0000 Subject: drm/xe: Skip over non leaf pte for PRL generation The check using xe_child->base.children was insufficient in determining if a pte was a leaf node. So explicitly skip over every non-leaf pt and conditionally abort if there is a scenario where a non-leaf pt is interleaved between leaf pt, which results in the page walker skipping over some leaf pt. Note that the behavior being targeted for abort is PD[0] = 2M PTE PD[1] = PT -> 512 4K PTEs PD[2] = 2M PTE results in abort, page walker won't descend PD[1]. With new abort, ensuring valid PRL before handling a second abort. v2: - Revert to previous assert. - Revised non-leaf handling for interleaved child pt and leaf pte. - Update comments to specifications. (Stuart) - Remove unnecessary XE_PTE_PS64. (Matthew B) v3: - Modify secondary abort to only check non-leaf PTEs. 
(Matthew B) Fixes: b912138df299 ("drm/xe: Create page reclaim list on unbind") Signed-off-by: Brian Nguyen Reviewed-by: Matthew Brost Cc: Stuart Summers Link: https://patch.msgid.link/20260305171546.67691-6-brian3.nguyen@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pt.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 13b355fadd58..2d9ce2c4cb4f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1655,14 +1655,35 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, XE_WARN_ON(!level); /* Check for leaf node */ if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && - (!xe_child->base.children || !xe_child->base.children[first])) { + xe_child->level <= MAX_HUGEPTE_LEVEL) { struct iosys_map *leaf_map = &xe_child->bo->vmap; pgoff_t count = xe_pt_num_entries(addr, next, xe_child->level, walk); for (pgoff_t i = 0; i < count; i++) { - u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64); + u64 pte; int ret; + /* + * If not a leaf pt, skip unless non-leaf pt is interleaved between + * leaf ptes which causes the page walk to skip over the child leaves + */ + if (xe_child->base.children && xe_child->base.children[first + i]) { + u64 pt_size = 1ULL << walk->shifts[xe_child->level]; + bool edge_pt = (i == 0 && !IS_ALIGNED(addr, pt_size)) || + (i == count - 1 && !IS_ALIGNED(next, pt_size)); + + if (!edge_pt) { + xe_page_reclaim_list_abort(xe_walk->tile->primary_gt, + xe_walk->prl, + "PT is skipped by walk at level=%u offset=%lu", + xe_child->level, first + i); + break; + } + continue; + } + + pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64); + /* * In rare scenarios, pte may not be written yet due to racy conditions. * In such cases, invalidate the PRL and fallback to full PPC invalidation. 
@@ -1674,9 +1695,8 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, } /* Ensure it is a defined page */ - xe_tile_assert(xe_walk->tile, - xe_child->level == 0 || - (pte & (XE_PTE_PS64 | XE_PDE_PS_2M | XE_PDPE_PS_1G))); + xe_tile_assert(xe_walk->tile, xe_child->level == 0 || + (pte & (XE_PDE_PS_2M | XE_PDPE_PS_1G))); /* An entry should be added for 64KB but contigious 4K have XE_PTE_PS64 */ if (pte & XE_PTE_PS64) @@ -1701,11 +1721,11 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, killed = xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); /* - * Verify PRL is active and if entry is not a leaf pte (base.children conditions), - * there is a potential need to invalidate the PRL if any PTE (num_live) are dropped. + * Verify if any PTE are potentially dropped at non-leaf levels, either from being + * killed or the page walk covers the region. */ - if (xe_walk->prl && level > 1 && xe_child->num_live && - xe_child->base.children && xe_child->base.children[first]) { + if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) && + xe_child->level > MAX_HUGEPTE_LEVEL && xe_child->num_live) { bool covered = xe_pt_covers(addr, next, xe_child->level, &xe_walk->base); /* -- cgit v1.2.3 From d88fa967dc4335196d9ec2e07b2269bb53feec0d Mon Sep 17 00:00:00 2001 From: Brian Nguyen Date: Thu, 5 Mar 2026 17:15:49 +0000 Subject: drm/xe: Move page reclaim done_handler to own func Originally, page reclamation is handled by the same fence as tlb invalidation and uses its seqno, so there was no reason to separate out the handlers. However in hindsight, for readability, and possible future changes, it seems more beneficial to move this all out to its own function. 
Signed-off-by: Brian Nguyen Reviewed-by: Matthew Brost Reviewed-by: Shuicheng Lin Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260305171546.67691-7-brian3.nguyen@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_guc_ct.c | 23 ++++++++--------------- drivers/gpu/drm/xe/xe_page_reclaim.c | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_page_reclaim.h | 3 +++ 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 3b1c03743f83..a11cff7a20be 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -31,6 +31,7 @@ #include "xe_guc_submit.h" #include "xe_guc_tlb_inval.h" #include "xe_map.h" +#include "xe_page_reclaim.h" #include "xe_pm.h" #include "xe_sleep.h" #include "xe_sriov_vf.h" @@ -1630,17 +1631,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_pagefault_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: - case XE_GUC_ACTION_PAGE_RECLAMATION_DONE: - /* - * Page reclamation is an extension of TLB invalidation. Both - * operations share the same seqno and fence. When either - * action completes, we need to signal the corresponding - * fence. Since the handling logic (lookup fence by seqno, - * fence signalling) is identical, we use the same handler - * for both G2H events. 
- */ ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; + case XE_GUC_ACTION_PAGE_RECLAMATION_DONE: + ret = xe_guc_page_reclaim_done_handler(guc, payload, adj_len); + break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); break; @@ -1848,15 +1843,13 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_pagefault_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: - case XE_GUC_ACTION_PAGE_RECLAMATION_DONE: - /* - * Seqno and fence handling of page reclamation and TLB - * invalidation is identical, so we can use the same handler - * for both actions. - */ __g2h_release_space(ct, len); ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; + case XE_GUC_ACTION_PAGE_RECLAMATION_DONE: + __g2h_release_space(ct, len); + ret = xe_guc_page_reclaim_done_handler(guc, payload, adj_len); + break; default: xe_gt_warn(gt, "NOT_POSSIBLE\n"); } diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c index e13c71a89da2..60b0fda59ce3 100644 --- a/drivers/gpu/drm/xe/xe_page_reclaim.c +++ b/drivers/gpu/drm/xe/xe_page_reclaim.c @@ -11,6 +11,7 @@ #include "xe_page_reclaim.h" #include "xe_gt_stats.h" +#include "xe_guc_tlb_inval.h" #include "xe_macros.h" #include "xe_pat.h" #include "xe_sa.h" @@ -130,3 +131,22 @@ int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl) return page ? 0 : -ENOMEM; } + +/** + * xe_guc_page_reclaim_done_handler() - Page reclaim done handler + * @guc: guc + * @msg: message indicating page reclamation done + * @len: length of message + * + * Page reclamation is an extension of TLB invalidation. Both + * operations share the same seqno and fence. When either + * action completes, we need to signal the corresponding + * fence. Since the handling logic is currently identical, this + * function delegates to the TLB invalidation handler. 
+ * + * Return: 0 on success, -EPROTO for malformed messages. + */ +int xe_guc_page_reclaim_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + return xe_guc_tlb_inval_done_handler(guc, msg, len); +} diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.h b/drivers/gpu/drm/xe/xe_page_reclaim.h index 3dd103e37beb..0412611f3af7 100644 --- a/drivers/gpu/drm/xe/xe_page_reclaim.h +++ b/drivers/gpu/drm/xe/xe_page_reclaim.h @@ -20,6 +20,7 @@ struct xe_tlb_inval; struct xe_tlb_inval_fence; struct xe_tile; struct xe_gt; +struct xe_guc; struct xe_vma; struct xe_guc_page_reclaim_entry { @@ -122,4 +123,6 @@ static inline void xe_page_reclaim_entries_put(struct xe_guc_page_reclaim_entry put_page(virt_to_page(entries)); } +int xe_guc_page_reclaim_done_handler(struct xe_guc *guc, u32 *msg, u32 len); + #endif /* _XE_PAGE_RECLAIM_H_ */ -- cgit v1.2.3 From 1b12096b4bc5177d685ae098fdb90260ffd5db6b Mon Sep 17 00:00:00 2001 From: Brian Nguyen Date: Thu, 5 Mar 2026 17:15:50 +0000 Subject: drm/xe: Skip adding PRL entry to NULL VMA NULL VMAs have no corresponding PTE, so skip adding a PRL entry to avoid an unnecessary PRL abort during unbind. Signed-off-by: Brian Nguyen Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260305171546.67691-8-brian3.nguyen@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_page_reclaim.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_page_reclaim.c b/drivers/gpu/drm/xe/xe_page_reclaim.c index 60b0fda59ce3..da1ed99cd3f8 100644 --- a/drivers/gpu/drm/xe/xe_page_reclaim.c +++ b/drivers/gpu/drm/xe/xe_page_reclaim.c @@ -27,12 +27,18 @@ * flushes. * - pat_index is transient display (1) * + * For cases of NULL VMA, there should be no corresponding PRL entry + * so skip over. + * * Return: true when page reclamation is unnecessary, false otherwise. 
*/ bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma) { u8 l3_policy; + if (xe_vma_is_null(vma)) + return true; + l3_policy = xe_pat_index_get_l3_policy(tile->xe, vma->attr.pat_index); /* -- cgit v1.2.3 From 4ff57c5e8dbba23b5457be12f9709d5c016da16e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Thu, 12 Mar 2026 22:36:30 -0700 Subject: drm/xe/oa: Allow reading after disabling OA stream Some OA data might be present in the OA buffer when OA stream is disabled. Allow UMD's to retrieve this data, so that all data till the point when OA stream is disabled can be retrieved. v2: Update tail pointer after disable (Umesh) Fixes: efb315d0a013 ("drm/xe/oa/uapi: Read file_operation") Cc: stable@vger.kernel.org Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patch.msgid.link/20260313053630.3176100-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index dcd393b0931a..2efc16c3a95d 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -543,8 +543,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf, size_t offset = 0; int ret; - /* Can't read from disabled streams */ - if (!stream->enabled || !stream->sample) + if (!stream->sample) return -EINVAL; if (!(file->f_flags & O_NONBLOCK)) { @@ -1456,6 +1455,10 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream) if (stream->sample) hrtimer_cancel(&stream->poll_check_timer); + + /* Update stream->oa_buffer.tail to allow any final reports to be read */ + if (xe_oa_buffer_check_unlocked(stream)) + wake_up(&stream->poll_wq); } static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) -- cgit v1.2.3 From 466e75d48038af252187855058a7a9312db9d2f8 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 12 Mar 2026 05:53:09 -0700 Subject: drm/xe/lrc: Fix uninitialized new_ts when capturing 
context timestamp Getting engine specific CTX TIMESTAMP register can fail. In that case, if the context is active, new_ts is uninitialized. Fix that case by initializing new_ts to the last value that was sampled in SW - lrc->ctx_timestamp. Flagged by static analysis. v2: Fix new_ts initialization (Ashutosh) Fixes: bb63e7257e63 ("drm/xe: Avoid toggling schedule state to check LRC timestamp in TDR") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://patch.msgid.link/20260312125308.3126607-2-umesh.nerlige.ramappa@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 7fb59386d1c5..35b365ac55e5 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -2576,14 +2576,14 @@ static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) * @lrc: Pointer to the lrc. * * Return latest ctx timestamp. With support for active contexts, the - * calculation may bb slightly racy, so follow a read-again logic to ensure that + * calculation may be slightly racy, so follow a read-again logic to ensure that * the context is still active before returning the right timestamp. * * Returns: New ctx timestamp value */ u64 xe_lrc_timestamp(struct xe_lrc *lrc) { - u64 lrc_ts, reg_ts, new_ts; + u64 lrc_ts, reg_ts, new_ts = lrc->ctx_timestamp; u32 engine_id; lrc_ts = xe_lrc_ctx_timestamp(lrc); -- cgit v1.2.3 From 7937ea733f79b3f25e802a0c8360bf7423856f36 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Fri, 13 Mar 2026 12:46:09 +0530 Subject: drm/xe: Fix missing runtime PM reference in ccs_mode_store MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ccs_mode_store() calls xe_gt_reset() which internally invokes xe_pm_runtime_get_noresume(). 
That function requires the caller to already hold an outer runtime PM reference and warns if none is held: [46.891177] xe 0000:03:00.0: [drm] Missing outer runtime PM protection [46.891178] WARNING: drivers/gpu/drm/xe/xe_pm.c:885 at xe_pm_runtime_get_noresume+0x8b/0xc0 Fix this by protecting xe_gt_reset() with the scope-based guard(xe_pm_runtime)(xe), which is the preferred form when the reference lifetime matches a single scope. v2: - Use scope-based guard(xe_pm_runtime)(xe) (Shuicheng) - Update commit message accordingly Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7593 Fixes: 480b358e7d8e ("drm/xe: Do not wake device during a GT reset") Cc: # v6.19+ Cc: Thomas Hellström Cc: Matthew Brost Cc: Rodrigo Vivi Cc: Shuicheng Lin Suggested-by: Matthew Auld Signed-off-by: Sanjay Yadav Reviewed-by: Shuicheng Lin Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20260313071608.3459480-2-sanjay.kumar.yadav@intel.com --- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index b35be36b0eaa..baee1f4a6b01 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" @@ -163,6 +164,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; xe_gt_record_user_engines(gt); + guard(xe_pm_runtime)(xe); xe_gt_reset(gt); /* We may end PF lockdown once CCS mode is default again */ -- cgit v1.2.3 From c56af8fee9ec3124fa6c8cf3d2966070a7aed934 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 16 Mar 2026 09:29:15 +0530 Subject: drm/ttm: Fix spelling mistakes and comment style in ttm_resource.c Correct several spelling mistakes and textual 
inconsistencies in kdoc comments and inline comments. Suggested-by: Sanjay Yadav Signed-off-by: Varun Gupta Reviewed-by: Nitin Gote Link: https://patch.msgid.link/20260316035915.1403424-1-varun.gupta@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/ttm/ttm_resource.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 192fca24f37e..9f36631d48b6 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -37,7 +37,7 @@ #include #include -/* Detach the cursor from the bulk move list*/ +/* Detach the cursor from the bulk move list */ static void ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor) { @@ -105,9 +105,9 @@ void ttm_resource_cursor_init(struct ttm_resource_cursor *cursor, * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage * @cursor: The struct ttm_resource_cursor to finalize. * - * The function pulls the LRU list cursor off any lists it was previusly + * The function pulls the LRU list cursor off any lists it was previously * attached to. Needs to be called with the LRU lock held. The function - * can be called multiple times after eachother. + * can be called multiple times after each other. */ void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor) { @@ -317,10 +317,10 @@ void ttm_resource_move_to_lru_tail(struct ttm_resource *res) } /** - * ttm_resource_init - resource object constructure - * @bo: buffer object this resources is allocated for + * ttm_resource_init - resource object constructor + * @bo: buffer object this resource is allocated for * @place: placement of the resource - * @res: the resource object to inistilize + * @res: the resource object to initialize * * Initialize a new resource object. Counterpart of ttm_resource_fini(). */ @@ -435,7 +435,7 @@ EXPORT_SYMBOL(ttm_resource_free); * @size: How many bytes the new allocation needs. 
* * Test if @res intersects with @place and @size. Used for testing if evictions - * are valueable or not. + * are valuable or not. * * Returns true if the res placement intersects with @place and @size. */ @@ -513,7 +513,7 @@ void ttm_resource_set_bo(struct ttm_resource *res, * @bdev: ttm device this manager belongs to * @size: size of managed resources in arbitrary units * - * Initialise core parts of a manager object. + * Initialize core parts of a manager object. */ void ttm_resource_manager_init(struct ttm_resource_manager *man, struct ttm_device *bdev, @@ -536,8 +536,8 @@ EXPORT_SYMBOL(ttm_resource_manager_init); /* * ttm_resource_manager_evict_all * - * @bdev - device to use - * @man - manager to use + * @bdev: device to use + * @man: manager to use * * Evict all the objects out of a memory manager until it is empty. * Part of memory manager cleanup sequence. @@ -882,7 +882,7 @@ out_err: /** * ttm_kmap_iter_linear_io_fini - Clean up an iterator for linear io memory - * @iter_io: The iterator to initialize + * @iter_io: The iterator to finalize * @bdev: The TTM device * @mem: The ttm resource representing the iomap. * @@ -921,15 +921,15 @@ DEFINE_SHOW_ATTRIBUTE(ttm_resource_manager); /** * ttm_resource_manager_create_debugfs - Create debugfs entry for specified * resource manager. - * @man: The TTM resource manager for which the debugfs stats file be creates + * @man: The TTM resource manager for which the debugfs stats file to be created * @parent: debugfs directory in which the file will reside * @name: The filename to create. * - * This function setups up a debugfs file that can be used to look + * This function sets up a debugfs file that can be used to look * at debug statistics of the specified ttm_resource_manager. 
*/ void ttm_resource_manager_create_debugfs(struct ttm_resource_manager *man, - struct dentry * parent, + struct dentry *parent, const char *name) { #if defined(CONFIG_DEBUG_FS) -- cgit v1.2.3 From 18fc0f1d7dbeada7810192fe739172c5abd2a902 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 17 Mar 2026 09:34:47 +0530 Subject: drm/xe/xe3p_lpg: Add Wa_16029437861 Wa_16029437861 requires disabling COAMA atomics by setting bit 22 (SQ_DISABLE_COAMA) of L3SQCREG2 (0xb104) for Xe3p_LPG graphics version 35.10 stepping A0..B0. This bit is already set by the existing Wa_14026144927 entry, so add the new WA ID to the same implementation. Signed-off-by: Varun Gupta Reviewed-by: Tejas Upadhyay Link: https://patch.msgid.link/20260317040447.1792687-1-varun.gupta@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_wa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d2fc1f50c508..1d77f5b6401f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -292,7 +292,7 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(MMIOATSREQLIMIT_GAM_WALK_3D, DIS_ATS_WRONLY_PG)) }, - { XE_RTP_NAME("14026144927"), + { XE_RTP_NAME("14026144927, 16029437861"), XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(L3SQCREG2, L3_SQ_DISABLE_COAMA_2WAY_COH | L3_SQ_DISABLE_COAMA)) -- cgit v1.2.3 From 2c440f2fccf35d18f1b7eafc9015f0230c25395b Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Tue, 17 Mar 2026 13:30:59 +0530 Subject: drm/xe: Extend Wa_14026781792 for xe3lpg Wa_14026781792 applies to all graphics versions from 30.00 through 35.10 (inclusive). Since there are no IPs between 30.05 and 35.10, consolidate the RTP rules into a single GRAPHICS_VERSION_RANGE(3000, 3510). 
v2: (Matt) - There are no IPs between 30.05 and 35.10 either, So, consolidate this into a single GRAPHICS_VERSION_RANGE(3000, 3510) - Also move it up to the top part of the table Signed-off-by: Nitin Gote Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260317080059.1275116-2-nitin.r.gote@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_wa.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 1d77f5b6401f..a02ee1282d35 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -656,6 +656,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("14026781792"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3510), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, DIS_TE_PATCH_CTRL)) + }, /* DG1 */ @@ -784,10 +788,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, - { XE_RTP_NAME("14026781792"), - XE_RTP_RULES(GRAPHICS_VERSION(3510), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(FF_MODE, DIS_TE_PATCH_CTRL)) - }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { -- cgit v1.2.3 From cb7415d8cbb750221b48e5beebe8f402719a20d9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 16 Mar 2026 14:54:22 -0700 Subject: drm/xe: Fix format specifier for printing pointer differences GCC and clang warn (or error with CONFIG_WERROR=y / W=e) several times when targeting 32-bit platforms along the lines of drivers/gpu/drm/xe/xe_lrc.c: In function 'dump_mi_command': drivers/gpu/drm/xe/xe_lrc.c:1921:40: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'int' [-Werror=format=] 1921 | drm_printf(p, "LRC[%#5lx] = [%#010x] MI_NOOP (%d dwords)\n", | 
~~~~^ | | | long unsigned int | %#5x 1922 | dw - num_noop - start, inst_header, num_noop); | ~~~~~~~~~~~~~~~~~~~~~ | | | int drivers/gpu/drm/xe/xe_lrc.c:1922:7: error: format specifies type 'unsigned long' but the argument has type '__ptrdiff_t' (aka 'int') [-Werror,-Wformat] 1921 | drm_printf(p, "LRC[%#5lx] = [%#010x] MI_NOOP (%d dwords)\n", | ~~~~~ | %#5tx 1922 | dw - num_noop - start, inst_header, num_noop); | ^~~~~~~~~~~~~~~~~~~~~ Use the '%tx' specifier for printing pointer differences, which clears up the warnings for 32-bit platforms while introducing no regressions for 64-bit platforms. Fixes: 65fcf19cb36b ("drm/xe: Include running dword offset in default_lrc dumps") Signed-off-by: Nathan Chancellor Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260316-drm-xe-fix-32-bit-wformat-ptrdiff-v1-1-0108b10b2b6b@kernel.org Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_lrc.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index c5cfd8f75a94..dde3bcff3c22 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1918,17 +1918,17 @@ static int dump_mi_command(struct drm_printer *p, while (num_noop < remaining_dw && (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) num_noop++; - drm_printf(p, "LRC[%#5lx] = [%#010x] MI_NOOP (%d dwords)\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] MI_NOOP (%d dwords)\n", dw - num_noop - start, inst_header, num_noop); return num_noop; case MI_TOPOLOGY_FILTER: - drm_printf(p, "LRC[%#5lx] = [%#010x] MI_TOPOLOGY_FILTER\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] MI_TOPOLOGY_FILTER\n", dw - start, inst_header); return 1; case MI_BATCH_BUFFER_END: - drm_printf(p, "LRC[%#5lx] = [%#010x] MI_BATCH_BUFFER_END\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] MI_BATCH_BUFFER_END\n", dw - start, inst_header); /* Return 'remaining_dw' to consume the rest of the LRC */ return remaining_dw; @@ -1943,35 +1943,35 @@ static int 
dump_mi_command(struct drm_printer *p, switch (inst_header & MI_OPCODE) { case MI_LOAD_REGISTER_IMM: - drm_printf(p, "LRC[%#5lx] = [%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", dw - start, inst_header, (numdw - 1) / 2); for (int i = 1; i < numdw; i += 2) - drm_printf(p, "LRC[%#5lx] = - %#6x = %#010x\n", + drm_printf(p, "LRC[%#5tx] = - %#6x = %#010x\n", &dw[i] - start, dw[i], dw[i + 1]); return numdw; case MI_LOAD_REGISTER_MEM & MI_OPCODE: - drm_printf(p, "LRC[%#5lx] = [%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", dw - start, inst_header, dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); if (numdw == 4) - drm_printf(p, "LRC[%#5lx] = - %#6x = %#010llx\n", + drm_printf(p, "LRC[%#5tx] = - %#6x = %#010llx\n", dw - start, dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); else - drm_printf(p, "LRC[%#5lx] = - %*ph (%s)\n", + drm_printf(p, "LRC[%#5tx] = - %*ph (%s)\n", dw - start, (int)sizeof(u32) * (numdw - 1), dw + 1, numdw < 4 ? 
"truncated" : "malformed"); return numdw; case MI_FORCE_WAKEUP: - drm_printf(p, "LRC[%#5lx] = [%#010x] MI_FORCE_WAKEUP\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] MI_FORCE_WAKEUP\n", dw - start, inst_header); return numdw; default: - drm_printf(p, "LRC[%#5lx] = [%#010x] unknown MI opcode %#x, likely %d dwords\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] unknown MI opcode %#x, likely %d dwords\n", dw - start, inst_header, opcode, numdw); return numdw; } @@ -1998,12 +1998,12 @@ static int dump_gfxpipe_command(struct drm_printer *p, switch (*dw & GFXPIPE_MATCH_MASK) { #define MATCH(cmd) \ case cmd: \ - drm_printf(p, "LRC[%#5lx] = [%#010x] " #cmd " (%d dwords)\n", \ + drm_printf(p, "LRC[%#5tx] = [%#010x] " #cmd " (%d dwords)\n", \ dw - start, *dw, numdw); \ return numdw #define MATCH3D(cmd) \ case CMD_##cmd: \ - drm_printf(p, "LRC[%#5lx] = [%#010x] " #cmd " (%d dwords)\n", \ + drm_printf(p, "LRC[%#5tx] = [%#010x] " #cmd " (%d dwords)\n", \ dw - start, *dw, numdw); \ return numdw @@ -2136,7 +2136,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTER_2); default: - drm_printf(p, "LRC[%#5lx] = [%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", dw - start, *dw, pipeline, opcode, subopcode, numdw); return numdw; } @@ -2162,7 +2162,7 @@ static int dump_gfx_state_command(struct drm_printer *p, MATCH(STATE_WRITE_INLINE); default: - drm_printf(p, "LRC[%#5lx] = [%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", + drm_printf(p, "LRC[%#5tx] = [%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", dw - start, *dw, opcode, numdw); return numdw; } @@ -2197,7 +2197,7 @@ void xe_lrc_dump_default(struct drm_printer *p, num_dw = dump_gfx_state_command(p, gt, start, dw, remaining_dw); } else { num_dw = min(instr_dw(*dw), remaining_dw); - 
drm_printf(p, "LRC[%#5lx] = [%#10x] Unknown instruction of type %#x, likely %d dwords\n", + drm_printf(p, "LRC[%#5tx] = [%#10x] Unknown instruction of type %#x, likely %d dwords\n", dw - start, *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), num_dw); -- cgit v1.2.3 From 4f53d8c6d23527d734fe3531d08e15cb170a0819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 17 Feb 2026 16:41:18 +0100 Subject: drm/xe/pf: Fix use-after-free in migration restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an error is returned from xe_sriov_pf_migration_restore_produce(), the data pointer is not set to NULL, which can trigger use-after-free in subsequent .write() calls. Set the pointer to NULL upon error to fix the problem. Fixes: 1ed30397c0b92 ("drm/xe/pf: Add support for encap/decap of bitstream to/from packet") Reported-by: Sebastian Österlund Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7230 Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260217154118.176902-1-michal.winiarski@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_sriov_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c index 968f32496282..2ae9eff2a7c0 100644 --- a/drivers/gpu/drm/xe/xe_sriov_packet.c +++ b/drivers/gpu/drm/xe/xe_sriov_packet.c @@ -341,6 +341,8 @@ ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid, ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data); if (ret) { xe_sriov_packet_free(*data); + *data = NULL; + return ret; } -- cgit v1.2.3 From 6cd7d168c414fa895c8cf21d81d563ef1f557b51 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 3 Mar 2026 21:13:52 +0100 Subject: drm/xe: Add PR_CTR_CTRL/THRSH register definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Watchdog Counter Control and Watchdog Counter 
Threshold registers are needed for watchdog programming. This watchdog will generate the "Media Hang Notify" interrupt. Bspec: 45999, 46000 Bspec: 60373, 60374 Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://patch.msgid.link/20260303201354.17948-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index dc5a4fafa70c..1b4a7e9a703d 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -132,6 +132,14 @@ #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) +#define PR_CTR_CTRL(base) XE_REG((base) + 0x178) +#define CTR_COUNT_SELECT_FF REG_BIT(31) +#define CTR_LOGIC_OP_MASK REG_GENMASK(30, 0) +#define CTR_START 0 +#define CTR_STOP 1 +#define CTR_LOGIC_OP(OP) REG_FIELD_PREP(CTR_LOGIC_OP_MASK, CTR_##OP) +#define PR_CTR_THRSH(base) XE_REG((base) + 0x17c) + #define BCS_SWCTRL(base) XE_REG((base) + 0x200, XE_REG_OPTION_MASKED) #define BCS_SWCTRL_DISABLE_256B REG_BIT(2) -- cgit v1.2.3 From d95fda29758b59f4279465892905ca57dfd4bb10 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 3 Mar 2026 21:13:53 +0100 Subject: drm/xe: Add MI_SEMAPHORE_WAIT command definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This command supports memory based Semaphore WAIT. Memory based semaphores will be used for synchronization between the Producer and the Consumer contexts. Producer and Consumer Contexts could be running on different engines or on the same engine inside GT. 
Bspec: 45749, 60244 Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://patch.msgid.link/20260303201354.17948-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index c47b290e0e9f..29569eff1af3 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -34,6 +34,19 @@ #define MI_FORCE_WAKEUP __MI_INSTR(0x1D) #define MI_MATH(n) (__MI_INSTR(0x1A) | XE_INSTR_NUM_DW((n) + 1)) +#define MI_SEMAPHORE_WAIT (__MI_INSTR(0x1c) | XE_INSTR_NUM_DW(5)) +#define MI_SEMW_GGTT REG_BIT(22) +#define MI_SEMW_POLL REG_BIT(15) +#define MI_SEMW_COMPARE_OP_MASK REG_GENMASK(14, 12) +#define COMPARE_OP_SAD_GT_SDD 0 +#define COMPARE_OP_SAD_GTE_SDD 1 +#define COMPARE_OP_SAD_LT_SDD 2 +#define COMPARE_OP_SAD_LTE_SDD 3 +#define COMPARE_OP_SAD_EQ_SDD 4 +#define COMPARE_OP_SAD_NEQ_SDD 5 +#define MI_SEMW_COMPARE(OP) REG_FIELD_PREP(MI_SEMW_COMPARE_OP_MASK, COMPARE_OP_##OP) +#define MI_SEMW_TOKEN(token) REG_FIELD_PREP(REG_GENMASK(9, 2), (token)) + #define MI_STORE_DATA_IMM __MI_INSTR(0x20) #define MI_SDI_GGTT REG_BIT(22) #define MI_SDI_LEN_DW GENMASK(9, 0) -- cgit v1.2.3 From 61e7649a1a253609769063a30018e68b970324d6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 3 Mar 2026 21:13:54 +0100 Subject: drm/xe/vf: Improve getting clean NULL context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a small risk that when fetching a NULL context image the VF may get a tweaked context image prepared by another VF that was previously running on the engine before the GuC scheduler switched the VFs. 
To avoid that risk, without forcing GuC scheduler to trigger costly engine reset on every VF switch, use a watchdog mechanism that when configured with impossible condition, triggers an interrupt, which GuC will handle by doing an engine reset. Also adjust job size to account for additional dwords with watchdog setup. Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://patch.msgid.link/20260303201354.17948-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 9 +++++--- drivers/gpu/drm/xe/xe_ring_ops.c | 37 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ring_ops_types.h | 2 +- drivers/gpu/drm/xe/xe_sched_job_types.h | 2 ++ 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index bae895fa066a..8a31c963c372 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -171,7 +171,7 @@ static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt) static void gt_reset_worker(struct work_struct *w); static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, - long timeout_jiffies) + long timeout_jiffies, bool force_reset) { struct xe_sched_job *job; struct dma_fence *fence; @@ -181,6 +181,8 @@ static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, if (IS_ERR(job)) return PTR_ERR(job); + job->ring_ops_force_reset = force_reset; + xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); @@ -204,7 +206,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) if (IS_ERR(bb)) return PTR_ERR(bb); - ret = emit_job_sync(q, bb, HZ); + ret = emit_job_sync(q, bb, HZ, false); xe_bb_free(bb, NULL); return ret; @@ -369,7 +371,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) bb->len = cs - bb->cs; - ret = emit_job_sync(q, bb, HZ); + /* only VFs need to trigger reset to get a clean NULL context */ + ret = emit_job_sync(q, bb, HZ, IS_SRIOV_VF(gt_to_xe(gt))); xe_bb_free(bb, 
NULL); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 53d420d72164..bce7d93ce3a3 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -256,6 +256,32 @@ static int emit_copy_timestamp(struct xe_device *xe, struct xe_lrc *lrc, return i; } +static int emit_fake_watchdog(struct xe_lrc *lrc, u32 *dw, int i) +{ + /* + * Setup a watchdog with impossible condition to always trigger an + * hardware interrupt that would force the GuC to reset the engine. + */ + + dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | MI_LRI_LRM_CS_MMIO; + dw[i++] = PR_CTR_THRSH(0).addr; + dw[i++] = 2; /* small threshold */ + dw[i++] = PR_CTR_CTRL(0).addr; + dw[i++] = CTR_LOGIC_OP(START); + + dw[i++] = MI_SEMAPHORE_WAIT | MI_SEMW_GGTT | MI_SEMW_POLL | MI_SEMW_COMPARE(SAD_EQ_SDD); + dw[i++] = 0xdead; /* this should never be seen */ + dw[i++] = lower_32_bits(xe_lrc_ggtt_addr(lrc)); + dw[i++] = upper_32_bits(xe_lrc_ggtt_addr(lrc)); + dw[i++] = 0; /* unused token */ + + dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_LRM_CS_MMIO; + dw[i++] = PR_CTR_CTRL(0).addr; + dw[i++] = CTR_LOGIC_OP(STOP); + + return i; +} + /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, u64 batch_addr, u32 *head, u32 seqno) @@ -266,6 +292,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc *head = lrc->ring.tail; + if (job->ring_ops_force_reset) + i = emit_fake_watchdog(lrc, dw, i); + i = emit_copy_timestamp(gt_to_xe(gt), lrc, dw, i); if (job->ring_ops_flush_tlb) { @@ -324,6 +353,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, *head = lrc->ring.tail; + if (job->ring_ops_force_reset) + i = emit_fake_watchdog(lrc, dw, i); + i = emit_copy_timestamp(xe, lrc, dw, i); dw[i++] = preparser_disable(true); @@ -381,6 +413,9 @@ static void 
__emit_job_gen12_render_compute(struct xe_sched_job *job, *head = lrc->ring.tail; + if (job->ring_ops_force_reset) + i = emit_fake_watchdog(lrc, dw, i); + i = emit_copy_timestamp(xe, lrc, dw, i); dw[i++] = preparser_disable(true); @@ -433,6 +468,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, *head = lrc->ring.tail; + xe_gt_assert(gt, !job->ring_ops_force_reset); + i = emit_copy_timestamp(xe, lrc, dw, i); i = emit_store_imm_ggtt(saddr, seqno, dw, i); diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h index d7e3e150a9a5..a42a465ac438 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops_types.h +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h @@ -8,7 +8,7 @@ struct xe_sched_job; -#define MAX_JOB_SIZE_DW 58 +#define MAX_JOB_SIZE_DW 72 #define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4) /** diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index 13c2970e81a8..0490b1247a6e 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -63,6 +63,8 @@ struct xe_sched_job { u64 sample_timestamp; /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */ bool ring_ops_flush_tlb; + /** @ring_ops_force_reset: The ring ops need to trigger a reset before payload. */ + bool ring_ops_force_reset; /** @ggtt: mapped in ggtt. */ bool ggtt; /** @restore_replay: job being replayed for restore */ -- cgit v1.2.3 From 4e7ebff69aed345f65f590a17b3119c0cb5eadde Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 5 Mar 2026 17:49:04 +0530 Subject: drm/xe/xe3p_lpg: flush shrinker bo cachelines manually MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XA, new pat_index introduced post xe3p_lpg, is memory shared between the CPU and GPU is treated differently from other GPU memory when the Media engine is power-gated. 
XA is *always* flushed, like at the end-of-submission
The device to check. + * + * Return: true if the HW device optimizing L2 flush, false otherwise. + */ +bool xe_device_is_l2_flush_optimized(struct xe_device *xe) +{ + /* XA is *always* flushed, like at the end-of-submssion (and maybe other + * places), just that internally as an optimisation hw doesn't need to make + * that a full flush (which will also include XA) when Media is + * off/powergated, since it doesn't need to worry about GT caches vs Media + * coherency, and only CPU vs GPU coherency, so can make that flush a + * targeted XA flush, since stuff tagged with XA now means it's shared with + * the CPU. The main implication is that we now need to somehow flush non-XA before + * freeing system memory pages, otherwise dirty cachelines could be flushed after the free + * (like if Media suddenly turns on and does a full flush) + */ + if (GRAPHICS_VER(xe) >= 35 && !IS_DGFX(xe)) + return true; + return false; +} + void xe_device_l2_flush(struct xe_device *xe) { struct xe_gt *gt; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index c4d267002661..e4b9de8d8e95 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -188,6 +188,7 @@ void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p); u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); +bool xe_device_is_l2_flush_optimized(struct xe_device *xe); void xe_device_td_flush(struct xe_device *xe); void xe_device_l2_flush(struct xe_device *xe); -- cgit v1.2.3 From 411389d29eab5325e2b250b1cd2ddb567abb7bbb Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 5 Mar 2026 17:49:05 +0530 Subject: drm/xe/pat: define coh_mode 2way Defining 2way (two-way coherency) is critical for Xe3p_LPG (Nova Lake P) platforms to support L2 flush optimization safely. 
This mode allows the driver to skip certain manual cache flushes (L2 flush optimization) without risking memory corruption because the hardware ensures the most recent data is visible to both entities. Reviewed-by: Matthew Auld Link: https://patch.msgid.link/20260305121902.1892593-8-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_pat.c | 14 +++++++------- drivers/gpu/drm/xe/xe_pat.h | 5 +++-- drivers/gpu/drm/xe/xe_vm.c | 2 +- drivers/gpu/drm/xe/xe_vm_madvise.c | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 34c9031e1e74..356f53bdb83c 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -92,7 +92,7 @@ struct xe_pat_ops { }; static const struct xe_pat_table_entry xelp_pat_table[] = { - [0] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [0] = { XELP_PAT_WB, XE_COH_1WAY }, [1] = { XELP_PAT_WC, XE_COH_NONE }, [2] = { XELP_PAT_WT, XE_COH_NONE }, [3] = { XELP_PAT_UC, XE_COH_NONE }, @@ -102,19 +102,19 @@ static const struct xe_pat_table_entry xehpc_pat_table[] = { [0] = { XELP_PAT_UC, XE_COH_NONE }, [1] = { XELP_PAT_WC, XE_COH_NONE }, [2] = { XELP_PAT_WT, XE_COH_NONE }, - [3] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [3] = { XELP_PAT_WB, XE_COH_1WAY }, [4] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WT, XE_COH_NONE }, - [5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_1WAY }, [6] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WT, XE_COH_NONE }, - [7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY }, + [7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_1WAY }, }; static const struct xe_pat_table_entry xelpg_pat_table[] = { [0] = { XELPG_PAT_0_WB, XE_COH_NONE }, [1] = { XELPG_PAT_1_WT, XE_COH_NONE }, [2] = { XELPG_PAT_3_UC, XE_COH_NONE }, - [3] = { XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_AT_LEAST_1WAY }, - [4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_AT_LEAST_1WAY }, + [3] = { 
XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_1WAY }, + [4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_2WAY }, }; /* @@ -147,7 +147,7 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \ REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \ REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \ - .coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \ + .coh_mode = __coh_mode ? __coh_mode : XE_COH_NONE, \ .valid = 1 \ } diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h index c7e2a53d8cee..a1e287c08f57 100644 --- a/drivers/gpu/drm/xe/xe_pat.h +++ b/drivers/gpu/drm/xe/xe_pat.h @@ -28,8 +28,9 @@ struct xe_pat_table_entry { /** * @coh_mode: The GPU coherency mode that @value maps to. */ -#define XE_COH_NONE 1 -#define XE_COH_AT_LEAST_1WAY 2 +#define XE_COH_NONE 1 +#define XE_COH_1WAY 2 +#define XE_COH_2WAY 3 u16 coh_mode; /** diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5572e12c2a7e..c0d8f5db019d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3465,7 +3465,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, goto free_bind_ops; } - if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { + if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) { err = -EINVAL; goto free_bind_ops; } diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 869db304d96d..431be53be56f 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -309,7 +309,7 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv if (XE_IOCTL_DBG(xe, !coh_mode)) return false; - if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) + if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) return false; if (XE_IOCTL_DBG(xe, args->pat_index.pad)) -- cgit v1.2.3 From 4f39a194d41e6b8cb61a91a7bb01b17be59a7d73 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 5 Mar 2026 17:49:06 +0530 Subject: drm/xe/xe3p_lpg: 
Restrict UAPI to enable L2 flush optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When set, starting xe3p_lpg, the L2 flush optimization feature will control whether L2 is in Persistent or Transient mode through monitoring of media activity. To enable L2 flush optimization include new feature flag GUC_CTL_ENABLE_L2FLUSH_OPT for Novalake platforms when media type is detected. Tighten UAPI validation to restrict userptr, svm and dmabuf mappings to be either 2WAY or XA+1WAY V5(Thomas): logic correction V4(MattA): Modify uapi doc and commit V3(MattA): check valid op and pat_index value V2(MattA): validate dma-buf bos and madvise pat-index Acked-by: José Roberto de Souza Acked-by: Michal Mrozek Acked-by: Carl Zhang Reviewed-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patch.msgid.link/20260305121902.1892593-9-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_guc.c | 3 +++ drivers/gpu/drm/xe/xe_guc_fwif.h | 1 + drivers/gpu/drm/xe/xe_vm.c | 8 ++++++++ drivers/gpu/drm/xe/xe_vm_madvise.c | 23 +++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 4 +++- 5 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 576f3d500390..ccebb437e37f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -98,6 +98,9 @@ static u32 guc_ctl_feature_flags(struct xe_guc *guc) if (xe_guc_using_main_gamctrl_queues(guc)) flags |= GUC_CTL_MAIN_GAMCTRL_QUEUES; + if (GRAPHICS_VER(xe) >= 35 && !IS_DGFX(xe) && xe_gt_is_media_type(guc_to_gt(guc))) + flags |= GUC_CTL_ENABLE_L2FLUSH_OPT; + return flags; } diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index bb8f71d38611..b73fae063fac 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -67,6 +67,7 @@ struct guc_update_exec_queue_policy { #define GUC_CTL_ENABLE_PSMI_LOGGING BIT(7) #define 
GUC_CTL_MAIN_GAMCTRL_QUEUES BIT(9) #define GUC_CTL_DISABLE_SCHEDULER BIT(14) +#define GUC_CTL_ENABLE_L2FLUSH_OPT BIT(15) #define GUC_CTL_DEBUG 3 #define GUC_LOG_VERBOSITY REG_GENMASK(1, 0) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c0d8f5db019d..e24436287786 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3492,6 +3492,10 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) && + (op == DRM_XE_VM_BIND_OP_MAP_USERPTR || + is_cpu_addr_mirror) && + (pat_index != 19 && coh_mode != XE_COH_2WAY)) || XE_IOCTL_DBG(xe, comp_en && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && @@ -3633,6 +3637,10 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en)) return -EINVAL; + if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) && + (pat_index != 19 && coh_mode != XE_COH_2WAY))) + return -EINVAL; + /* If a BO is protected it can only be mapped if the key is still valid */ if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 431be53be56f..e564b12c02d9 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -419,6 +419,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, .range = args->range, }; struct xe_madvise_details details; + u16 pat_index, coh_mode; struct xe_vm *vm; struct drm_exec exec; int err, attr_type; @@ -455,6 +456,17 @@ int 
xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil if (err || !madvise_range.num_vmas) goto madv_fini; + if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) { + pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries); + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (XE_IOCTL_DBG(xe, madvise_range.has_svm_userptr_vmas && + xe_device_is_l2_flush_optimized(xe) && + (pat_index != 19 && coh_mode != XE_COH_2WAY))) { + err = -EINVAL; + goto madv_fini; + } + } + if (madvise_range.has_bo_vmas) { if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { if (!check_bo_args_are_sane(vm, madvise_range.vmas, @@ -472,6 +484,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil if (!bo) continue; + + if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) { + if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && + xe_device_is_l2_flush_optimized(xe) && + (pat_index != 19 && + coh_mode != XE_COH_2WAY))) { + err = -EINVAL; + goto err_fini; + } + } + err = drm_exec_lock_obj(&exec, &bo->ttm.base); drm_exec_retry_on_contention(&exec); if (err) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index f8b2afb20540..7014dde1c9c4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1114,7 +1114,9 @@ struct drm_xe_vm_bind_op { * incoherent GT access is possible. * * Note: For userptr and externally imported dma-buf the kernel expects - * either 1WAY or 2WAY for the @pat_index. + * either 1WAY or 2WAY for the @pat_index. Starting from NVL-P, for + * userptr, svm, madvise and externally imported dma-buf the kernel expects + * either 2WAY or 1WAY and XA @pat_index. * * For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions * on the @pat_index. 
For such mappings there is no actual memory being -- cgit v1.2.3 From e8c3a913c8582d255899212c68492eeb58f06f07 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 5 Mar 2026 17:49:07 +0530 Subject: drm/xe/xe3p: Skip TD flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe3p has HW ability to do transient display flush so the xe driver can enable this HW feature by default and skip the software TD flush. Bspec: 60002 Reviewed-by: Thomas Hellström Reviewed-by: Shekhar Chauhan Link: https://patch.msgid.link/20260305121902.1892593-10-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_device.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index daf2c815082c..05fa16044f9b 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1163,6 +1163,14 @@ void xe_device_td_flush(struct xe_device *xe) { struct xe_gt *root_gt; + /* + * From Xe3p onward the HW takes care of flush of TD entries also along + * with flushing XA entries, which will be at the usual sync points, + * like at the end of submission, so no manual flush is needed here. + */ + if (GRAPHICS_VER(xe) >= 35) + return; + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) return; -- cgit v1.2.3 From a31566762d4075646a8a2214586158b681e94305 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 19 Mar 2026 15:30:34 -0700 Subject: drm/xe: Implement recent spec updates to Wa_16025250150 The hardware teams noticed that the originally documented workaround steps for Wa_16025250150 may not be sufficient to fully avoid a hardware issue. The workaround documentation has been augmented to suggest programming one additional register; make the corresponding change in the driver. 
Fixes: 7654d51f1fd8 ("drm/xe/xe2hpg: Add Wa_16025250150") Reviewed-by: Matt Atwood Link: https://patch.msgid.link/20260319-wa_16025250150_part2-v1-1-46b1de1a31b2@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 84b80e83ac46..4ebaa0888a43 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -578,6 +578,7 @@ #define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) +#define L3_128B_256B_WRT_DIS REG_BIT(40 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) #define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a02ee1282d35..546296f0220b 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -260,7 +260,8 @@ static const struct xe_rtp_entry_sr gt_was[] = { LSN_DIM_Z_WGT_MASK, LSN_LNI_WGT(1) | LSN_LNE_WGT(1) | LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) | - LSN_DIM_Z_WGT(1))) + LSN_DIM_Z_WGT(1)), + SET(LSC_CHICKEN_BIT_0_UDW, L3_128B_256B_WRT_DIS)) }, /* Xe3_LPG */ -- cgit v1.2.3 From 2de36e3f72dae2035b2742ffe3355e43067a81ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Pi=C3=B3rkowski?= Date: Mon, 9 Mar 2026 16:24:48 +0100 Subject: drm/xe/pf: Add FLR_PREPARE state to VF control flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our xe-vfio-pci component relies on the confirmation from the PF that VF FLR processing has finished, but due to the notification latency on the HW/FW side, PF might be unaware yet of the already triggered VF FLR. 
Update VF state machine with new FLR_PREPARE state that indicate imminent VF FLR notification and treat that as a begin of the FLR sequence. Also introduce function that xe-vfio-pci should call to guarantee correct synchronization. v2: move PREPARE into WIP, update commit msg (Michal) Signed-off-by: Piotr Piórkowski Co-developed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Reviewed-by: Michał Winiarski Link: https://patch.msgid.link/20260309152449.910636-2-piotr.piorkowski@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 78 ++++++++++++++++++----- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h | 2 + drivers/gpu/drm/xe/xe_sriov_pf_control.c | 24 +++++++ drivers/gpu/drm/xe/xe_sriov_pf_control.h | 1 + drivers/gpu/drm/xe/xe_sriov_vfio.c | 1 + include/drm/intel/xe_sriov_vfio.h | 11 ++++ 7 files changed, 102 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 5cb705c7ee7a..058585f063a9 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -171,6 +171,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit) case XE_GT_SRIOV_STATE_##_X: return #_X CASE2STR(WIP); CASE2STR(FLR_WIP); + CASE2STR(FLR_PREPARE); CASE2STR(FLR_SEND_START); CASE2STR(FLR_WAIT_GUC); CASE2STR(FLR_GUC_DONE); @@ -1486,11 +1487,15 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) * The VF FLR state machine looks like:: * * (READY,PAUSED,STOPPED)<------------<--------------o - * | \ - * flr \ - * | \ - * ....V..........................FLR_WIP........... \ - * : \ : \ + * | | \ + * flr prepare \ + * | | \ + * ....V.............V............FLR_WIP........... 
\ + * : | | : \ + * : | FLR_PREPARE : | + * : | / : | + * : \ flr : | + * : \ / : | * : \ o----<----busy : | * : \ / / : | * : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o @@ -1539,20 +1544,28 @@ static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid) pf_queue_vf(gt, vfid); } -static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) +static bool pf_exit_vf_flr_prepare(struct xe_gt *gt, unsigned int vfid) { - if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) { - xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid); - return; - } + if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_PREPARE)) + return false; - pf_enter_vf_wip(gt, vfid); pf_enter_vf_flr_send_start(gt, vfid); + return true; +} + +static bool pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) +{ + if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) + return false; + + pf_enter_vf_wip(gt, vfid); + return true; } static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid) { if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) { + pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_PREPARE); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO); pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA); @@ -1760,21 +1773,54 @@ static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid) } /** - * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence. + * xe_gt_sriov_pf_control_prepare_flr() - Notify PF that VF FLR request was issued. * @gt: the &xe_gt * @vfid: the VF identifier * + * This is an optional early notification path used to mark pending FLR before + * the GuC notifies the PF with a FLR event. + * * This function is for PF only. * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) +int xe_gt_sriov_pf_control_prepare_flr(struct xe_gt *gt, unsigned int vfid) { - pf_enter_vf_flr_wip(gt, vfid); + if (!pf_enter_vf_flr_wip(gt, vfid)) + return -EALREADY; + pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_PREPARE); return 0; } +static int pf_begin_vf_flr(struct xe_gt *gt, unsigned int vfid) +{ + if (pf_enter_vf_flr_wip(gt, vfid)) { + pf_enter_vf_flr_send_start(gt, vfid); + return 0; + } + + if (pf_exit_vf_flr_prepare(gt, vfid)) + return 0; + + xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid); + return -EALREADY; +} + +/** + * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) +{ + return pf_begin_vf_flr(gt, vfid); +} + /** * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint. 
* @gt: the &xe_gt @@ -1879,9 +1925,9 @@ static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid) if (needs_dispatch_flr(xe)) { for_each_gt(gtit, xe, gtid) - pf_enter_vf_flr_wip(gtit, vfid); + pf_begin_vf_flr(gtit, vfid); } else { - pf_enter_vf_flr_wip(gt, vfid); + pf_begin_vf_flr(gt, vfid); } } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h index c36c8767f3ad..23182a5c5fb8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -27,6 +27,7 @@ int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int v int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_prepare_flr(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync); int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h index 6027ba05a7f2..e78c59e08adf 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h @@ -15,6 +15,7 @@ * * @XE_GT_SRIOV_STATE_WIP: indicates that some operations are in progress. * @XE_GT_SRIOV_STATE_FLR_WIP: indicates that a VF FLR is in progress. + * @XE_GT_SRIOV_STATE_FLR_PREPARE: indicates that the PF received early VF FLR prepare notification. * @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command. * @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits for a response from the GuC. 
* @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC. @@ -56,6 +57,7 @@ enum xe_gt_sriov_control_bits { XE_GT_SRIOV_STATE_WIP = 1, XE_GT_SRIOV_STATE_FLR_WIP, + XE_GT_SRIOV_STATE_FLR_PREPARE, XE_GT_SRIOV_STATE_FLR_SEND_START, XE_GT_SRIOV_STATE_FLR_WAIT_GUC, XE_GT_SRIOV_STATE_FLR_GUC_DONE, diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c index ed4b9820b06e..15b4341d7f12 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c @@ -123,6 +123,30 @@ int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid) return result; } +/** + * xe_sriov_pf_control_prepare_flr() - Notify PF that VF FLR prepare has started. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_control_prepare_flr(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0; + int err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_control_prepare_flr(gt, vfid); + result = result ? -EUCLEAN : err; + } + + return result; +} + /** * xe_sriov_pf_control_wait_flr() - Wait for a VF reset (FLR) to complete. 
* @xe: the &xe_device diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_sriov_pf_control.h index ef9f219b2109..74981a67db88 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.h @@ -12,6 +12,7 @@ int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid); +int xe_sriov_pf_control_prepare_flr(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid); int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_sriov_vfio.c b/drivers/gpu/drm/xe/xe_sriov_vfio.c index 3da81af97b8b..00f96b0976d1 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vfio.c +++ b/drivers/gpu/drm/xe/xe_sriov_vfio.c @@ -42,6 +42,7 @@ _type xe_sriov_vfio_##_func(struct xe_device *xe, unsigned int vfid) \ EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_##_func, "xe-vfio-pci") DEFINE_XE_SRIOV_VFIO_FUNCTION(int, wait_flr_done, control_wait_flr); +DEFINE_XE_SRIOV_VFIO_FUNCTION(int, flr_prepare, control_prepare_flr); DEFINE_XE_SRIOV_VFIO_FUNCTION(int, suspend_device, control_pause_vf); DEFINE_XE_SRIOV_VFIO_FUNCTION(int, resume_device, control_resume_vf); DEFINE_XE_SRIOV_VFIO_FUNCTION(int, stop_copy_enter, control_trigger_save_vf); diff --git a/include/drm/intel/xe_sriov_vfio.h b/include/drm/intel/xe_sriov_vfio.h index e9814e8149fd..27c224a70e6f 100644 --- a/include/drm/intel/xe_sriov_vfio.h +++ b/include/drm/intel/xe_sriov_vfio.h @@ -27,6 +27,17 @@ struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev); */ bool xe_sriov_vfio_migration_supported(struct xe_device *xe); +/** + * xe_sriov_vfio_flr_prepare() - Notify PF 
that VF FLR prepare has started. + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() + * @vfid: the VF identifier (can't be 0) + * + * This function marks VF FLR as pending before PF receives GuC FLR event. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vfio_flr_prepare(struct xe_device *xe, unsigned int vfid); + /** * xe_sriov_vfio_wait_flr_done() - Wait for VF FLR completion. * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf() -- cgit v1.2.3 From 9e60ee0e6dc2c81e0e5b285d09807a361f6745ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Pi=C3=B3rkowski?= Date: Mon, 9 Mar 2026 16:24:49 +0100 Subject: vfio/xe: Notify PF about VF FLR in reset_prepare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hook into the PCI error handler reset_prepare() callback to notify the PF about an upcoming VF FLR before reset_done() is executed. This enables early FLR_PREPARE signaling and ensures that the PF is aware of the reset before the completion wait begins. 
Signed-off-by: Piotr Piórkowski Cc: Michał Winiarski Reviewed-by: Michał Winiarski Reviewed-by: Alex Williamson Link: https://patch.msgid.link/20260309152449.910636-3-piotr.piorkowski@intel.com Signed-off-by: Michał Winiarski --- drivers/vfio/pci/xe/main.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/vfio/pci/xe/main.c b/drivers/vfio/pci/xe/main.c index fff95b2d5dde..88acfcf840fc 100644 --- a/drivers/vfio/pci/xe/main.c +++ b/drivers/vfio/pci/xe/main.c @@ -85,6 +85,19 @@ again: spin_unlock(&xe_vdev->reset_lock); } +static void xe_vfio_pci_reset_prepare(struct pci_dev *pdev) +{ + struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev); + int ret; + + if (!pdev->is_virtfn) + return; + + ret = xe_sriov_vfio_flr_prepare(xe_vdev->xe, xe_vdev->vfid); + if (ret) + dev_err(&pdev->dev, "Failed to prepare FLR: %d\n", ret); +} + static void xe_vfio_pci_reset_done(struct pci_dev *pdev) { struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev); @@ -127,6 +140,7 @@ static void xe_vfio_pci_reset_done(struct pci_dev *pdev) } static const struct pci_error_handlers xe_vfio_pci_err_handlers = { + .reset_prepare = xe_vfio_pci_reset_prepare, .reset_done = xe_vfio_pci_reset_done, .error_detected = vfio_pci_core_aer_err_detected, }; -- cgit v1.2.3 From 2bb026f3fbe8cb59ab70ec21b5cbd729b0c94bac Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:07 +0000 Subject: drm/xe: Rename XE_BO_FLAG_SCANOUT to XE_BO_FLAG_FORCE_WC Rename XE_BO_FLAG_SCANOUT to XE_BO_FLAG_FORCE_WC so that the usage of the flag can legitimately be expanded to more than just the actual frame- buffer objects. 
Signed-off-by: Tvrtko Ursulin Suggested-by: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260324084018.20353-2-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 12 ++++++++---- drivers/gpu/drm/xe/display/xe_display_bo.c | 6 +++--- drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 4 +++- drivers/gpu/drm/xe/display/xe_fb_pin.c | 2 +- drivers/gpu/drm/xe/display/xe_initial_plane.c | 2 +- drivers/gpu/drm/xe/xe_bo.c | 17 +++++++++-------- drivers/gpu/drm/xe/xe_bo.h | 2 +- 7 files changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 87af5646c938..d7030e4d814c 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -56,9 +56,11 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size if (intel_fbdev_fb_prefer_stolen(drm, size)) { obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size, - ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + ttm_bo_type_kernel, + XE_BO_FLAG_FORCE_WC | XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT, false); + XE_BO_FLAG_GGTT, + false); if (!IS_ERR(obj)) drm_info(&xe->drm, "Allocated fbdev into stolen\n"); else @@ -69,9 +71,11 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size if (IS_ERR(obj)) { obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size, - ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + ttm_bo_type_kernel, + XE_BO_FLAG_FORCE_WC | XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_GGTT, false); + XE_BO_FLAG_GGTT, + false); } if (IS_ERR(obj)) { diff --git a/drivers/gpu/drm/xe/display/xe_display_bo.c b/drivers/gpu/drm/xe/display/xe_display_bo.c index a689f71e7b14..1d81b9908265 100644 --- a/drivers/gpu/drm/xe/display/xe_display_bo.c +++ b/drivers/gpu/drm/xe/display/xe_display_bo.c @@ -42,9 +42,9 @@ static int 
xe_display_bo_framebuffer_init(struct drm_gem_object *obj, if (ret) goto err; - if (!(bo->flags & XE_BO_FLAG_SCANOUT)) { + if (!(bo->flags & XE_BO_FLAG_FORCE_WC)) { /* - * XE_BO_FLAG_SCANOUT should ideally be set at creation, or is + * XE_BO_FLAG_FORCE_WC should ideally be set at creation, or is * automatically set when creating FB. We cannot change caching * mode when the bo is VM_BINDed, so we can only set * coherency with display when unbound. @@ -54,7 +54,7 @@ static int xe_display_bo_framebuffer_init(struct drm_gem_object *obj, ret = -EINVAL; goto err; } - bo->flags |= XE_BO_FLAG_SCANOUT; + bo->flags |= XE_BO_FLAG_FORCE_WC; } ttm_bo_unreserve(&bo->ttm); return 0; diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 1c67a950c6ad..a7158c73a14c 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -54,7 +54,9 @@ static struct intel_dsb_buffer *xe_dsb_buffer_create(struct drm_device *drm, siz PAGE_ALIGN(size), ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false); + XE_BO_FLAG_FORCE_WC | + XE_BO_FLAG_GGTT, + false); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_pin_map; diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index dbbc61032b7f..d4a9eb550cae 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -429,7 +429,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, return 0; /* We reject creating !SCANOUT fb's, so this is weird.. 
*/ - drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT)); + drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_FORCE_WC)); vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, alignment); diff --git a/drivers/gpu/drm/xe/display/xe_initial_plane.c b/drivers/gpu/drm/xe/display/xe_initial_plane.c index 65cc0b0c934b..8bcae552dddc 100644 --- a/drivers/gpu/drm/xe/display/xe_initial_plane.c +++ b/drivers/gpu/drm/xe/display/xe_initial_plane.c @@ -48,7 +48,7 @@ initial_plane_bo(struct xe_device *xe, if (plane_config->size == 0) return NULL; - flags = XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT; + flags = XE_BO_FLAG_FORCE_WC | XE_BO_FLAG_GGTT; base = round_down(plane_config->base, page_size); if (IS_DGFX(xe)) { diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 216e1d8635f4..7545d2fa3255 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -510,13 +510,11 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); /* - * Display scanout is always non-coherent with the CPU cache. - * * For Xe_LPG and beyond up to NVL-P (excluding), PPGTT PTE * lookups are also non-coherent and require a CPU:WC mapping. */ - if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || - (!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE)) + if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_FORCE_WC) || + (!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE)) caching = ttm_write_combined; } @@ -3201,8 +3199,11 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING) bo_flags |= XE_BO_FLAG_DEFER_BACKING; + /* + * Display scanout is always non-coherent with the CPU cache. 
+ */ if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) - bo_flags |= XE_BO_FLAG_SCANOUT; + bo_flags |= XE_BO_FLAG_FORCE_WC; if (args->flags & DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION) { if (XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20)) @@ -3214,7 +3215,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, /* CCS formats need physical placement at a 64K alignment in VRAM. */ if ((bo_flags & XE_BO_FLAG_VRAM_MASK) && - (bo_flags & XE_BO_FLAG_SCANOUT) && + (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) && !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) && IS_ALIGNED(args->size, SZ_64K)) bo_flags |= XE_BO_FLAG_NEEDS_64K; @@ -3234,7 +3235,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC)) return -EINVAL; - if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT && + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_FORCE_WC && args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) return -EINVAL; @@ -3702,7 +3703,7 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create_user(xe, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_FORCE_WC | XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 2cbac16f7db7..a0ad846e9450 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -35,7 +35,7 @@ #define XE_BO_FLAG_PINNED BIT(7) #define XE_BO_FLAG_NO_RESV_EVICT BIT(8) #define XE_BO_FLAG_DEFER_BACKING BIT(9) -#define XE_BO_FLAG_SCANOUT BIT(10) +#define XE_BO_FLAG_FORCE_WC BIT(10) #define XE_BO_FLAG_FIXED_PLACEMENT BIT(11) #define XE_BO_FLAG_PAGETABLE BIT(12) #define XE_BO_FLAG_NEEDS_CPU_ACCESS BIT(13) -- cgit v1.2.3 From 36052e56d5943bbd8244321c3b1445ba5db0f12b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:08 +0000 Subject: drm/xe: Use write-combine mapping when populating DPT MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fallback case for DPT backing store is a buffer object in system memory buffer, which by default use a write-back CPU caching policy. If this fallback gets triggered, and since there is currently no flushing, the DPT writes made when pinning a buffer to display are not guaranteed to be seen by the display engine. To fix this, since both the local memory and the stolen memory DPT placements already use write-combine, let us make the system memory option follow suit by passing down the appropriate flag. Signed-off-by: Tvrtko Ursulin Suggested-by: Ville Syrjälä Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260324084018.20353-3-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d4a9eb550cae..df7d305c6fcd 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -122,7 +122,8 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, + XE_BO_FLAG_PAGETABLE | + XE_BO_FLAG_FORCE_WC, alignment, false); if (IS_ERR(dpt)) return PTR_ERR(dpt); -- cgit v1.2.3 From 88139af77d6acd74bf73f5e36f4bdc63f033f399 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:09 +0000 Subject: drm/xe/xelpg: Limit AuxCCS ring buffer programming to Alderlake At the moment the driver does not support AuxCCS at all due respective modifiers being hidden from userspace. As we are about to start enabling them, starting with Alderlake, let us begin by limiting the ring buffer support to just that initial platform. 
Signed-off-by: Tvrtko Ursulin Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260324084018.20353-4-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bce7d93ce3a3..92b33925ce08 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -334,9 +334,9 @@ static bool has_aux_ccs(struct xe_device *xe) * PVC is a special case that has no compression of either type * (FlatCCS or AuxCCS). Also, AuxCCS is no longer used from Xe2 * onward, so any future platforms with no FlatCCS will not have - * AuxCCS either. + * AuxCCS, and we explicitly do not want to support it on MTL. */ - if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) + if (GRAPHICS_VERx100(xe) >= 1270 || xe->info.platform == XE_PVC) return false; return !xe->info.has_flat_ccs; -- cgit v1.2.3 From 458b1e64e7c0594cca8515fae8996bc52619d2f6 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:10 +0000 Subject: drm/xe/xelp: Quiesce memory traffic before invalidating AuxCCS According to i915 commit ad8ebf12217e ("drm/i915/gt: Ensure memory quiesced before invalidation") quiescing of the memory traffic is required before invalidating the AuxCCS tables. Add an extra pipe control flush to achieve that. 
Signed-off-by: Tvrtko Ursulin Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260324084018.20353-5-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 92b33925ce08..629e551304be 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -409,6 +409,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); + const bool aux_ccs = has_aux_ccs(xe); u32 mask_flags = 0; *head = lrc->ring.tail; @@ -418,6 +419,13 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_copy_timestamp(xe, lrc, dw, i); + /* + * On AuxCCS platforms the invalidation of the Aux table requires + * quiescing the memory traffic beforehand. + */ + if (aux_ccs) + i = emit_render_cache_flush(job, dw, i); + dw[i++] = preparser_disable(true); if (lacks_render) mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; @@ -428,7 +436,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_pipe_invalidate(job->q, mask_flags, job->ring_ops_flush_tlb, dw, i); /* hsdes: 1809175790 */ - if (has_aux_ccs(xe)) + if (aux_ccs) i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i); dw[i++] = preparser_disable(false); -- cgit v1.2.3 From cd1a516234ebb049007ce20c6b6e76936b29bade Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:11 +0000 Subject: drm/xe/xelp: Wait for AuxCCS invalidation to complete On AuxCCS platforms we need to wait for AuxCCS invalidations to complete. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260324084018.20353-6-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 6 ++++++ drivers/gpu/drm/xe/xe_ring_ops.c | 9 ++++++++- drivers/gpu/drm/xe/xe_ring_ops_types.h | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 29569eff1af3..ad7d98f2dbba 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -94,4 +94,10 @@ #define MI_SET_APPID_SESSION_ID_MASK REG_GENMASK(6, 0) #define MI_SET_APPID_SESSION_ID(x) REG_FIELD_PREP(MI_SET_APPID_SESSION_ID_MASK, x) +#define MI_SEMAPHORE_WAIT_TOKEN (__MI_INSTR(0x1c) | XE_INSTR_NUM_DW(5)) /* XeLP+ */ +#define MI_SEMAPHORE_REGISTER_POLL REG_BIT(16) +#define MI_SEMAPHORE_POLL REG_BIT(15) +#define MI_SEMAPHORE_CMP_OP_MASK REG_GENMASK(14, 12) +#define MI_SEMAPHORE_SAD_EQ_SDD REG_FIELD_PREP(MI_SEMAPHORE_CMP_OP_MASK, 4) + #endif diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 629e551304be..7551a6acd076 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -54,7 +54,14 @@ static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg, dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN; dw[i++] = reg.addr + gt->mmio.adj_offset; dw[i++] = AUX_INV; - dw[i++] = MI_NOOP; + dw[i++] = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_REGISTER_POLL | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + dw[i++] = 0; + dw[i++] = reg.addr + gt->mmio.adj_offset; + dw[i++] = 0; + dw[i++] = 0; return i; } diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h index a42a465ac438..1197fc0bf2af 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops_types.h +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h @@ -8,7 
+8,7 @@ struct xe_sched_job; -#define MAX_JOB_SIZE_DW 72 +#define MAX_JOB_SIZE_DW 74 #define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4) /** -- cgit v1.2.3 From fd4c1eea1cfe20efc470f01bfb1a04d272e8eb74 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:12 +0000 Subject: drm/xe: Move aux table invalidation to ring ops Implement the suggestion of moving the aux invalidation from a helper to a ring ops vfunc, together with the suggestion to split the vfunc table of video decode and video enhance engines. With this done the LRC code will be able to access the functionality via the newly added ring ops vfunc. Signed-off-by: Tvrtko Ursulin Suggested-by: Matthew Brost Cc: Rodrigo Vivi Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260324084018.20353-7-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ring_ops.c | 105 ++++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_ring_ops_types.h | 6 ++ 2 files changed, 83 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 7551a6acd076..cfeb4fc7d217 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -48,22 +48,48 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | BIT(8) | state; } -static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg, - u32 *dw, int i) +static u32 * +__emit_aux_table_inv(u32 *cmd, const struct xe_reg reg, u32 adj_offset) { - dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN; - dw[i++] = reg.addr + gt->mmio.adj_offset; - dw[i++] = AUX_INV; - dw[i++] = MI_SEMAPHORE_WAIT_TOKEN | - MI_SEMAPHORE_REGISTER_POLL | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_EQ_SDD; - dw[i++] = 0; - dw[i++] = reg.addr + gt->mmio.adj_offset; - dw[i++] = 0; - dw[i++] = 0; + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | + MI_LRI_MMIO_REMAP_EN; + *cmd++ = reg.addr + adj_offset; + *cmd++ = AUX_INV; + *cmd++ = 
MI_SEMAPHORE_WAIT_TOKEN | MI_SEMAPHORE_REGISTER_POLL | + MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD; + *cmd++ = 0; + *cmd++ = reg.addr + adj_offset; + *cmd++ = 0; + *cmd++ = 0; + + return cmd; +} - return i; +static u32 *emit_aux_table_inv_render_compute(struct xe_gt *gt, u32 *cmd) +{ + return __emit_aux_table_inv(cmd, CCS_AUX_INV, gt->mmio.adj_offset); +} + +static u32 *emit_aux_table_inv_video_decode(struct xe_gt *gt, u32 *cmd) +{ + return __emit_aux_table_inv(cmd, VD0_AUX_INV, gt->mmio.adj_offset); +} + +static u32 *emit_aux_table_inv_video_enhance(struct xe_gt *gt, u32 *cmd) +{ + return __emit_aux_table_inv(cmd, VE0_AUX_INV, gt->mmio.adj_offset); +} + +static int emit_aux_table_inv(struct xe_hw_engine *hwe, u32 *dw, int i) +{ + struct xe_gt *gt = hwe->gt; + u32 *(*emit)(struct xe_gt *gt, u32 *cmd) = + gt->ring_ops[hwe->class]->emit_aux_table_inv; + + if (emit) + return emit(gt, dw + i) - dw; + else + return i; } static int emit_user_interrupt(u32 *dw, int i) @@ -356,7 +382,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, u32 ppgtt_flag = get_ppgtt_flag(job); struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); - bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; *head = lrc->ring.tail; @@ -368,12 +393,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, dw[i++] = preparser_disable(true); /* hsdes: 1809175790 */ - if (has_aux_ccs(xe)) { - if (decode) - i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i); - else - i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i); - } + i = emit_aux_table_inv(job->q->hwe, dw, i); if (job->ring_ops_flush_tlb) i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), @@ -416,7 +436,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, struct xe_gt *gt = job->q->gt; struct xe_device *xe = gt_to_xe(gt); bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); - const bool aux_ccs = has_aux_ccs(xe); u32 mask_flags 
= 0; *head = lrc->ring.tail; @@ -430,7 +449,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, * On AuxCCS platforms the invalidation of the Aux table requires * quiescing the memory traffic beforehand. */ - if (aux_ccs) + if (has_aux_ccs(xe)) i = emit_render_cache_flush(job, dw, i); dw[i++] = preparser_disable(true); @@ -443,8 +462,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_pipe_invalidate(job->q, mask_flags, job->ring_ops_flush_tlb, dw, i); /* hsdes: 1809175790 */ - if (aux_ccs) - i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i); + i = emit_aux_table_inv(job->q->hwe, dw, i); dw[i++] = preparser_disable(false); @@ -571,7 +589,11 @@ static const struct xe_ring_ops ring_ops_gen12_copy = { .emit_job = emit_job_gen12_copy, }; -static const struct xe_ring_ops ring_ops_gen12_video = { +static const struct xe_ring_ops ring_ops_gen12_video_decode = { + .emit_job = emit_job_gen12_video, +}; + +static const struct xe_ring_ops ring_ops_gen12_video_enhance = { .emit_job = emit_job_gen12_video, }; @@ -579,20 +601,47 @@ static const struct xe_ring_ops ring_ops_gen12_render_compute = { .emit_job = emit_job_gen12_render_compute, }; +static const struct xe_ring_ops auxccs_ring_ops_gen12_video_decode = { + .emit_job = emit_job_gen12_video, + .emit_aux_table_inv = emit_aux_table_inv_video_decode, +}; + +static const struct xe_ring_ops auxccs_ring_ops_gen12_video_enhance = { + .emit_job = emit_job_gen12_video, + .emit_aux_table_inv = emit_aux_table_inv_video_enhance, +}; + +static const struct xe_ring_ops auxccs_ring_ops_gen12_render_compute = { + .emit_job = emit_job_gen12_render_compute, + .emit_aux_table_inv = emit_aux_table_inv_render_compute, +}; + const struct xe_ring_ops * xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class) { + struct xe_device *xe = gt_to_xe(gt); + switch (class) { case XE_ENGINE_CLASS_OTHER: return &ring_ops_gen12_gsc; case XE_ENGINE_CLASS_COPY: return &ring_ops_gen12_copy; case 
XE_ENGINE_CLASS_VIDEO_DECODE: + if (has_aux_ccs(xe)) + return &auxccs_ring_ops_gen12_video_decode; + else + return &ring_ops_gen12_video_decode; case XE_ENGINE_CLASS_VIDEO_ENHANCE: - return &ring_ops_gen12_video; + if (has_aux_ccs(xe)) + return &auxccs_ring_ops_gen12_video_enhance; + else + return &ring_ops_gen12_video_enhance; case XE_ENGINE_CLASS_RENDER: case XE_ENGINE_CLASS_COMPUTE: - return &ring_ops_gen12_render_compute; + if (has_aux_ccs(xe)) + return &auxccs_ring_ops_gen12_render_compute; + else + return &ring_ops_gen12_render_compute; default: return NULL; } diff --git a/drivers/gpu/drm/xe/xe_ring_ops_types.h b/drivers/gpu/drm/xe/xe_ring_ops_types.h index 1197fc0bf2af..52ff96bc4100 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops_types.h +++ b/drivers/gpu/drm/xe/xe_ring_ops_types.h @@ -6,6 +6,9 @@ #ifndef _XE_RING_OPS_TYPES_H_ #define _XE_RING_OPS_TYPES_H_ +#include + +struct xe_gt; struct xe_sched_job; #define MAX_JOB_SIZE_DW 74 @@ -17,6 +20,9 @@ struct xe_sched_job; struct xe_ring_ops { /** @emit_job: Write job to ring */ void (*emit_job)(struct xe_sched_job *job); + + /** @emit_aux_table_inv: Emit aux table invalidation to the ring */ + u32 *(*emit_aux_table_inv)(struct xe_gt *gt, u32 *cmd); }; #endif -- cgit v1.2.3 From 1d5945d74b4d4c8dcc884ad7f0ccbb377d777443 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:13 +0000 Subject: drm/xe/xelp: Add AuxCCS invalidation to the indirect context workarounds Following from the i915 reference implementation, we add the AuxCCS invalidation to the indirect context workarounds page. 
Signed-off-by: Tvrtko Ursulin Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260324084018.20353-8-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_lrc.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index dde3bcff3c22..24f4c7210cfb 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -28,6 +28,7 @@ #include "xe_map.h" #include "xe_memirq.h" #include "xe_mmio.h" +#include "xe_ring_ops.h" #include "xe_sriov.h" #include "xe_trace_lrc.h" #include "xe_vm.h" @@ -94,6 +95,9 @@ gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) class, NULL)) return true; + if (gt->ring_ops[class]->emit_aux_table_inv) + return true; + return false; } @@ -1217,6 +1221,23 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, return cmd - batch; } +static ssize_t setup_invalidate_auxccs_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_gt *gt = lrc->gt; + u32 *(*emit)(struct xe_gt *gt, u32 *cmd) = + gt->ring_ops[hwe->class]->emit_aux_table_inv; + + if (!emit) + return 0; + + if (xe_gt_WARN_ON(gt, max_len < 8)) + return -ENOSPC; + + return emit(gt, batch) - batch; +} + struct bo_setup { ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *batch, size_t max_size); @@ -1349,9 +1370,11 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) { static const struct bo_setup rcs_funcs[] = { { .setup = setup_timestamp_wa }, + { .setup = setup_invalidate_auxccs_wa }, { .setup = setup_configfs_mid_ctx_restore_bb }, }; static const struct bo_setup xcs_funcs[] = { + { .setup = setup_invalidate_auxccs_wa }, { .setup = setup_configfs_mid_ctx_restore_bb }, }; struct bo_setup_state state = { -- cgit v1.2.3 From 7c42193d9049ab7e3069e6e9ff94d8a37b774af9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:14 
+0000 Subject: drm/xe/display: Move remapped plane loop out of __xe_pin_fb_vma_dpt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for adding support for the auxccs plane lets move the plane iteration loop to its own function. Signed-off-by: Tvrtko Ursulin Cc: Juha-Pekka Heikkila Cc: Michael J. Ruhl Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Maarten Lankhorst Reviewed-by: Uma Shankar Link: https://patch.msgid.link/20260324084018.20353-9-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index df7d305c6fcd..845e28efce61 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -50,9 +50,9 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ } static void -write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, - u32 bo_ofs, u32 width, u32 height, u32 src_stride, - u32 dst_stride) +write_dpt_remapped_tiled(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, + u32 bo_ofs, u32 width, u32 height, u32 src_stride, + u32 dst_stride) { struct xe_device *xe = xe_bo_device(bo); struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; @@ -78,6 +78,22 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, *dpt_ofs = ALIGN(*dpt_ofs, 4096); } +static void +write_dpt_remapped(struct xe_bo *bo, + const struct intel_remapped_info *remap_info, + struct iosys_map *map) +{ + u32 i, dpt_ofs = 0; + + for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++) + write_dpt_remapped_tiled(bo, map, &dpt_ofs, + remap_info->plane[i].offset, + remap_info->plane[i].width, + remap_info->plane[i].height, + remap_info->plane[i].src_stride, + remap_info->plane[i].dst_stride); +} + +static int 
__xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, struct i915_vma *vma, @@ -138,17 +154,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, iosys_map_wr(&dpt->vmap, x * 8, u64, pte | addr); } } else if (view->type == I915_GTT_VIEW_REMAPPED) { - const struct intel_remapped_info *remap_info = &view->remapped; - u32 i, dpt_ofs = 0; - - for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++) - write_dpt_remapped(bo, &dpt->vmap, &dpt_ofs, - remap_info->plane[i].offset, - remap_info->plane[i].width, - remap_info->plane[i].height, - remap_info->plane[i].src_stride, - remap_info->plane[i].dst_stride); - + write_dpt_remapped(bo, &view->remapped, &dpt->vmap); } else { const struct intel_rotation_info *rot_info = &view->rotated; u32 i, dpt_ofs = 0; -- cgit v1.2.3 From 11dbd2d14663450111a6104a6d9aec5267405c86 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:15 +0000 Subject: drm/xe/display: Change write_dpt_remapped_tiled function signature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for adding support for the auxccs plane lets change the function signature of write_dpt_remapped_tiled(). This will enable a tidier way of extending it subsequent patches. Signed-off-by: Tvrtko Ursulin Cc: Juha-Pekka Heikkila Cc: Michael J. 
Ruhl Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Maarten Lankhorst Cc: Uma Shankar Reviewed-by: Uma Shankar Link: https://patch.msgid.link/20260324084018.20353-10-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 60 ++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 845e28efce61..ead70ee48028 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -49,33 +49,44 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ *dpt_ofs = ALIGN(*dpt_ofs, 4096); } -static void -write_dpt_remapped_tiled(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, - u32 bo_ofs, u32 width, u32 height, u32 src_stride, - u32 dst_stride) +static unsigned int +write_dpt_padding(struct iosys_map *map, unsigned int dest, unsigned int pad) +{ + /* The DE ignores the PTEs for the padding tiles */ + return dest + pad * sizeof(u64); +} + +static unsigned int +write_dpt_remapped_tiled(struct xe_bo *bo, struct iosys_map *map, + unsigned int dest, + const struct intel_remapped_plane_info *plane) { struct xe_device *xe = xe_bo_device(bo); struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - u32 column, row; - u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); + const u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, + xe->pat.idx[XE_CACHE_NONE]); + unsigned int offset, column, row; - for (row = 0; row < height; row++) { - u32 src_idx = src_stride * row + bo_ofs; + for (row = 0; row < plane->height; row++) { + offset = (plane->offset + plane->src_stride * row) * + XE_PAGE_SIZE; - for (column = 0; column < width; column++) { - u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE); - iosys_map_wr(map, *dpt_ofs, u64, pte | addr); + for (column = 0; column < plane->width; column++) { + u64 addr = xe_bo_addr(bo, offset, 
XE_PAGE_SIZE); - *dpt_ofs += 8; - src_idx++; + iosys_map_wr(map, dest, u64, addr | pte); + dest += sizeof(u64); + offset += XE_PAGE_SIZE; } - /* The DE ignores the PTEs for the padding tiles */ - *dpt_ofs += (dst_stride - width) * 8; + dest = write_dpt_padding(map, dest, + plane->dst_stride - plane->width); } /* Align to next page */ - *dpt_ofs = ALIGN(*dpt_ofs, 4096); + dest = ALIGN(dest, XE_PAGE_SIZE); + + return dest; } static void @@ -83,15 +94,14 @@ write_dpt_remapped(struct xe_bo *bo, const struct intel_remapped_info *remap_info, struct iosys_map *map) { - u32 i, dpt_ofs = 0; - - for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++) - write_dpt_remapped_tiled(bo, map, &dpt_ofs, - remap_info->plane[i].offset, - remap_info->plane[i].width, - remap_info->plane[i].height, - remap_info->plane[i].src_stride, - remap_info->plane[i].dst_stride); + unsigned int i, dest = 0; + + for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++) { + const struct intel_remapped_plane_info *plane = + &remap_info->plane[i]; + + dest = write_dpt_remapped_tiled(bo, map, dest, plane); + } } static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, -- cgit v1.2.3 From cce1c47726579f0974e12e03e8471053d29182da Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:16 +0000 Subject: drm/xe/display: Respect remapped plane alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of assuming PAGE_SIZE alignment between the remapped planes respect the value set in the struct intel_remapped_info. Signed-off-by: Tvrtko Ursulin Cc: Juha-Pekka Heikkila Cc: Michael J. 
Ruhl Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Maarten Lankhorst Reviewed-by: Uma Shankar Link: https://patch.msgid.link/20260324084018.20353-11-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index ead70ee48028..23a7ec41f01d 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -83,9 +83,6 @@ write_dpt_remapped_tiled(struct xe_bo *bo, struct iosys_map *map, plane->dst_stride - plane->width); } - /* Align to next page */ - dest = ALIGN(dest, XE_PAGE_SIZE); - return dest; } @@ -100,6 +97,18 @@ write_dpt_remapped(struct xe_bo *bo, const struct intel_remapped_plane_info *plane = &remap_info->plane[i]; + if (!plane->linear && !plane->width && !plane->height) + continue; + + if (dest && remap_info->plane_alignment) { + const unsigned int index = dest / sizeof(u64); + const unsigned int pad = + ALIGN(index, remap_info->plane_alignment) - + index; + + dest = write_dpt_padding(map, dest, pad); + } + dest = write_dpt_remapped_tiled(bo, map, dest, plane); } } -- cgit v1.2.3 From 9110675732a3c2320ec172d17b2b3d78d250aed4 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:17 +0000 Subject: drm/xe/display: Add support for AuxCCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for mapping the auxiliary CCS buffer into the DPT page tables. This will allow for better power efficiency by enabling the render compression frame buffer modifiers such as I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS in a following patch. Signed-off-by: Tvrtko Ursulin Cc: Juha-Pekka Heikkila Cc: Michael J. 
Ruhl Cc: Rodrigo Vivi Cc: Thomas Hellström Cc: Maarten Lankhorst Reviewed-by: Uma Shankar Link: https://patch.msgid.link/20260324084018.20353-12-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 23a7ec41f01d..e45a1e7a4670 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -56,6 +56,29 @@ write_dpt_padding(struct iosys_map *map, unsigned int dest, unsigned int pad) return dest + pad * sizeof(u64); } +static unsigned int +write_dpt_remapped_linear(struct xe_bo *bo, struct iosys_map *map, + unsigned int dest, + const struct intel_remapped_plane_info *plane) +{ + struct xe_device *xe = xe_bo_device(bo); + struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; + const u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, + xe->pat.idx[XE_CACHE_NONE]); + unsigned int offset = plane->offset * XE_PAGE_SIZE; + unsigned int size = plane->size; + + while (size--) { + u64 addr = xe_bo_addr(bo, offset, XE_PAGE_SIZE); + + iosys_map_wr(map, dest, u64, addr | pte); + dest += sizeof(u64); + offset += XE_PAGE_SIZE; + } + + return dest; +} + static unsigned int write_dpt_remapped_tiled(struct xe_bo *bo, struct iosys_map *map, unsigned int dest, @@ -109,7 +132,10 @@ write_dpt_remapped(struct xe_bo *bo, dest = write_dpt_padding(map, dest, pad); } - dest = write_dpt_remapped_tiled(bo, map, dest, plane); + if (plane->linear) + dest = write_dpt_remapped_linear(bo, map, dest, plane); + else + dest = write_dpt_remapped_tiled(bo, map, dest, plane); } } -- cgit v1.2.3 From 737ec1e81a816731ba74b3f510441b1f4d5068f6 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Mar 2026 08:40:18 +0000 Subject: drm/xe/xelp: Expose AuxCCS frame buffer modifiers on Alderlake-P MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have implemented all the related missing bits we can enable the AuxCCS compressed modifiers which were disabled in cf48bddd31de ("drm/i915/display: Disable AuxCCS framebuffers if built for Xe"). Tested with KDE Wayland, on Lenovo Carbon X1 ADL-P: [PLANE:32:plane 1A]: type=PRI uapi: [FB:242] AR30 little-endian (0x30335241),0x100000000000008,2880x1800, visible=visible, src=28 hw: [FB:242] AR30 little-endian (0x30335241),0x100000000000008,2880x1800, visible=yes, src=2880.000 Display is working fine - no artefacts, no DMAR/PIPE faults. v2: * Adjust patch title. (Rodrigo) v3: * Complete rewrite based on the display parent interface. Signed-off-by: Tvrtko Ursulin References: cf48bddd31de ("drm/i915/display: Disable AuxCCS framebuffers if built for Xe") Cc: Jani Nikula Cc: José Roberto de Souza Cc: Juha-Pekka Heikkila Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260324084018.20353-13-tvrtko.ursulin@igalia.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_display.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 49b6f98e7391..a0a4ddf3bb46 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -541,6 +541,13 @@ static const struct intel_display_irq_interface xe_display_irq_interface = { .synchronize = irq_synchronize, }; +static bool has_auxccs(struct drm_device *drm) +{ + struct xe_device *xe = to_xe_device(drm); + + return xe->info.platform == XE_ALDERLAKE_P; +} + static const struct intel_display_parent_interface parent = { .bo = &xe_display_bo_interface, .dsb = &xe_display_dsb_interface, @@ -552,6 +559,7 @@ static const struct intel_display_parent_interface parent = { .pcode = &xe_display_pcode_interface, .rpm = &xe_display_rpm_interface, .stolen = &xe_display_stolen_interface, + .has_auxccs = has_auxccs, }; /** -- cgit 
v1.2.3 From aec6969f75afbf4e01fd5fb5850ed3e9c27043ac Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 18 Mar 2026 10:02:09 +0000 Subject: drm/xe: always keep track of remap prev/next During 3D workload, user is reporting hitting: [ 413.361679] WARNING: drivers/gpu/drm/xe/xe_vm.c:1217 at vm_bind_ioctl_ops_unwind+0x1e2/0x2e0 [xe], CPU#7: vkd3d_queue/9925 [ 413.361944] CPU: 7 UID: 1000 PID: 9925 Comm: vkd3d_queue Kdump: loaded Not tainted 7.0.0-070000rc3-generic #202603090038 PREEMPT(lazy) [ 413.361949] RIP: 0010:vm_bind_ioctl_ops_unwind+0x1e2/0x2e0 [xe] [ 413.362074] RSP: 0018:ffffd4c25c3df930 EFLAGS: 00010282 [ 413.362077] RAX: 0000000000000000 RBX: ffff8f3ee817ed10 RCX: 0000000000000000 [ 413.362078] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 413.362079] RBP: ffffd4c25c3df980 R08: 0000000000000000 R09: 0000000000000000 [ 413.362081] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8f41fbf99380 [ 413.362082] R13: ffff8f3ee817e968 R14: 00000000ffffffef R15: ffff8f43d00bd380 [ 413.362083] FS: 00000001040ff6c0(0000) GS:ffff8f4696d89000(0000) knlGS:00000000330b0000 [ 413.362085] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 413.362086] CR2: 00007ddfc4747000 CR3: 00000002e6262005 CR4: 0000000000f72ef0 [ 413.362088] PKRU: 55555554 [ 413.362089] Call Trace: [ 413.362092] [ 413.362096] xe_vm_bind_ioctl+0xa9a/0xc60 [xe] Which seems to hint that the vma we are re-inserting for the ops unwind is either invalid or overlapping with something already inserted in the vm. It shouldn't be invalid since this is a re-insertion, so must have worked before. Leaving the likely culprit as something already placed where we want to insert the vma. Following from that, for the case where we do something like a rebind in the middle of a vma, and one or both mapped ends are already compatible, we skip doing the rebind of those vma and set next/prev to NULL. As well as then adjust the original unmap va range, to avoid unmapping the ends. 
However, if we trigger the unwind path, we end up with three va, with the two ends never being removed and the original va range in the middle still being the shrunken size. If this occurs, one failure mode is when another unwind op needs to interact with that range, which can happen with a vector of binds. For example, if we need to re-insert something in place of the original va. In this case the va is still the shrunken version, so when removing it and then doing a re-insert it can overlap with the ends, which were never removed, triggering a warning like above, plus leaving the vm in a bad state. With that, we need two things here: 1) Stop nuking the prev/next tracking for the skip cases. Instead relying on checking for skip prev/next, where needed. That way on the unwind path, we now correctly remove both ends. 2) Undo the unmap va shrinkage, on the unwind path. With the two ends now removed the unmap va should expand back to the original size again, before re-insertion. v2: - Update the explanation in the commit message, based on an actual IGT of triggering this issue, rather than conjecture. - Also undo the unmap shrinkage, for the skip case. With the two ends now removed, the original unmap va range should expand back to the original range. v3: - Track the old start/range separately. vma_size/start() uses the va info directly. 
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7602 Fixes: 8f33b4f054fc ("drm/xe: Avoid doing rebinds") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260318100208.78097-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 12 ++++++------ drivers/gpu/drm/xe/xe_vm.c | 22 ++++++++++++++++++---- drivers/gpu/drm/xe/xe_vm_types.h | 4 ++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 2d9ce2c4cb4f..713a303c9053 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1442,9 +1442,9 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op, err = vma_check_userptr(vm, op->map.vma, pt_update); break; case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) + if (op->remap.prev && !op->remap.skip_prev) err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) + if (!err && op->remap.next && !op->remap.skip_next) err = vma_check_userptr(vm, op->remap.next, pt_update); break; case DRM_GPUVA_OP_UNMAP: @@ -2198,12 +2198,12 @@ static int op_prepare(struct xe_vm *vm, err = unbind_op_prepare(tile, pt_update_ops, old); - if (!err && op->remap.prev) { + if (!err && op->remap.prev && !op->remap.skip_prev) { err = bind_op_prepare(vm, tile, pt_update_ops, op->remap.prev, false); pt_update_ops->wait_vm_bookkeep = true; } - if (!err && op->remap.next) { + if (!err && op->remap.next && !op->remap.skip_next) { err = bind_op_prepare(vm, tile, pt_update_ops, op->remap.next, false); pt_update_ops->wait_vm_bookkeep = true; @@ -2428,10 +2428,10 @@ static void op_commit(struct xe_vm *vm, unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2); - if (op->remap.prev) + if (op->remap.prev && !op->remap.skip_prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, fence, fence2, false); - if (op->remap.next) + if (op->remap.next && !op->remap.skip_next) 
bind_op_commit(vm, tile, pt_update_ops, op->remap.next, fence, fence2, false); break; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e24436287786..ff162e4fc84b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2584,7 +2584,6 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) if (!err && op->remap.skip_prev) { op->remap.prev->tile_present = tile_present; - op->remap.prev = NULL; } } if (op->remap.next) { @@ -2594,11 +2593,13 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) if (!err && op->remap.skip_next) { op->remap.next->tile_present = tile_present; - op->remap.next = NULL; } } - /* Adjust for partial unbind after removing VMA from VM */ + /* + * Adjust for partial unbind after removing VMA from VM. In case + * of unwind we might need to undo this later. + */ if (!err) { op->base.remap.unmap->va->va.addr = op->remap.start; op->base.remap.unmap->va->va.range = op->remap.range; @@ -2717,6 +2718,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); + op->remap.old_start = op->remap.start; + op->remap.old_range = op->remap.range; flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { @@ -2865,8 +2868,19 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; xe_svm_notifier_unlock(vm); - if (post_commit) + if (post_commit) { + /* + * Restore the old va range, in case of the + * prev/next skip optimisation. Otherwise what + * we re-insert here could be smaller than the + * original range. 
+ */ + op->base.remap.unmap->va->va.addr = + op->remap.old_start; + op->base.remap.unmap->va->va.range = + op->remap.old_range; xe_vm_insert_vma(vm, vma); + } } break; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 69e80c94138a..fc811b5e308c 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -393,6 +393,10 @@ struct xe_vma_op_remap { u64 start; /** @range: range of the VMA unmap */ u64 range; + /** @old_start: Original start of the VMA we unmap */ + u64 old_start; + /** @old_range: Original range of the VMA we unmap */ + u64 old_range; /** @skip_prev: skip prev rebind */ bool skip_prev; /** @skip_next: skip next rebind */ -- cgit v1.2.3 From 714ee6754ac5fa3dc078856a196a6b124cd797a0 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 24 Mar 2026 15:29:37 +0000 Subject: drm/xe/xe_pagefault: Disallow writes to read-only VMAs The page fault handler should reject write/atomic access to read only VMAs. Add code to handle this in xe_pagefault_service after the VMA lookup. 
v2: - Apply max line length (Matthew) Fixes: fb544b844508 ("drm/xe: Implement xe_pagefault_queue_work") Signed-off-by: Jonathan Cavitt Suggested-by: Matthew Brost Cc: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260324152935.72444-7-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_pagefault.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index ea4857acf28d..918d595d1c1b 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -187,6 +187,12 @@ static int xe_pagefault_service(struct xe_pagefault *pf) goto unlock_vm; } + if (xe_vma_read_only(vma) && + pf->consumer.access_type != XE_PAGEFAULT_ACCESS_TYPE_READ) { + err = -EPERM; + goto unlock_vm; + } + atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type); if (xe_vma_is_cpu_addr_mirror(vma)) -- cgit v1.2.3 From 716c11d462c598328edca2565e8d25457c6ee3d6 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 24 Mar 2026 15:29:38 +0000 Subject: drm/xe/uapi: Define drm_xe_vm_get_property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add initial declarations for the drm_xe_vm_get_property ioctl. 
v2: - Expand kernel docs for drm_xe_vm_get_property (Jianxun) v3: - Remove address type external definitions (Jianxun) - Add fault type to xe_drm_fault struct (Jianxun) v4: - Remove engine class and instance (Ivan) v5: - Add declares for fault type, access type, and fault level (Matt Brost, Ivan) v6: - Fix inconsistent use of whitespace in defines v7: - Rebase and refactor (jcavitt) v8: - Rebase (jcavitt) v9: - Clarify address is canonical (José) v10: - s/uAPI/Link in the commit log links Link: https://github.com/intel/compute-runtime/pull/878 Signed-off-by: Jonathan Cavitt Reviewed-by: Shuicheng Lin Acked-by: Matthew Brost Acked-by: Ivan Briano Acked-by: José Roberto de Souza Cc: Zhang Jianxun Cc: Ivan Briano Cc: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260324152935.72444-8-jonathan.cavitt@intel.com --- include/uapi/drm/xe_drm.h | 86 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 7014dde1c9c4..6c99514a85e1 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -83,6 +83,7 @@ extern "C" { * - &DRM_IOCTL_XE_OBSERVATION * - &DRM_IOCTL_XE_MADVISE * - &DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS + * - &DRM_IOCTL_XE_VM_GET_PROPERTY */ /* @@ -107,6 +108,7 @@ extern "C" { #define DRM_XE_MADVISE 0x0c #define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d #define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e +#define DRM_XE_VM_GET_PROPERTY 0x0f /* Must be kept compact -- no holes */ @@ -125,6 +127,7 @@ extern "C" { #define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise) #define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr) #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) +#define DRM_IOCTL_XE_VM_GET_PROPERTY 
DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_GET_PROPERTY, struct drm_xe_vm_get_property) /** * DOC: Xe IOCTL Extensions @@ -1263,6 +1266,89 @@ struct drm_xe_vm_bind { __u64 reserved[2]; }; +/** struct xe_vm_fault - Describes faults for %DRM_XE_VM_GET_PROPERTY_FAULTS */ +struct xe_vm_fault { + /** @address: Canonical address of the fault */ + __u64 address; + /** @address_precision: Precision of faulted address */ + __u32 address_precision; + /** @access_type: Type of address access that resulted in fault */ +#define FAULT_ACCESS_TYPE_READ 0 +#define FAULT_ACCESS_TYPE_WRITE 1 +#define FAULT_ACCESS_TYPE_ATOMIC 2 + __u8 access_type; + /** @fault_type: Type of fault reported */ +#define FAULT_TYPE_NOT_PRESENT 0 +#define FAULT_TYPE_WRITE_ACCESS 1 +#define FAULT_TYPE_ATOMIC_ACCESS 2 + __u8 fault_type; + /** @fault_level: fault level of the fault */ +#define FAULT_LEVEL_PTE 0 +#define FAULT_LEVEL_PDE 1 +#define FAULT_LEVEL_PDP 2 +#define FAULT_LEVEL_PML4 3 +#define FAULT_LEVEL_PML5 4 + __u8 fault_level; + /** @pad: MBZ */ + __u8 pad; + /** @reserved: MBZ */ + __u64 reserved[4]; +}; + +/** + * struct drm_xe_vm_get_property - Input of &DRM_IOCTL_XE_VM_GET_PROPERTY + * + * The user provides a VM and a property to query among DRM_XE_VM_GET_PROPERTY_*, + * and sets the values in the vm_id and property members, respectively. This + * determines both the VM to get the property of, as well as the property to + * report. + * + * If size is set to 0, the driver fills it with the required size for the + * requested property. The user is expected here to allocate memory for the + * property structure and to provide a pointer to the allocated memory using the + * data member. For some properties, this may be zero, in which case, the + * value of the property will be saved to the value member and size will remain + * zero on return. + * + * If size is not zero, then the IOCTL will attempt to copy the requested + * property into the data member. 
+ * + * The IOCTL will return -ENOENT if the VM could not be identified from the + * provided VM ID, or -EINVAL if the IOCTL fails for any other reason, such as + * providing an invalid size for the given property or if the property data + * could not be copied to the memory allocated to the data member. + * + * The property member can be: + * - %DRM_XE_VM_GET_PROPERTY_FAULTS + */ +struct drm_xe_vm_get_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: The ID of the VM to query the properties of */ + __u32 vm_id; + +#define DRM_XE_VM_GET_PROPERTY_FAULTS 0 + /** @property: property to get */ + __u32 property; + + /** @size: Size to allocate for @data */ + __u32 size; + + /** @pad: MBZ */ + __u32 pad; + + union { + /** @data: Pointer to user-defined array of flexible size and type */ + __u64 data; + /** @value: Return value for scalar queries */ + __u64 value; + }; + + /** @reserved: MBZ */ + __u64 reserved[3]; +}; + /** * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE * -- cgit v1.2.3 From 64c732ee2a00a2d6a2693ed25663fa0544c56ba8 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 24 Mar 2026 15:29:39 +0000 Subject: drm/xe/xe_vm: Add per VM fault info Add additional information to each VM so they can report up to the first 50 seen faults. Only pagefaults are saved this way currently, though in the future, all faults should be tracked by the VM for future reporting. Additionally, of the pagefaults reported, only failed pagefaults are saved this way, as successful pagefaults should recover silently and not need to be reported to userspace. 
v2: - Free vm after use (Shuicheng) - Compress pf copy logic (Shuicheng) - Update fault_unsuccessful before storing (Shuicheng) - Fix old struct name in comments (Shuicheng) - Keep first 50 pagefaults instead of last 50 (Jianxun) v3: - Avoid unnecessary execution by checking MAX_PFS earlier (jcavitt) - Fix double-locking error (jcavitt) - Assert kmemdump is successful (Shuicheng) v4: - Rename xe_vm.pfs to xe_vm.faults (jcavitt) - Store fault data and not pagefault in xe_vm faults list (jcavitt) - Store address, address type, and address precision per fault (jcavitt) - Store engine class and instance data per fault (Jianxun) - Add and fix kernel docs (Michal W) - Properly handle kzalloc error (Michal W) - s/MAX_PFS/MAX_FAULTS_SAVED_PER_VM (Michal W) - Store fault level per fault (Micahl M) v5: - Store fault and access type instead of address type (Jianxun) v6: - Store pagefaults in non-fault-mode VMs as well (Jianxun) v7: - Fix kernel docs and comments (Michal W) v8: - Fix double-locking issue (Jianxun) v9: - Do not report faults from reserved engines (Jianxun) v10: - Remove engine class and instance (Ivan) v11: - Perform kzalloc outside of lock (Auld) v12: - Fix xe_vm_fault_entry kernel docs (Shuicheng) v13: - Rebase and refactor (jcavitt) v14: - Correctly ignore fault mode in save_pagefault_to_vm (jcavitt) v15: - s/save_pagefault_to_vm/xe_pagefault_save_to_vm (Matt Brost) - Use guard instead of spin_lock/unlock (Matt Brost) - GT was added to xe_pagefault struct. 
Use xe_gt_hw_engine instead of creating a new helper function (Matt Brost) v16: - Set address precision programmatically (Matt Brost) v17: - Set address precision to fixed value (Matt Brost) v18: - s/uAPI/Link in commit log links - Use kzalloc_obj Link: https://github.com/intel/compute-runtime/pull/878 Signed-off-by: Jonathan Cavitt Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Cc: Shuicheng Lin Cc: Jianxun Zhang Cc: Michal Wajdeczko Cc: Michal Mzorek Cc: Ivan Briano Cc: Matthew Auld Cc: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260324152935.72444-9-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_pagefault.c | 26 ++++++++++++++ drivers/gpu/drm/xe/xe_vm.c | 74 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.h | 9 +++++ drivers/gpu/drm/xe/xe_vm_types.h | 29 +++++++++++++++ 4 files changed, 138 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index 918d595d1c1b..2fd55d7c98f9 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -250,6 +250,31 @@ static void xe_pagefault_print(struct xe_pagefault *pf) pf->consumer.engine_instance); } +static void xe_pagefault_save_to_vm(struct xe_device *xe, struct xe_pagefault *pf) +{ + struct xe_vm *vm; + + /* + * Pagefault may be associated with a VM that is not in fault mode. + * Perform asid_to_vm behavior, except if VM is not in fault + * mode, return VM anyway. 
+ */ + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, pf->consumer.asid); + if (vm) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + if (IS_ERR(vm)) + return; + + xe_vm_add_fault_entry_pf(vm, pf); + + xe_vm_put(vm); +} + static void xe_pagefault_queue_work(struct work_struct *w) { struct xe_pagefault_queue *pf_queue = @@ -268,6 +293,7 @@ static void xe_pagefault_queue_work(struct work_struct *w) err = xe_pagefault_service(&pf); if (err) { + xe_pagefault_save_to_vm(gt_to_xe(pf.gt), &pf); if (!(pf.consumer.access_type & XE_PAGEFAULT_ACCESS_PREFETCH)) { xe_pagefault_print(&pf); xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n", diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ff162e4fc84b..5ed98ec8674b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -27,6 +27,7 @@ #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue.h" +#include "xe_gt.h" #include "xe_migrate.h" #include "xe_pat.h" #include "xe_pm.h" @@ -577,6 +578,74 @@ out_unlock_outer: trace_xe_vm_rebind_worker_exit(vm); } +/** + * xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list + * @vm: The VM. + * @pf: The pagefault. + * + * This function takes the data from the pagefault @pf and saves it to @vm->faults.list. + * + * The function exits silently if the list is full, and reports a warning if the pagefault + * could not be saved to the list. 
+ */ +void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf) +{ + struct xe_vm_fault_entry *e; + struct xe_hw_engine *hwe; + + /* Do not report faults on reserved engines */ + hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class, + pf->consumer.engine_instance, false); + if (!hwe || xe_hw_engine_is_reserved(hwe)) + return; + + e = kzalloc_obj(*e); + if (!e) { + drm_warn(&vm->xe->drm, + "Could not allocate memory for fault!\n"); + return; + } + + guard(spinlock)(&vm->faults.lock); + + /* + * Limit the number of faults in the fault list to prevent + * memory overuse. + */ + if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) { + kfree(e); + return; + } + + e->address = pf->consumer.page_addr; + /* + * TODO: + * Address precision is currently always SZ_4K, but this may change + * in the future. + */ + e->address_precision = SZ_4K; + e->access_type = pf->consumer.access_type; + e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK, + pf->consumer.fault_type_level), + e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK, + pf->consumer.fault_type_level), + + list_add_tail(&e->list, &vm->faults.list); + vm->faults.len++; +} + +static void xe_vm_clear_fault_entries(struct xe_vm *vm) +{ + struct xe_vm_fault_entry *e, *tmp; + + guard(spinlock)(&vm->faults.lock); + list_for_each_entry_safe(e, tmp, &vm->faults.list, list) { + list_del(&e->list); + kfree(e); + } + vm->faults.len = 0; +} + static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) { int i; @@ -1538,6 +1607,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) INIT_LIST_HEAD(&vm->userptr.invalidated); spin_lock_init(&vm->userptr.invalidated_lock); + INIT_LIST_HEAD(&vm->faults.list); + spin_lock_init(&vm->faults.lock); + ttm_lru_bulk_move_init(&vm->lru_bulk_move); INIT_WORK(&vm->destroy_work, vm_destroy_work_func); @@ -1854,6 +1926,8 @@ void xe_vm_close_and_put(struct xe_vm *vm) } up_write(&xe->usm.lock); + xe_vm_clear_fault_entries(vm); + 
for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 0bc7ed23eeae..42767d2aebac 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -12,6 +12,12 @@ #include "xe_map.h" #include "xe_vm_types.h" +/** + * MAX_FAULTS_SAVED_PER_VM - Maximum number of faults each vm can store before future + * faults are discarded to prevent memory overuse + */ +#define MAX_FAULTS_SAVED_PER_VM 50 + struct drm_device; struct drm_printer; struct drm_file; @@ -22,6 +28,7 @@ struct dma_fence; struct xe_exec_queue; struct xe_file; +struct xe_pagefault; struct xe_sync_entry; struct xe_svm_range; struct drm_exec; @@ -318,6 +325,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); +void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf); + /** * xe_vm_set_validating() - Register this task as currently making bos resident * @allow_res_evict: Allow eviction of buffer objects bound to @vm when diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index fc811b5e308c..3ab2cef25426 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -24,6 +24,7 @@ struct drm_pagemap; struct xe_bo; +struct xe_pagefault; struct xe_svm_range; struct xe_sync_entry; struct xe_user_fence; @@ -176,6 +177,24 @@ struct xe_userptr_vma { struct xe_device; +/** + * struct xe_vm_fault_entry - Elements of vm->faults.list + * @list: link into @xe_vm.faults.list + * @address: address of the fault + * @address_precision: precision of faulted address + * @access_type: type of address access that resulted in fault + * @fault_type: type of fault reported + * @fault_level: fault level of the fault + */ +struct xe_vm_fault_entry { + struct list_head list; + u64 address; + u32 address_precision; + 
u8 access_type; + u8 fault_type; + u8 fault_level; +}; + struct xe_vm { /** @gpuvm: base GPUVM used to track VMAs */ struct drm_gpuvm gpuvm; @@ -333,6 +352,16 @@ struct xe_vm { bool capture_once; } error_capture; + /** @faults: List of all faults associated with this VM */ + struct { + /** @faults.lock: lock protecting @faults.list */ + spinlock_t lock; + /** @faults.list: list of xe_vm_fault_entry entries */ + struct list_head list; + /** @faults.len: length of @faults.list */ + unsigned int len; + } faults; + /** * @validation: Validation data only valid with the vm resv held. * Note: This is really task state of the task holding the vm resv, -- cgit v1.2.3 From 50c577eab051638fbe8989fae1f826ecc1d2e2c7 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Tue, 24 Mar 2026 15:29:40 +0000 Subject: drm/xe/xe_vm: Implement xe_vm_get_property_ioctl Add support for userspace to request a list of observed faults from a specified VM. v2: - Only allow querying of failed pagefaults (Matt Brost) v3: - Remove unnecessary size parameter from helper function, as it is a property of the arguments. (jcavitt) - Remove unnecessary copy_from_user (Jainxun) - Set address_precision to 1 (Jainxun) - Report max size instead of dynamic size for memory allocation purposes. Total memory usage is reported separately. 
v4: - Return int from xe_vm_get_property_size (Shuicheng) - Fix memory leak (Shuicheng) - Remove unnecessary size variable (jcavitt) v5: - Rename ioctl to xe_vm_get_faults_ioctl (jcavitt) - Update fill_property_pfs to eliminate need for kzalloc (Jianxun) v6: - Repair and move fill_faults break condition (Dan Carpenter) - Free vm after use (jcavitt) - Combine assertions (jcavitt) - Expand size check in xe_vm_get_faults_ioctl (jcavitt) - Remove return mask from fill_faults, as return is already -EFAULT or 0 (jcavitt) v7: - Revert back to using xe_vm_get_property_ioctl - Apply better copy_to_user logic (jcavitt) v8: - Fix and clean up error value handling in ioctl (jcavitt) - Reapply return mask for fill_faults (jcavitt) v9: - Future-proof size logic for zero-size properties (jcavitt) - Add access and fault types (Jianxun) - Remove address type (Jianxun) v10: - Remove unnecessary switch case logic (Raag) - Compress size get, size validation, and property fill functions into a single helper function (jcavitt) - Assert valid size (jcavitt) v11: - Remove unnecessary else condition - Correct backwards helper function size logic (jcavitt) v12: - Use size_t instead of int (Raag) v13: - Remove engine class and instance (Ivan) v14: - Map access type, fault type, and fault level to user macros (Matt Brost, Ivan) v15: - Remove unnecessary size assertion (jcavitt) v16: - Nit fixes (Matt Brost) v17: - Rebase and refactor (jcavitt) v18: - Do not copy_to_user in critical section (Matt Brost) - Assert args->size is multiple of sizeof(struct xe_vm_fault) (Matt Brost) v19: - Remove unnecessary memset (Matt Brost) v20: - Report canonicalized address (Jose) - Mask out prefetch data from access type (Jose, jcavitt) v21: - s/uAPI/Link in the commit log links - Align debug parameters Link: https://github.com/intel/compute-runtime/pull/878 Signed-off-by: Jonathan Cavitt Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Acked-by: Michal Mrozek Cc: Jainxun Zhang Cc: Shuicheng Lin Cc: 
Raag Jadav Cc: Ivan Briano Cc: Jose Souza Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260324152935.72444-10-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 + drivers/gpu/drm/xe/xe_vm.c | 117 +++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.h | 3 ++ 3 files changed, 122 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 05fa16044f9b..041e014ed92c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -211,6 +211,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_GET_PROPERTY, xe_vm_get_property_ioctl, + DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5ed98ec8674b..d96e0a0c5605 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3974,6 +3974,123 @@ put_vm: return err; } +/* + * Map access type, fault type, and fault level from current bspec + * specification to user spec abstraction. The current mapping is + * approximately 1-to-1, with access type being the only notable + * exception as it carries additional data with respect to prefetch + * status that needs to be masked out. 
+ */ +static u8 xe_to_user_access_type(u8 access_type) +{ + return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK; +} + +static u8 xe_to_user_fault_type(u8 fault_type) +{ + return fault_type; +} + +static u8 xe_to_user_fault_level(u8 fault_level) +{ + return fault_level; +} + +static int fill_faults(struct xe_vm *vm, + struct drm_xe_vm_get_property *args) +{ + struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data); + struct xe_vm_fault *fault_list, fault_entry = { 0 }; + struct xe_vm_fault_entry *entry; + int ret = 0, i = 0, count, entry_size; + + entry_size = sizeof(struct xe_vm_fault); + count = args->size / entry_size; + + fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL); + if (!fault_list) + return -ENOMEM; + + spin_lock(&vm->faults.lock); + list_for_each_entry(entry, &vm->faults.list, list) { + if (i == count) + break; + + fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address); + fault_entry.address_precision = entry->address_precision; + + fault_entry.access_type = xe_to_user_access_type(entry->access_type); + fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type); + fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level); + + memcpy(&fault_list[i], &fault_entry, entry_size); + + i++; + } + spin_unlock(&vm->faults.lock); + + ret = copy_to_user(usr_ptr, fault_list, args->size); + + kfree(fault_list); + return ret ? 
-EFAULT : 0; +} + +static int xe_vm_get_property_helper(struct xe_vm *vm, + struct drm_xe_vm_get_property *args) +{ + size_t size; + + switch (args->property) { + case DRM_XE_VM_GET_PROPERTY_FAULTS: + spin_lock(&vm->faults.lock); + size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len); + spin_unlock(&vm->faults.lock); + + if (!args->size) { + args->size = size; + return 0; + } + + /* + * Number of faults may increase between calls to + * xe_vm_get_property_ioctl, so just report the number of + * faults the user requests if it's less than or equal to + * the number of faults in the VM fault array. + * + * We should also at least assert that the args->size value + * is a multiple of the xe_vm_fault struct size. + */ + if (args->size > size || args->size % sizeof(struct xe_vm_fault)) + return -EINVAL; + + return fill_faults(vm, args); + } + return -EINVAL; +} + +int xe_vm_get_property_ioctl(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_vm_get_property *args = data; + struct xe_vm *vm; + int ret = 0; + + if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] || + args->reserved[2]))) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -ENOENT; + + ret = xe_vm_get_property_helper(vm, args); + + xe_vm_put(vm); + return ret; +} + /** * xe_vm_bind_kernel_bo - bind a kernel BO to a VM * @vm: VM to bind the BO to diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 42767d2aebac..c5b900f38ded 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -210,6 +210,9 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int xe_vm_get_property_ioctl(struct drm_device *dev, void *data, + 
struct drm_file *file); + void xe_vm_close_and_put(struct xe_vm *vm); static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) -- cgit v1.2.3 From 4e966014ce8826bb7d0180394f40b643b1405925 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Wed, 25 Mar 2026 17:01:52 +0100 Subject: drm/xe: Add new SVM copy GT stats per size Breakdown the GT stats for copy to host and copy to device per size (4K, 64K 2M) to make it easier for user space to track memory migrations. This is helpful to verify allocation alignment is correct when porting applications to SVM. Cc: Matthew Brost Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260325160152.1057556-1-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_gt_stats.c | 6 ++++++ drivers/gpu/drm/xe/xe_gt_stats_types.h | 6 ++++++ drivers/gpu/drm/xe/xe_svm.c | 27 +++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 81cec441b449..59b3b23a54c8 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -85,7 +85,13 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"), DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"), DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"), + DEF_STAT_STR(SVM_4K_DEVICE_COPY_KB, "svm_4K_device_copy_kb"), + DEF_STAT_STR(SVM_64K_DEVICE_COPY_KB, "svm_64K_device_copy_kb"), + DEF_STAT_STR(SVM_2M_DEVICE_COPY_KB, "svm_2M_device_copy_kb"), DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"), + DEF_STAT_STR(SVM_4K_CPU_COPY_KB, "svm_4K_cpu_copy_kb"), + DEF_STAT_STR(SVM_64K_CPU_COPY_KB, "svm_64K_cpu_copy_kb"), + DEF_STAT_STR(SVM_2M_CPU_COPY_KB, "svm_2M_cpu_copy_kb"), DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"), DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"), DEF_STAT_STR(SVM_2M_GET_PAGES_US, 
"svm_2M_get_pages_us"), diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index b6081c312474..081c787ddcb6 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -40,7 +40,13 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, + XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_KB, + XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_KB, + XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_KB, XE_GT_STATS_ID_SVM_CPU_COPY_KB, + XE_GT_STATS_ID_SVM_4K_CPU_COPY_KB, + XE_GT_STATS_ID_SVM_64K_CPU_COPY_KB, + XE_GT_STATS_ID_SVM_2M_CPU_COPY_KB, XE_GT_STATS_ID_SVM_4K_GET_PAGES_US, XE_GT_STATS_ID_SVM_64K_GET_PAGES_US, XE_GT_STATS_ID_SVM_2M_GET_PAGES_US, diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index a91c84487a67..0251098650af 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -485,10 +485,33 @@ static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt, const enum xe_svm_copy_dir dir, int kb) { - if (dir == XE_SVM_COPY_TO_VRAM) + if (dir == XE_SVM_COPY_TO_VRAM) { + switch (kb) { + case 4: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_KB, kb); + break; + case 64: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_KB, kb); + break; + case 2048: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_KB, kb); + break; + } xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb); - else + } else { + switch (kb) { + case 4: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_KB, kb); + break; + case 64: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_KB, kb); + break; + case 2048: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_KB, kb); + break; + } xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb); + } } static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, -- cgit v1.2.3 From 3d4939c0ec011ad6dfda7c13362b3d2013425789 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: 
Fri, 20 Mar 2026 15:57:33 +0100 Subject: drm/xe: Fix confusion with locals on context creation After setting a local variable, check that local value rather than checking the destination at which the value will be stored later. This fixes the obvious mistake in the error path; without it, allocation failure would lead to NULL dereference during context creation. Fixes: 89340099c6a4 ("drm/xe/lrc: Refactor context init into xe_lrc_ctx_init()") Signed-off-by: Tomasz Lis Cc: Raag Jadav Cc: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260320145733.1337682-1-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 24f4c7210cfb..9d12a0d2f0b5 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1630,8 +1630,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_v bo = xe_bo_create_pin_map_novm(xe, tile, bo_size, ttm_bo_type_kernel, bo_flags, false); - if (IS_ERR(lrc->bo)) - return PTR_ERR(lrc->bo); + if (IS_ERR(bo)) + return PTR_ERR(bo); lrc->bo = bo; -- cgit v1.2.3