From d95ca7f515cfd2e721de07e86aa79adb17575a52 Mon Sep 17 00:00:00 2001
From: YiPeng Chai
Date: Tue, 28 Oct 2025 16:18:31 +0800
Subject: drm/amdgpu: suspend ras module before gpu reset

During gpu reset, all GPU-related resources are
inaccessible. To avoid affecting ras functionality,
suspend ras module before gpu reset and resume
it after gpu reset is complete.

V2:
  Rename functions to avoid misunderstanding.

V3:
  Move flush_delayed_work to amdgpu_ras_process_pause,
  Move schedule_delayed_work to amdgpu_ras_process_unpause.

V4:
  Rename functions.

V5:
  Move the function to amdgpu_ras.c.

Signed-off-by: YiPeng Chai
Reviewed-by: Tao Zhou
Reviewed-by: Hawking Zhang
Acked-by: Lijo Lazar
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 95f7ae36e4f1..dcf6fce1c5a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -71,6 +71,7 @@
 
 #include "amdgpu_xgmi.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
 #include "amdgpu_pmu.h"
 #include "amdgpu_fru_eeprom.h"
 #include "amdgpu_reset.h"
@@ -6660,6 +6661,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		goto end_reset;
 	}
 
+	/* Cannot be called after locking reset domain */
+	amdgpu_ras_pre_reset(adev, &device_list);
+
 	/* We need to lock reset domain only once both for XGMI and single device */
 	amdgpu_device_recovery_get_reset_lock(adev, &device_list);
 
@@ -6691,6 +6695,7 @@ skip_sched_resume:
 reset_unlock:
 	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
 end_reset:
+	amdgpu_ras_post_reset(adev, &device_list);
 	if (hive) {
 		mutex_unlock(&hive->hive_lock);
 		amdgpu_put_xgmi_hive(hive);
-- 
cgit v1.2.3