From e09b081d8ae0e60022a66b26e9d51909c6f2e383 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 21 Aug 2025 16:05:42 +0800 Subject: drm/amdgpu: wait pmfw polling mca bank info done wait 500ms to ensure pmfw polling mca bank info done. Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 7fe5b1940df8..663b96583bfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2702,6 +2702,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + unsigned int error_query_mode; enum ras_event_type type; if (hive) { @@ -2730,6 +2731,13 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) device_list_handle = &device_list; } + if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) { + if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) { + /* wait 500ms to ensure pmfw polling mca bank info done */ + msleep(500); + } + } + type = amdgpu_ras_get_fatal_error_event(adev); list_for_each_entry(remote_adev, device_list_handle, gmc.xgmi.head) { -- cgit v1.2.3 From 1ed511fb760848e3ccdcb936f4257c094e95a43c Mon Sep 17 00:00:00 2001 From: Xiang Liu Date: Tue, 19 Aug 2025 13:06:24 +0800 Subject: drm/amdgpu: Check VF critical region before RAS poison injection Check VF critical region before RAS poison injection to ensure that the poison injection will not hit the VF critical region. Signed-off-by: Xiang Liu Reviewed-by: Shravan Kumar Gande Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 663b96583bfd..e0ee21150860 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -219,10 +219,17 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev, struct amdgpu_vram_block_info blk_info; uint64_t page_pfns[32] = {0}; int i, ret, count; + bool hit = false; if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) return 0; + if (amdgpu_sriov_vf(adev)) { + if (amdgpu_virt_check_vf_critical_region(adev, address, &hit)) + return -EPERM; + return hit ? -EACCES : 0; + } + if ((address >= adev->gmc.mc_vram_size) || (address >= RAS_UMC_INJECT_ADDR_LIMIT)) return -EFAULT; -- cgit v1.2.3