Merge tag 'amd-drm-next-6.19-2025-11-14' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.19-2025-11-14:

amdgpu:
- RAS updates
- GC12 DCC P2P fix
- Documentation fixes
- Power limit code cleanup
- Userq updates
- VRR fix
- SMART OLED support
- DSC refactor for DCN 3.5
- Replay updates
- DC clockgating updates
- HDCP refactor
- ISP fix
- SMU 13.0.12 updates
- JPEG 5.0.1 fix
- VCE1 support
- Enable DC by default on SI
- Refactor CIK and SI enablement
- Enable amdgpu by default for CI dGPUs
- XGMI fixes
- SR-IOV fixes
- Memory allocation critical path fixes
- Enable amdgpu by default on SI dGPUs

amdkfd:
- Relax checks on save area overallocations
- Fix GPU mappings after prefetch

radeon:
- Refactor CIK and SI enablement

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20251114192553.442621-1-alexander.deucher@amd.com
author: Dave Airlie <airlied@redhat.com> 2025-11-18 06:58:01 +1000
committer: Dave Airlie <airlied@redhat.com> 2025-11-18 07:01:26 +1000
commit: f3a1d69f9b388271986f4efe1fd775df15b443c1 (patch)
tree: b82a68c63a88b24b30ede3b4263fb3d2f2b6344d /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent: fd1a11ea111b083aa3d19f36516ecda5efa2b69f (diff)
parent: ccd3b4c7c37fbbd3e5244d3c54ca24ae0a37810d (diff)
1 files changed, 19 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 055a9bbabbdb..9e2e098af86c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3014,8 +3014,13 @@ static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
 	addr_in.ma.err_addr = bps->address;
 	addr_in.ma.socket_id = socket;
 	addr_in.ma.ch_inst = bps->mem_channel;
-	/* tell RAS TA the node instance is not used */
-	addr_in.ma.node_inst = TA_RAS_INV_NODE;
+	if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+		/* tell RAS TA the node instance is not used */
+		addr_in.ma.node_inst = TA_RAS_INV_NODE;
+	} else {
+		addr_in.ma.umc_inst = bps->mcumc_id;
+		addr_in.ma.node_inst = bps->cu;
+	}
 
 	if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
 		ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
@@ -3162,7 +3167,11 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
 		save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
 		bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
 	} else {
-		save_nps = nps;
+		/* if pmfw manages eeprom, save_nps is not stored on eeprom,
+		 * we should always convert mca address into physical address,
+		 * make save_nps different from nps
+		 */
+		save_nps = nps + 1;
 	}
 
 	if (save_nps == nps) {
@@ -3300,7 +3309,13 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
 	mutex_lock(&con->recovery_lock);
 	control = &con->eeprom_control;
 	data = con->eh_data;
-	unit_num = data->count / adev->umc.retire_unit - control->ras_num_recs;
+	if (amdgpu_ras_smu_eeprom_supported(adev))
+		unit_num = control->ras_num_recs -
+			control->ras_num_recs_old;
+	else
+		unit_num = data->count / adev->umc.retire_unit -
+			control->ras_num_recs;
+
 	save_count = con->bad_page_num - control->ras_num_bad_pages;
 	mutex_unlock(&con->recovery_lock);
author	Dave Airlie <airlied@redhat.com>	2025-11-18 06:58:01 +1000
committer	Dave Airlie <airlied@redhat.com>	2025-11-18 07:01:26 +1000
commit	f3a1d69f9b388271986f4efe1fd775df15b443c1 (patch)
tree	b82a68c63a88b24b30ede3b4263fb3d2f2b6344d /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent	fd1a11ea111b083aa3d19f36516ecda5efa2b69f (diff)
parent	ccd3b4c7c37fbbd3e5244d3c54ca24ae0a37810d (diff)