diff options
| author | Dave Airlie <airlied@redhat.com> | 2025-11-18 06:58:01 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2025-11-18 07:01:26 +1000 |
| commit | f3a1d69f9b388271986f4efe1fd775df15b443c1 (patch) | |
| tree | b82a68c63a88b24b30ede3b4263fb3d2f2b6344d /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
| parent | fd1a11ea111b083aa3d19f36516ecda5efa2b69f (diff) | |
| parent | ccd3b4c7c37fbbd3e5244d3c54ca24ae0a37810d (diff) | |
Merge tag 'amd-drm-next-6.19-2025-11-14' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.19-2025-11-14:
amdgpu:
- RAS updates
- GC12 DCC P2P fix
- Documentation fixes
- Power limit code cleanup
- Userq updates
- VRR fix
- SMART OLED support
- DSC refactor for DCN 3.5
- Replay updates
- DC clockgating updates
- HDCP refactor
- ISP fix
- SMU 13.0.12 updates
- JPEG 5.0.1 fix
- VCE1 support
- Enable DC by default on SI
- Refactor CIK and SI enablement
- Enable amdgpu by default for CI dGPUs
- XGMI fixes
- SR-IOV fixes
- Memory allocation critical path fixes
- Enable amdgpu by default on SI dGPUs
amdkfd:
- Relax checks on save area overallocations
- Fix GPU mappings after prefetch
radeon:
- Refactor CIK and SI enablement
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20251114192553.442621-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 23 |
1 files changed, 19 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 055a9bbabbdb..9e2e098af86c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3014,8 +3014,13 @@ static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev, addr_in.ma.err_addr = bps->address; addr_in.ma.socket_id = socket; addr_in.ma.ch_inst = bps->mem_channel; - /* tell RAS TA the node instance is not used */ - addr_in.ma.node_inst = TA_RAS_INV_NODE; + if (!amdgpu_ras_smu_eeprom_supported(adev)) { + /* tell RAS TA the node instance is not used */ + addr_in.ma.node_inst = TA_RAS_INV_NODE; + } else { + addr_in.ma.umc_inst = bps->mcumc_id; + addr_in.ma.node_inst = bps->cu; + } if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) ret = adev->umc.ras->convert_ras_err_addr(adev, err_data, @@ -3162,7 +3167,11 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); } else { - save_nps = nps; + /* if pmfw manages eeprom, save_nps is not stored on eeprom, + * we should always convert mca address into physical address, + * make save_nps different from nps + */ + save_nps = nps + 1; } if (save_nps == nps) { @@ -3300,7 +3309,13 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; - unit_num = data->count / adev->umc.retire_unit - control->ras_num_recs; + if (amdgpu_ras_smu_eeprom_supported(adev)) + unit_num = control->ras_num_recs - + control->ras_num_recs_old; + else + unit_num = data->count / adev->umc.retire_unit - + control->ras_num_recs; + save_count = con->bad_page_num - control->ras_num_bad_pages; mutex_unlock(&con->recovery_lock); |
