summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2025-09-24 17:52:24 +0800
committerAlex Deucher <alexander.deucher@amd.com>2025-11-11 21:54:14 -0500
commiteed30152746ec1d8b6e8ab31e349f1eb8d8bd666 (patch)
treefd09ac875be1b2f7a725b2ce60b9bf0e2b9e6064 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
parentded3ad780cf97a04927773c4600823b84f7f3cc2 (diff)
drm/amdgpu: add RAS bad page threshold handling for PMFW manages eeprom
Check if bad page threshold is reached and take actions accordingly. v2: remove rma message sent to smu when pmfw manages eeprom. v3: add null pointer check for con. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c36
1 files changed, 30 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 01b38a6e198e..99aa1908833d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -903,6 +903,33 @@ int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *contro
return ret;
}
+static int amdgpu_ras_smu_eeprom_append(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev) || !con)
+ return 0;
+
+ control->ras_num_bad_pages = con->bad_page_num;
+
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->ras_num_bad_pages > con->bad_page_cnt_threshold) {
+ dev_warn(adev->dev,
+ "Saved bad pages %d reaches threshold value %d\n",
+ control->ras_num_bad_pages, con->bad_page_cnt_threshold);
+
+ if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
+ dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2))
+ con->is_rma = true;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
* @control: pointer to control structure
@@ -921,17 +948,14 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int res, i;
uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
- if (!__is_ras_eeprom_supported(adev) || !con)
+ if (!__is_ras_eeprom_supported(adev))
return 0;
- if (amdgpu_ras_smu_eeprom_supported(adev)) {
- control->ras_num_bad_pages = con->bad_page_num;
- return 0;
- }
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_append(control);
if (num == 0) {
dev_err(adev->dev, "will not append 0 records\n");