diff options
| author | Tao Zhou <tao.zhou1@amd.com> | 2025-11-06 16:26:56 +0800 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2025-11-11 21:54:14 -0500 |
| commit | 7fb41ab3c94828ad48e1a6d2237e8a7e682c74b9 (patch) | |
| tree | 6fbfe0f157e779fdb3b0097fce4171562a6cec24 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | |
| parent | eed30152746ec1d8b6e8ab31e349f1eb8d8bd666 (diff) | |
drm/amdgpu: optimize timeout implementation in ras_eeprom_update_record_num
The busy status returned by ras_eeprom_update_record_num may not be
an error; increase the timeout to exclude false busy status. Also add
more comments to make the code more readable.
v2: define a macro for the timeout value.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 18 |
1 files changed, 13 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 99aa1908833d..64dd7a81bff5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -124,6 +124,8 @@ RAS_TABLE_V2_1_INFO_SIZE) \ / RAS_TABLE_RECORD_SIZE) +#define RAS_SMU_MESSAGE_TIMEOUT_MS 1000 /* 1s */ + /* Given a zero-based index of an EEPROM RAS record, yields the EEPROM * offset off of RAS_TABLE_START. That is, this is something you can * add to control->i2c_address, and then tell I2C layer to read @@ -874,7 +876,7 @@ Out: int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control) { struct amdgpu_device *adev = to_amdgpu_device(control); - int ret, timeout = 1000; + int ret, retry = 20; if (!amdgpu_ras_smu_eeprom_supported(adev)) return 0; @@ -882,17 +884,23 @@ int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *contro control->ras_num_recs_old = control->ras_num_recs; do { + /* 1000ms timeout is long enough, smu_get_badpage_count won't + * return -EBUSY before timeout. + */ ret = amdgpu_ras_smu_get_badpage_count(adev, - &(control->ras_num_recs), 12); + &(control->ras_num_recs), RAS_SMU_MESSAGE_TIMEOUT_MS); if (!ret && (control->ras_num_recs_old == control->ras_num_recs)) { - /* record number update in PMFW needs some time */ + /* record number update in PMFW needs some time, + * smu_get_badpage_count may return immediately without + * count update, sleep for a while and retry again. + */ msleep(50); - timeout -= 50; + retry--; } else { break; } - } while (timeout); + } while (retry); /* no update of record number is not a real failure, * don't print warning here |
