summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2025-07-23 19:04:17 +0800
committerAlex Deucher <alexander.deucher@amd.com>2025-11-06 09:58:35 -0500
commit7f34ddf77d30959fcc24cd279074f7e0d4f732df (patch)
tree810370e75ab227e3cebc46439e2e58f2a5cbbc8f /drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
parentcd74132be8461429b5e98991aa15edeeb81a9f56 (diff)
drm/amdgpu: add ras_eeprom_read_idx interface
PMFW will manage RAS eeprom data by itself, add new interface to read eeprom data via PMFW, we can read part of records by setting index. v2: use IPID parse interface. pa is not used and set it to a fixed value. v3: optimize the null pointer check for IPID parse interface. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Yang Wang <kevinyang.wang@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c51
1 files changed, 51 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index d7e2a81bc274..9854238ce7bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -970,6 +970,50 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
return res;
}
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint64_t ts, end_idx;
+ int i, ret;
+ u64 mca, ipid;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
+ if (!adev->umc.ras || !adev->umc.ras->mca_ipid_parse)
+ return -EOPNOTSUPP;
+
+ end_idx = rec_idx + num;
+ for (i = rec_idx; i < end_idx; i++) {
+ ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, i, &mca);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_smu_get_badpage_ipid(adev, i, &ipid);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_smu_get_timestamp(adev, i, &ts);
+ if (ret)
+ return ret;
+
+ record[i - rec_idx].address = mca;
+ /* retired_page (pa) is unused now */
+ record[i - rec_idx].retired_page = 0x1ULL;
+ record[i - rec_idx].ts = ts;
+ record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+ record[i - rec_idx].cu = 0;
+
+ adev->umc.ras->mca_ipid_parse(adev, ipid, NULL,
+ (uint32_t *)&(record[i - rec_idx].mem_channel),
+ (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL);
+ }
+
+ return 0;
+}
+
/**
* amdgpu_ras_eeprom_read -- read EEPROM
* @control: pointer to control structure
@@ -991,6 +1035,9 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
u8 *buf, *pp;
u32 g0, g1;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_eeprom_read_idx(control, record, 0, num);
+
if (!__is_ras_eeprom_supported(adev))
return 0;
@@ -1162,6 +1209,10 @@ static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
int res = -EFAULT;
size_t data_len;
+ /* pmfw manages eeprom data by itself */
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
mutex_lock(&control->ras_tbl_mutex);
/* We want *pos - data_len > 0, which means there's