summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-12-19 12:04:47 +1000
committerDave Airlie <airlied@redhat.com>2024-12-20 07:57:01 +1000
commit8368e9719de1ecf60e27883a08692283f4086b05 (patch)
tree0cf3ca10bb4c21cbd602b7668e3c8981824909d8 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
parent38e961097e04e7adfe1d3335e3371e97c1723064 (diff)
parent695c2c745e5dff201b75da8a1d237ce403600d04 (diff)
Merge tag 'amd-drm-next-6.14-2024-12-18' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.14-2024-12-18: amdgpu: - RAS updates - ISP updates - SDMA queue reset support - Rework DPM powergating interfaces - Documentation updates and cleanups - Panel replay fixes - DCN 3.5 updates - DP tunneling fixes - Use a pm notifier to more gracefully handle VRAM eviction on suspend or hibernate - Add debugfs interfaces for forcing scheduling to specific engine instances - GG 9.5 updates - IH 4.4 updates - Make missing optional firmware less noisy - PSP 13.x updates - SMU 13.x updates - VCN 5.x updates - JPEG 5.x updates - Misc cleanups - GC 12.x updates - DRM panic support - DC FAMS updates - DSC fixes - job handling fixes amdkfd: - GG 9.5 updates - Logging improvements - Misc cleanups - Various Optimizations Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20241218201758.2580723-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h15
1 files changed, 11 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 6db772ecfee4..82db986c36a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -99,7 +99,8 @@ enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__IH,
AMDGPU_RAS_BLOCK__MPIO,
- AMDGPU_RAS_BLOCK__LAST
+ AMDGPU_RAS_BLOCK__LAST,
+ AMDGPU_RAS_BLOCK__ANY = -1
};
enum amdgpu_ras_mca_block {
@@ -482,6 +483,8 @@ struct ras_ecc_err {
uint64_t ipid;
uint64_t addr;
uint64_t pa_pfn;
+ /* save global channel index across all UMC instances */
+ uint32_t channel_idx;
struct ras_err_pages err_pages;
};
@@ -558,8 +561,8 @@ struct amdgpu_ras {
struct ras_ecc_log_info umc_ecc_log;
struct delayed_work page_retirement_dwork;
- /* Fatal error detected flag */
- atomic_t fed;
+ /* ras errors detected */
+ unsigned long ras_err_state;
/* RAS event manager */
struct ras_event_manager __event_mgr;
@@ -750,7 +753,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages);
+ struct eeprom_table_record *bps, int pages, bool from_rom);
int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
unsigned long *new_cnt);
@@ -952,6 +955,10 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a
void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status);
bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev);
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev);
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block);
u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type);
int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,