drm/amdgpu: add a helper for processing recoverable GPUVM faults

Add a common helper to remove the repeated logic from each gmc module.

Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Alex Deucher <alexander.deucher@amd.com> 2025-12-01 14:46:53 -0500
committer: Alex Deucher <alexander.deucher@amd.com> 2025-12-08 14:14:38 -0500
commit: d3ff65243a52afa85166abaa8d00a44c17691dbd (patch)
tree: 6ae38427430b7ce215e3435ba9e734ef9202743e /drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
parent: a50d32c41fb25d772cc1c47b6abed8fb811d58c2 (diff)
1 file changed, 48 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 4abed753fc2d..8ac92e7bed31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -524,6 +524,54 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
 	} while (fault->timestamp < tmp);
 }
 
+int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev,
+				  struct amdgpu_iv_entry *entry,
+				  u64 addr,
+				  u32 cam_index,
+				  u32 node_id,
+				  bool write_fault)
+{
+	int ret;	/* amdgpu_vm_handle_fault() result; used in the retry CAM path only */
+
+	if (adev->irq.retry_cam_enabled) {
+		/* Entry is still on the primary ring (irq.ih): delegate it to a
+		 * different ring if the hardware hasn't already done it.
+		 */
+		if (entry->ih == &adev->irq.ih) {
+			amdgpu_irq_delegate(adev, entry, 8);
+			return 1;	/* delegated, not handled in this call */
+		}
+
+		ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+					     addr, entry->timestamp, write_fault);
+		WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+		if (ret)	/* the doorbell write above happens whatever ret is */
+			return 1;
+	} else {
+		/* Process it only if it's the first fault for this address */
+		if (entry->ih != &adev->irq.ih_soft &&	/* ih_soft entries skip the filter */
+		    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+					     entry->timestamp))
+			return 1;	/* filter returned non-zero: drop this entry */
+
+		/* Entry is still on the primary ring (irq.ih): delegate it to a
+		 * different ring if the hardware hasn't already done it.
+		 */
+		if (entry->ih == &adev->irq.ih) {
+			amdgpu_irq_delegate(adev, entry, 8);
+			return 1;	/* delegated, not handled in this call */
+		}
+
+		/* Try to handle the recoverable page faults by filling page
+		 * tables
+		 */
+		if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+					   addr, entry->timestamp, write_fault))
+			return 1;
+	}
+	return 0;	/* reached only when amdgpu_vm_handle_fault() returned 0 */
+}
+
 int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
 {
 	int r;
author	Alex Deucher <alexander.deucher@amd.com>	2025-12-01 14:46:53 -0500
committer	Alex Deucher <alexander.deucher@amd.com>	2025-12-08 14:14:38 -0500
commit	d3ff65243a52afa85166abaa8d00a44c17691dbd (patch)
tree	6ae38427430b7ce215e3435ba9e734ef9202743e /drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
parent	a50d32c41fb25d772cc1c47b6abed8fb811d58c2 (diff)