diff options
| author | Alex Deucher <alexander.deucher@amd.com> | 2026-03-16 11:04:46 -0400 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2026-03-17 18:03:09 -0400 |
| commit | e9f58ff991dd4be13fd7a651bbf64329c090af09 (patch) | |
| tree | ccb54786f65431ed23fc1c022c8fa9f388378873 | |
| parent | 3fc4648b53b7e393b91e63600e28e6f25c8ef0c5 (diff) | |
drm/amdgpu: rework how we handle TLB fences
Add a new VM flag to indicate whether or not we need
a TLB fence. Userqs (KFD or KGD) require a TLB fence.
A TLB fence is not strictly required for kernel queues,
but it shouldn't hurt. That said, enabling this
unconditionally should be fine, but it seems to tickle
some issues in KIQ/MES. Only enable them for KFD,
or when KGD userq queues are enabled (currently via module
parameter).
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4798
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4749
Fixes: f3854e04b708 ("drm/amdgpu: attach tlb fence to the PTs update")
Cc: Christian König <christian.koenig@amd.com>
Cc: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 69c5fbd2b93b5ced77c6e79afe83371bca84c788)
Cc: stable@vger.kernel.org
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 |
2 files changed, 8 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f2beb980e3c3..c60cbce356cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1069,7 +1069,10 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, } /* Prepare a TLB flush fence to be attached to PTs */ - if (!params->unlocked) { + /* The check for need_tlb_fence should be dropped once we + * sort out the issues with KIQ/MES TLB invalidation timeouts. + */ + if (!params->unlocked && vm->need_tlb_fence) { amdgpu_vm_tlb_fence_create(params->adev, vm, fence); /* Makes sure no PD/PT is freed before the flush */ @@ -2602,6 +2605,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ttm_lru_bulk_move_init(&vm->lru_bulk_move); vm->is_compute_context = false; + vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev); vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); @@ -2739,6 +2743,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_update); vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; + vm->need_tlb_fence = true; unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 806d62ed61ef..bb276c0ad06d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -441,6 +441,8 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* Flag to indicate if VM is used for compute */ bool is_compute_context; + /* Flag to indicate if VM needs a TLB fence (KFD or KGD) */ + bool need_tlb_fence; /* Memory partition number, -1 means any partition */ int8_t mem_id; |
