From ec58991054e899c9d86f7e3c8a96cb602d4b5938 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Tue, 21 May 2024 15:03:02 +0800 Subject: drm/amdgpu: correct hbm field in boot status hbm filed takes bit 13 and bit 14 in boot status. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index c8980d5f6540..7021c4a66fb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -46,7 +46,7 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7) #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) #define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) -#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 13, 13) +#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13) #define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31) #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000 -- cgit v1.2.3 From ba46b3bda296c4f82b061ac40b90f49d2a00a380 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 May 2024 11:25:49 -0400 Subject: drm/amdgpu: Adjust logic in amdgpu_device_partner_bandwidth() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use current speed/width on devices which don't support dynamic PCIe switching. Fixes: 466a7d115326 ("drm/amd: Use the first non-dGPU PCI device for BW limits") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3289 Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 861ccff78af9..932dc93b2e63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5944,13 +5944,18 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - while ((parent = pci_upstream_bridge(parent))) { - /* skip upstream/downstream switches internal to dGPU*/ - if (parent->vendor == PCI_VENDOR_ID_ATI) - continue; - *speed = pcie_get_speed_cap(parent); - *width = pcie_get_width_cap(parent); - break; + if (amdgpu_device_pcie_dynamic_switching_supported(adev)) { + while ((parent = pci_upstream_bridge(parent))) { + /* skip upstream/downstream switches internal to dGPU*/ + if (parent->vendor == PCI_VENDOR_ID_ATI) + continue; + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + break; + } + } else { + /* use the current speeds rather than max if switching is not supported */ + pcie_bandwidth_available(adev->pdev, NULL, speed, width); } } -- cgit v1.2.3 From a0cf36546cc24ae1c95d72253c7795d4d2fc77aa Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Thu, 23 May 2024 17:14:45 +0800 Subject: drm/amdgpu: fix dereference null return value for the function amdgpu_vm_pt_parent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pointer parent may be NULLed by the function amdgpu_vm_pt_parent. To make the code more robust, check the pointer parent. Signed-off-by: Jesse Zhang Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 7fdd306a48a0..f07647a9a9d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -706,11 +706,15 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, struct amdgpu_vm_bo_base *entry) { struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); - struct amdgpu_bo *bo = parent->bo, *pbo; + struct amdgpu_bo *bo, *pbo; struct amdgpu_vm *vm = params->vm; uint64_t pde, pt, flags; unsigned int level; + if (WARN_ON(!parent)) + return -EINVAL; + + bo = parent->bo; for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; -- cgit v1.2.3 From 1f327dfc846ae82e16e52ed9c559d566826486d2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 May 2024 15:26:50 -0400 Subject: drm/amdkfd: simplify APU VRAM handling With commit 89773b85599a ("drm/amdkfd: Let VRAM allocations go to GTT domain on small APUs") big and small APU "VRAM" handling in KFD was unified. Since AMD_IS_APU is set for both big and small APUs, we can simplify the checks in the code. v2: clean up a few more places (Lang) Acked-by: Felix Kuehling Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 8975cf41a91a..48ad0c04aa72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -196,7 +196,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return -EINVAL; vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id); - if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { + if (adev->flags & AMD_IS_APU) { system_mem_needed = size; ttm_mem_needed = size; } @@ -233,7 +233,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, if (adev && xcp_id >= 0) { adev->kfd.vram_used[xcp_id] += vram_needed; adev->kfd.vram_used_aligned[xcp_id] += - (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ? + (adev->flags & AMD_IS_APU) ? vram_needed : ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); } @@ -261,7 +261,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, if (adev) { adev->kfd.vram_used[xcp_id] -= size; - if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { + if (adev->flags & AMD_IS_APU) { adev->kfd.vram_used_aligned[xcp_id] -= size; kfd_mem_limit.system_mem_used -= size; kfd_mem_limit.ttm_mem_used -= size; @@ -890,7 +890,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, * if peer device has large BAR. In contrast, access over xGMI is * allowed for both small and large BAR configurations of peer device */ - if ((adev != bo_adev && !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU)) && + if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) && ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { @@ -1658,7 +1658,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, - atomic64_read(&adev->vram_pin_size) - reserved_for_pt; - if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { + if (adev->flags & AMD_IS_APU) { system_mem_available = no_system_mem_limit ? kfd_mem_limit.max_system_mem_limit : kfd_mem_limit.max_system_mem_limit - @@ -1706,7 +1706,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) { + if (adev->flags & AMD_IS_APU) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1953,7 +1953,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (size) { if (!is_imported && (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM || - ((adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) && + ((adev->flags & AMD_IS_APU) && mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT))) *size = bo_size; else @@ -2376,7 +2376,7 @@ static int import_obj_create(struct amdgpu_device *adev, (*mem)->bo = bo; (*mem)->va = va; (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && - !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ? + !(adev->flags & AMD_IS_APU) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; (*mem)->mapped_to_gpu_memory = 0; -- cgit v1.2.3 From a9bc5a19e4958fe664254d1ad2dc2a9f5868c210 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Wed, 22 May 2024 15:04:29 -0400 Subject: drm/amdgpu: Make CPX mode auto default in NPS4 On GFXIP9.4.3, make CPX mode as the default compute mode if the node is setup in NPS4 memory partition mode. This change is only applicable for dGPU, for APU, continue to use TPX mode. Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 414ea3f560a7..d4e2aed2efa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -422,7 +422,7 @@ __aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr) if (adev->gmc.num_mem_partitions == num_xcc / 2) return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE : - AMDGPU_QPX_PARTITION_MODE; + AMDGPU_CPX_PARTITION_MODE; if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU)) return AMDGPU_DPX_PARTITION_MODE; -- cgit v1.2.3