From 90b75e12a6e831c8516498f690058d4165d5a5d6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Oct 2025 16:45:17 -0400 Subject: drm/amdgpu: set default gfx reset masks for gfx6-8 These were not set so soft recovery was inadvertantly disabled. Fixes: 6ac55eab4fc4 ("drm/amdgpu: move reset support type checks into the caller") Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 1972763505d728c604b537180727ec8132e619df) --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 +++++ 3 files changed, 15 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 7693b7953426..80565392313f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3102,6 +3102,11 @@ static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5976ed55d9db..2b7aba22ecc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4399,6 +4399,11 @@ static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v7_0_gpu_early_init(adev); + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0856ff65288c..8a81713d97aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2023,6 +2023,11 @@ static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return 0; } -- cgit v1.2.3 From c81f5cebe849a2beeed4c8f2b06a58dfc02d5350 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 25 Oct 2025 23:29:36 -0500 Subject: drm/amdgpu: Drop PMFW RLC notifier from amdgpu_device_suspend() For S3 on vangogh, PMFW needs to be notified before the driver powers down RLC. This already happens in smu_disable_dpms() so drop the superfluous call in amdgpu_device_suspend(). Co-developed-by: Mario Limonciello (AMD) Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Alex Deucher (cherry picked from commit 960e30a61e1a7ca5341a6cf9481e770e1cda24aa) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3d032c4e2dce..2819aceaab74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5243,10 +5243,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); - r = amdgpu_dpm_notify_rlc_state(adev, false); - if (r) - return r; - return 0; } -- cgit v1.2.3 From 597eb70f7ff7551ff795cd51754b81aabedab67b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 31 Oct 2025 10:50:02 -0400 Subject: drm/amdkfd: Don't clear PT after process killed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If process is killed. the vm entity is stopped, submit pt update job will trigger the error message "*ERROR* Trying to push to a killed entity", job will not execute. Suggested-by: Christian König Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 10c382ec6c6d1e11975a11962bec21cba6360391) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a2ca9acf8c4e..923f0fa7350c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1267,6 +1267,10 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem, (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + /* VM entity stopped if process killed, don't clear freed pt bo */ + if (!amdgpu_vm_ready(vm)) + return 0; + (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); -- cgit v1.2.3 From eb6e7f520d6efa4d4ebf1671455abe4a681f7a05 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Wed, 5 Nov 2025 03:04:08 +0000 Subject: drm/amdgpu: fix gpu page fault after hibernation on PF passthrough On PF passthrough environment, after hibernate and then resume, coralgemm will cause gpu page fault. Mode1 reset happens during hibernate, but partition mode is not restored on resume, register mmCP_HYP_XCP_CTL and mmCP_PSP_XCP_CTL is not right after resume. When CP access the MQD BO, wrong stride size is used, this will cause out of bound access on the MQD BO, resulting page fault. The fix is to ensure gfx_v9_4_3_switch_compute_partition() is called when resume from a hibernation. KFD resume is called separately during a reset recovery or resume from suspend sequence. Hence it's not required to be called as part of partition switch. Signed-off-by: Samuel Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 5d1b32cfe4a676fe552416cb5ae847b215463a1a) --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 811124ff88a8..f9e2edf5260b 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -407,7 +407,8 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, return -EINVAL; } - if (adev->kfd.init_complete && !amdgpu_in_reset(adev)) + if (adev->kfd.init_complete && !amdgpu_in_reset(adev) && + !adev->in_suspend) flags |= AMDGPU_XCP_OPS_KFD; if (flags & AMDGPU_XCP_OPS_KFD) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 77f9d5b9a556..c90cbe053ef3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2292,7 +2292,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode); } else { - if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + if (adev->in_suspend) + amdgpu_xcp_restore_partition_mode(adev->xcp_mgr); + else if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) r = amdgpu_xcp_switch_partition_mode( -- cgit v1.2.3 From b09cb2996cdf50cd1ab4020e002c95d742c81313 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 4 Nov 2025 13:38:02 -0600 Subject: drm/amd: Fix suspend failure with secure display TA commit c760bcda83571 ("drm/amd: Check whether secure display TA loaded successfully") attempted to fix extra messages, but failed to port the cleanup that was in commit 5c6d52ff4b61e ("drm/amd: Don't try to enable secure display TA multiple times") to prevent multiple tries. Add that to the failure handling path even on a quick failure. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4679 Fixes: c760bcda8357 ("drm/amd: Check whether secure display TA loaded successfully") Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 4104c0a454f6a4d1e0d14895d03c0e7bdd0c8240) --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8c0e5d03de50..aa7987d0806c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2355,8 +2355,11 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (!ret && !psp->securedisplay_context.context.resp_status) { psp->securedisplay_context.context.initialized = true; mutex_init(&psp->securedisplay_context.mutex); - } else + } else { + /* don't try again */ + psp->securedisplay_context.context.bin_desc.size_bytes = 0; return ret; + } mutex_lock(&psp->securedisplay_context.mutex); -- cgit v1.2.3 From 570a66b48c22214851949fcd71816fee280aa096 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 3 Nov 2025 16:21:50 +0530 Subject: drm/amdgpu: Fix wait after reset sequence in S3 For a mode-1 reset done at the end of S3 on PSPv11 dGPUs, only check if TOS is unloaded. Fixes: 32f73741d6ee ("drm/amdgpu: Wait for bootloader after PSPv11 reset") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4649 Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 1ad25fd272753db14c5d1cc8c68e20ce01f3f888) --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 26 +++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 61268aa82df4..7333e19291cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2632,9 +2632,14 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; - if (amdgpu_acpi_should_gpu_reset(adev)) - return amdgpu_asic_reset(adev); + if (amdgpu_acpi_should_gpu_reset(adev)) { + amdgpu_device_lock_reset_domain(adev->reset_domain); + r = amdgpu_asic_reset(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); + return r; + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 64b240b51f1a..a9be7a505026 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -142,13 +142,37 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) return err; } -static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +static int psp_v11_wait_for_tos_unload(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg1, sol_reg2; + int retry_loop; + /* Wait for the TOS to be unloaded */ + for (retry_loop = 0; retry_loop < 20; retry_loop++) { + sol_reg1 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + usleep_range(1000, 2000); + sol_reg2 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg1 == sol_reg2) + return 0; + } + dev_err(adev->dev, "TOS unload failed, C2PMSG_33: %x C2PMSG_81: %x", + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_33), + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81)); + + return -ETIME; +} + +static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; int ret; int retry_loop; + /* For a reset done at the end of S3, only wait for TOS to be unloaded */ + if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev)) + return psp_v11_wait_for_tos_unload(psp); + for (retry_loop = 0; retry_loop < 20; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ -- cgit v1.2.3 From 22a36e660d014925114feb09a2680bb3c2d1e279 Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Thu, 6 Nov 2025 12:35:53 -0500 Subject: drm/amdgpu: disable peer-to-peer access for DCC-enabled GC12 VRAM surfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain multi-GPU configurations (especially GFX12) may hit data corruption when a DCC-compressed VRAM surface is shared across GPUs using peer-to-peer (P2P) DMA transfers. Such surfaces rely on device-local metadata and cannot be safely accessed through a remote GPU’s page tables. Attempting to import a DCC-enabled surface through P2P leads to incorrect rendering or GPU faults. This change disables P2P for DCC-enabled VRAM buffers that are contiguous and allocated on GFX12+ hardware. In these cases, the importer falls back to the standard system-memory path, avoiding invalid access to compressed surfaces. Future work could consider optional migration (VRAM→System→VRAM) if a performance regression is observed when `attach->peer2peer = false`. Tested on: - Dual RX 9700 XT (Navi4x) setup - GNOME and Wayland compositor scenarios - Confirmed no corruption after disabling P2P under these conditions v2: Remove check TTM_PL_VRAM & TTM_PL_FLAG_CONTIGUOUS. v3: simplify for upsteam and fix ip version check (Alex) Suggested-by: Christian König Signed-off-by: Vitaly Prosyak Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 9dff2bb709e6fbd97e263fd12bf12802d2b5a0cf) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 8561ad7f6180..ed3bef1edfe4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -82,6 +82,18 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + /* + * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+. + * Such buffers cannot be safely accessed over P2P due to device-local + * compression metadata. Fallback to system-memory path instead. + * Device supports GFX12 (GC 12.x or newer) + * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag + * + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) && + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) + attach->peer2peer = false; + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; -- cgit v1.2.3 From 9f8fd538e244b87e4556833da51ddd986f50cc81 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 4 Nov 2025 10:42:45 +0100 Subject: drm/amdgpu: jump to the correct label on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_sched_entity_init wasn't called yet, so the only thing to do is to release allocated memory. This doesn't fix any bug since entity is zero allocated and drm_sched_entity_fini does nothing in this case. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit ec49374ccb8da86b465beaf09c367f3dfd648d8f) --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index f5d5c45ddc0d..afedea02188d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -236,7 +236,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv, &num_scheds, &scheds); if (r) - goto cleanup_entity; + goto error_free_entity; } /* disable load balance if the hw engine retains context among dependent jobs */ -- cgit v1.2.3 From 6623c5f9fd877868fba133b4ae4dab0052e82dad Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Fri, 24 Oct 2025 16:09:25 +0800 Subject: drm/amdgpu: fix lock warning in amdgpu_userq_fence_driver_process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a potential deadlock caused by inconsistent spinlock usage between interrupt and process contexts in the userq fence driver. The issue occurs when amdgpu_userq_fence_driver_process() is called from both: - Interrupt context: gfx_v11_0_eop_irq() -> amdgpu_userq_fence_driver_process() - Process context: amdgpu_eviction_fence_suspend_worker() -> amdgpu_userq_fence_driver_force_completion() -> amdgpu_userq_fence_driver_process() In interrupt context, the spinlock was acquired without disabling interrupts, leaving it in {IN-HARDIRQ-W} state. When the same lock is acquired in process context, the kernel detects inconsistent locking since the process context acquisition would enable interrupts while holding a lock previously acquired in interrupt context. Kernel log shows: [ 4039.310790] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ 4039.310804] kworker/7:2/409 [HC0[0]:SC0[0]:HE1:SE1] takes: [ 4039.310818] ffff9284e1bed000 (&fence_drv->fence_list_lock){?...}-{3:3}, [ 4039.310993] {IN-HARDIRQ-W} state was registered at: [ 4039.311004] lock_acquire+0xc6/0x300 [ 4039.311018] _raw_spin_lock+0x39/0x80 [ 4039.311031] amdgpu_userq_fence_driver_process.part.0+0x30/0x180 [amdgpu] [ 4039.311146] amdgpu_userq_fence_driver_process+0x17/0x30 [amdgpu] [ 4039.311257] gfx_v11_0_eop_irq+0x132/0x170 [amdgpu] Fix by using spin_lock_irqsave()/spin_unlock_irqrestore() to properly manage interrupt state regardless of calling context. Reviewed-by: Christian König Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit ded3ad780cf97a04927773c4600823b84f7f3cc2) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 761bad98da3e..4d0096d0baa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -151,15 +151,16 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d { struct amdgpu_userq_fence *userq_fence, *tmp; struct dma_fence *fence; + unsigned long flags; u64 rptr; int i; if (!fence_drv) return; + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); - spin_lock(&fence_drv->fence_list_lock); list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { fence = &userq_fence->base; @@ -174,7 +175,7 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d list_del(&userq_fence->link); dma_fence_put(fence); } - spin_unlock(&fence_drv->fence_list_lock); + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); } void amdgpu_userq_fence_driver_destroy(struct kref *ref) -- cgit v1.2.3 From 7132f7e025f9382157543dd86a62d161335b48b9 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Fri, 7 Nov 2025 13:07:13 -0500 Subject: drm/amd/amdgpu: Ensure isp_kernel_buffer_alloc() creates a new BO When the BO pointer provided to amdgpu_bo_create_kernel() points to non-NULL, amdgpu_bo_create_kernel() takes it as a hint to pin that address rather than allocate a new BO. This functionality is never desired for allocating ISP buffers. A new BO should always be created when isp_kernel_buffer_alloc() is called, per the description for isp_kernel_buffer_alloc(). Ensure this by zeroing *bo right before the amdgpu_bo_create_kernel() call. Fixes: 55d42f616976 ("drm/amd/amdgpu: Add helper functions for isp buffers") Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Pratap Nirujogi Signed-off-by: Sultan Alsawaf Signed-off-by: Alex Deucher (cherry picked from commit 73c8c29baac7f0c7e703d92eba009008cbb5228e) --- drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 9cddbf50442a..37270c4dab8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -280,6 +280,8 @@ int isp_kernel_buffer_alloc(struct device *dev, u64 size, if (ret) return ret; + /* Ensure *bo is NULL so a new BO will be created */ + *bo = NULL; ret = amdgpu_bo_create_kernel(adev, size, ISP_MC_ADDR_ALIGN, -- cgit v1.2.3 From bbe3c115030da431c9ec843c18d5583e59482dd2 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Tue, 7 Oct 2025 13:17:51 +0530 Subject: drm/amdgpu/jpeg: Add parse_cs for JPEG5_0_1 enable parse_cs callback for JPEG5_0_1. Signed-off-by: Sathishkumar S Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit 547985579932c1de13f57f8bcf62cd9361b9d3d3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index baf097d2e1ac..ab0bf880d3d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -878,6 +878,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + -- cgit v1.2.3